In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
# Folder holding one CSV per symbol, resolved against the current working
# directory.   <-- change this
DATA_DIR = os.path.abspath("nifty_symbols")
# File name of the missing-minutes report produced further down.
OUTPUT_FILE = "missing_minutes_report.csv"

# ------------------------------------------------------------------

# Collect the per-symbol CSV files. Two deliberate choices:
#   * OUTPUT_FILE is skipped: the report is written into DATA_DIR, so on a
#     re-run it would otherwise be picked up and loaded as if it were a symbol.
#   * names are sorted: os.listdir() returns entries in arbitrary order, and
#     the insertion order of `dfs` decides ties when the reference is chosen.
files = sorted(
    f for f in os.listdir(DATA_DIR)
    if f.lower().endswith(".csv") and f != OUTPUT_FILE
)

# symbol (file name without its extension) -> DataFrame loaded from that file
dfs = {}

# Load all files
for file in files:
    symbol = os.path.splitext(file)[0]
    path = os.path.join(DATA_DIR, file)

    # The first CSV column becomes the index and pandas is asked to parse it
    # as dates, so indexes of different symbols can be compared directly.
    df = pd.read_csv(path, index_col=0, parse_dates=True)

    dfs[symbol] = df

# ------------------------------------------------------------------
# Reference = the symbol whose frame has the most rows; its index is the
# timeline every other symbol is checked against.
row_counts = {name: len(frame) for name, frame in dfs.items()}
ref_symbol = max(row_counts, key=row_counts.get)
ref_index = dfs[ref_symbol].index

print(f"Reference symbol: {ref_symbol} ({len(ref_index)} rows)")

# ------------------------------------------------------------------
# Check every non-reference symbol against the reference timeline and emit
# one record per reference timestamp that the symbol does not have.

results = []

for symbol, df in dfs.items():
    if symbol != ref_symbol:
        # Timestamps present in the reference index but absent from this symbol.
        gaps = ref_index.difference(df.index)
        gap_total = len(gaps)

        # An empty `gaps` contributes nothing, so no explicit skip is needed.
        results.extend(
            {
                "symbol": symbol,
                "missing_timestamp": stamp,
                "missing_minutes_count": gap_total,
            }
            for stamp in gaps
        )

# ------------------------------------------------------------------
# Save result

# Naming the columns explicitly keeps the schema even when `results` is empty:
# the CSV still gets a header row and later cells can reference the columns
# instead of failing with a KeyError on a column-less frame.
result_df = pd.DataFrame(
    results,
    columns=["symbol", "missing_timestamp", "missing_minutes_count"],
)

# Reassign instead of sorting in place so re-running the cell is idempotent;
# sorting an empty frame is a no-op, so no emptiness check is required.
result_df = result_df.sort_values(["symbol", "missing_timestamp"])

# The report is written next to the input CSVs, inside DATA_DIR.
result_df.to_csv(
    os.path.join(DATA_DIR, OUTPUT_FILE),
    index=False
)

print(f"Sanity check complete → {OUTPUT_FILE}")


Reference symbol: ADANIENT (361929 rows)
Sanity check complete → missing_minutes_report.csv


In [None]:
# Row count of every symbol, taken from the frames already loaded in `dfs`.
# The previous version divided every row by the length of a single CSV: the
# one named by `symbol`, a variable left over from an earlier loop.
rows_per_symbol = {name: len(frame) for name, frame in dfs.items()}

if result_df.empty:
    # Nothing is missing anywhere: still create the column so the report
    # keeps the same schema.
    result_df['percentage_missing'] = pd.Series(dtype="float64")
else:
    # Each row is scaled by the row count of its own symbol.
    # NOTE(review): the denominator is the symbol's own row count, as before;
    # dividing by len(ref_index) would instead give the share of reference
    # minutes that are missing - confirm which one is intended.
    result_df['percentage_missing'] = (
        result_df['missing_minutes_count']
        / result_df['symbol'].map(rows_per_symbol)
        * 100
    )

In [6]:
# Write the report again so the file on disk reflects the current `result_df`.
report_path = os.path.join(DATA_DIR, OUTPUT_FILE)
result_df.to_csv(report_path, index=False)

In [8]:
import os
import pandas as pd

# Export every symbol as a snappy-compressed Parquet file.
parquet_dir = "parquet_files"
os.makedirs(parquet_dir, exist_ok=True)

# The frames in `dfs` were parsed from these same CSVs with the same
# read_csv arguments (index_col=0, parse_dates=True), so they are reused
# instead of parsing every file a second time. This also removes the
# hard-coded "nifty_symbols" path that duplicated DATA_DIR.
for symbol, df in dfs.items():
    parquet_path = os.path.join(parquet_dir, f"{symbol}.parquet")
    df.to_parquet(parquet_path, engine="pyarrow", compression="snappy")
