In [2]:
import pandas as pd
import numpy as np
import os

# Input folder with the raw index CSVs and output file for the returns matrix
# (both are relative paths).
RAW_DIR = '../data/raw/'
OUT_PATH = '../data/processed/returns_data.csv'

# Sector label -> raw CSV file name. Every file is currently named after its
# sector label; switch to explicit entries if a file name ever deviates.
sector_files = {
    label: label + '.csv'
    for label in (
        'NIFTY_IT',
        'NIFTY_BANK',
        'NIFTY_FMCG',
        'NIFTY_PHARMA',
        'NIFTY_AUTO',
        'NIFTY_METAL',
    )
}

def _sector_log_returns(csv_path, label):
    """Return the daily log returns of one index CSV as a Series named ``label``.

    The CSV must provide ``Date`` and ``Close`` columns. ``Close`` may be stored
    as numbers or as text with thousands separators (e.g. "8,378.40"). Values
    that cannot be parsed, and non-positive values whose logarithm is undefined,
    become NaN. The result is indexed by ``Date`` in ascending order; its first
    entry is NaN because it has no previous close to difference against.
    """
    prices = pd.read_csv(csv_path)
    prices = prices.assign(Date=pd.to_datetime(prices['Date']))[['Date', 'Close']]
    prices = prices.sort_values('Date').set_index('Date')

    close = prices['Close']
    if not pd.api.types.is_numeric_dtype(close):
        # The .str accessor only exists on text columns. A file without
        # thousands separators is parsed as float by read_csv and must skip
        # this step; astype(str) also keeps numeric entries of a mixed column.
        close = close.astype(str).str.replace(',', '', regex=False)
    close = pd.to_numeric(close, errors='coerce')

    # log() of a zero/negative price yields -inf/NaN and the resulting +/-inf
    # returns would survive dropna(); treat such prices as missing instead.
    close = close.where(close > 0)

    return np.log(close).diff().rename(label)


# One column of log returns per sector, aligned on Date.
# NOTE(review): returns are computed per file *before* alignment, so if one
# file lacks a date that the others have, that sector's next return spans more
# than one trading day while the other columns in the same row do not.
# Confirm this is acceptable, or align the price series first.
returns_df = pd.concat(
    [
        _sector_log_returns(os.path.join(RAW_DIR, file), sector)
        for sector, file in sector_files.items()
    ],
    axis=1,
).sort_index()

# Keep only dates on which every sector has a return. This removes the first
# row (NaN from diff) and any date missing or unparseable in at least one file.
returns_df = returns_df.dropna()

# Save to processed (create the folder on a fresh checkout)
os.makedirs(os.path.dirname(OUT_PATH) or '.', exist_ok=True)
returns_df.to_csv(OUT_PATH)
print(f"✅ Saved log returns matrix to: {OUT_PATH}")

# Quick peek
returns_df.tail()


✅ Saved log returns matrix to: ../data/processed/returns_data.csv


Unnamed: 0_level_0,NIFTY_IT,NIFTY_BANK,NIFTY_FMCG,NIFTY_PHARMA,NIFTY_AUTO,NIFTY_METAL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-06-12,-0.008389,-0.006703,-0.012919,-0.000953,-0.016664,-0.015634
2025-06-13,0.000233,-0.009949,-0.010525,-0.002317,-0.003612,-0.009641
2025-06-16,0.015574,0.007492,0.006328,0.002467,0.00175,0.010684
2025-06-17,0.007218,-0.004133,-0.003003,-0.019086,-0.00605,-0.014406
2025-06-18,-0.008297,0.002055,-0.004697,-0.001592,0.003655,-0.007197


In [3]:
# Inspect the earliest rows of the returns matrix
returns_df.head(n=5)

Unnamed: 0_level_0,NIFTY_IT,NIFTY_BANK,NIFTY_FMCG,NIFTY_PHARMA,NIFTY_AUTO,NIFTY_METAL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-05,-0.010924,-0.002122,0.002093,-0.0032,0.012184,-0.004041
2015-01-06,-0.025996,-0.031334,-0.016056,-0.026237,-0.026293,-0.035394
2015-01-07,-0.004014,-0.006887,-0.001121,-0.003093,0.000788,-0.016142
2015-01-08,0.009314,0.021465,0.02011,0.012431,0.017127,0.015682
2015-01-12,0.046592,0.008474,0.01133,0.003802,0.001493,-0.017035
