In [25]:
import pandas as pd
import numpy as np
import os

In [26]:
# ---- Select Dataset Source ----
# Lookup table: supported dataset option -> combined CSV file to load.
_DATASET_FILES = {
    'NIFTY50': 'NIFTY50_Combined.csv',
    'NASDAQ': 'NASDAQ100_Combined.csv',
    'Crypto': 'CRYPTO50_Combined.csv',
}

dataset_option = 'Crypto'  # Change as needed (must be a key of the table above)

# Fail fast on an unsupported option instead of continuing with no path set.
if dataset_option not in _DATASET_FILES:
    raise ValueError('Invalid dataset option selected!')

DATA_PATH = _DATASET_FILES[dataset_option]
print(f"Using dataset: {DATA_PATH}")

Using dataset: CRYPTO50_Combined.csv


In [27]:

# Root output directory, given as a relative path (resolved against the
# current working directory). exist_ok=True makes re-running this cell a no-op
# when the directory is already there.
RESULTS_DIR = "results"
os.makedirs(RESULTS_DIR, exist_ok=True)

In [28]:
# Ensure the per-dataset results subdirectory (RESULTS_DIR/<dataset_option>)
# exists; exist_ok=True means an already-existing directory is not an error.
os.makedirs(os.path.join(RESULTS_DIR, dataset_option), exist_ok=True)

In [29]:
# Load the combined price file chosen via DATA_PATH into a DataFrame.
# NOTE(review): the pivot step below reads the "Date", "Company" and "Close"
# columns — confirm every dataset option provides them.
df = pd.read_csv(DATA_PATH)

In [30]:
# Quick schema check: show the column labels of the loaded frame.
print("Columns in dataset:", df.columns)

Columns in dataset: Index(['Date', 'Company', 'Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')


In [31]:
# 2. Pivot to wide format: one row per Date, one column per Company,
#    values taken from the "Close" column (not an adjusted close).
#    Note: DataFrame.pivot raises ValueError if any (Date, Company) pair
#    occurs more than once in df.
price_df = df.pivot(index="Date", columns="Company", values="Close")

In [32]:
# Preview the first five rows of the wide price table.
price_df.head()

Company,Aave,Algorand,Avalanche,BNB,Bitcoin,Bitcoin Cash,Cardano,Celo,Chainlink,Cosmos,...,Tether,Tezos,Toncoin,UNUS SED LEO,USD Coin,Uniswap,Wrapped Bitcoin,XRP,Zcash,yearn.finance
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01,52.045834,0.177466,10.865915,244.136978,16625.080078,96.95507,0.249771,0.480663,5.622443,9.464469,...,0.999692,0.719059,1.059733,3.523903,0.999993,0.0001,16561.123047,0.338763,37.189072,5137.62207
2023-01-02,53.083157,0.181194,11.153615,245.535904,16688.470703,99.310051,0.253828,0.491749,5.687627,9.802422,...,0.999771,0.739795,1.060018,3.498005,0.999989,0.0001,16659.056641,0.348556,39.523411,5369.373535
2023-01-03,53.040043,0.184302,11.38445,246.133362,16679.857422,99.649353,0.252796,0.509733,5.622096,10.161021,...,0.999759,0.753744,1.058663,3.496508,1.000098,0.0001,16660.253906,0.343824,39.985916,5331.855957
2023-01-04,56.787495,0.187093,12.077613,259.11969,16863.238281,101.482735,0.267677,0.507278,5.804032,10.198363,...,0.999763,0.76699,1.059235,3.510222,1.000004,0.000101,16845.671875,0.347794,40.710484,5379.364258
2023-01-05,55.387627,0.183544,11.740196,256.422852,16836.736328,101.084549,0.269102,0.493617,5.647835,10.076512,...,0.999697,0.761851,1.072343,3.511864,1.000021,0.000101,16812.716797,0.338039,39.746597,5389.466797


In [33]:
# 3. Calculate daily returns (percentage change between consecutive rows).
#    pct_change()'s implicit forward-fill of missing prices is deprecated
#    (pandas emits a FutureWarning) and is going away, which would silently
#    change the result. The forward-fill is therefore done explicitly and
#    pct_change is told not to fill again (fill_method=None); the numbers are
#    the same as with the old default.
#    Consequence of the forward-fill: a day with a missing price inside an
#    asset's history gets a 0.0 return rather than NaN.
#    dropna() then removes every date on which at least one asset still has no
#    return (the first row, and any dates before an asset's first price).
returns_df = price_df.ffill().pct_change(fill_method=None).dropna()

  returns_df = price_df.pct_change().dropna()


In [34]:
# 4. Calculate expected returns: the arithmetic mean of each column of
#    returns_df, i.e. one per-period (not annualised) figure per asset.
expected_returns = returns_df.mean()

In [35]:
# 5. Calculate the covariance matrix of returns: pairwise covariance between
#    the columns of returns_df, giving a square frame with one row and one
#    column per asset.
cov_matrix = returns_df.cov()

In [36]:
# Persist the derived tables for the selected dataset as CSV files.
# The destination directory is built once from its real path components
# (no hard-coded "/" inside a component) and created if missing, so this cell
# does not depend on an earlier cell having created it.
output_dir = os.path.join(RESULTS_DIR, dataset_option)
os.makedirs(output_dir, exist_ok=True)

returns_df.to_csv(os.path.join(output_dir, "daily_returns.csv"))
expected_returns.to_csv(os.path.join(output_dir, "expected_returns.csv"))
cov_matrix.to_csv(os.path.join(output_dir, "cov_matrix.csv"))

In [37]:

# Report completion and print a small sanity-check summary: the first few
# expected returns and the dimensions of the covariance matrix.
# NOTE(review): the CSV files are written under RESULTS_DIR/<dataset_option>,
# whereas the message below names "/results" — consider aligning the text.
print("✅ Preprocessing done. Files saved in /results")
print("Expected Returns (first few):\n", expected_returns.head())
print("Covariance Matrix shape:", cov_matrix.shape)

✅ Preprocessing done. Files saved in /results
Expected Returns (first few):
 Company
Aave         0.004549
Algorand     0.003807
Avalanche    0.004228
BNB          0.003021
Bitcoin      0.003079
dtype: float64
Covariance Matrix shape: (42, 42)
