In [6]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [4]:
# Optional one-time setup: uncomment and run only if yfinance is not installed.
# (In Jupyter, `%pip install ...` is generally preferred over `!pip install ...`.)
#!pip install yfinance

In [7]:
# Symbols to analyse, passed to yf.download below.
# The '.NS' suffix presumably denotes NSE (India) listings - confirm with the data source.
tickers = [
    'HDFCBANK.NS',
    'RELIANCE.NS',
    'JSWENERGY.NS',
    'LICI.NS',
    'ZOMATO.NS',
]

In [19]:
# Download price history for every ticker and keep only the 'Close' field,
# giving one column per ticker indexed by date.
#
# `auto_adjust` is passed explicitly: its default has changed between yfinance
# releases, so leaving it implicit makes 'Close' mean either raw or
# split/dividend-adjusted prices depending on the installed version.
# True matches the default of recent releases.
stock_data = yf.download(
    tickers,
    start='2023-01-01',
    end='2025-01-01',  # NOTE(review): yfinance documents `end` as exclusive - confirm
    auto_adjust=True,
)['Close']

[*********************100%***********************]  5 of 5 completed


In [20]:
# Preview the first five rows of the downloaded close prices.
stock_data.head(n=5)

Ticker,HDFCBANK.NS,JSWENERGY.NS,LICI.NS,RELIANCE.NS,ZOMATO.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-02,1588.541748,290.2771,696.21521,1176.415771,60.299999
2023-01-03,1598.929077,286.717285,720.845215,1167.806885,58.950001
2023-01-04,1570.351685,281.278595,717.312622,1150.223877,56.349998
2023-01-05,1560.256714,277.867035,714.172546,1148.168701,56.200001
2023-01-06,1555.087524,275.691559,694.056396,1158.604248,55.25


In [24]:
# Log returns per ticker: ln(P_t / P_{t-1}).
# The first row has no previous price and becomes NaN; dropna removes it along
# with any other date on which at least one ticker has a missing value.
returns = (
    stock_data
    .div(stock_data.shift(periods=1))
    .pipe(np.log)
    .dropna(how="any")
)
returns.head(n=5)

Ticker,HDFCBANK.NS,JSWENERGY.NS,LICI.NS,RELIANCE.NS,ZOMATO.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-03,0.006518,-0.012339,0.034766,-0.007345,-0.022642
2023-01-04,-0.018034,-0.019151,-0.004913,-0.015171,-0.045107
2023-01-05,-0.006449,-0.012203,-0.004387,-0.001788,-0.002665
2023-01-06,-0.003319,-0.00786,-0.028571,0.009048,-0.017048
2023-01-09,0.001942,0.020239,0.006061,0.023337,0.013483


In [25]:
# Standardizer for the return series: centers each column to zero mean and
# scales it to unit variance (the StandardScaler defaults, spelled out here).
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [28]:
# Learn per-column mean/std from the returns, then standardize those same
# returns. The result is a NumPy array (rows = dates, columns = tickers).
returns_scaled = scaler.fit(returns).transform(returns)
returns_scaled

array([[ 0.5016973 , -0.50068784,  1.69998752, -0.57964462, -1.0671817 ],
       [-1.45548584, -0.74497369, -0.26886307, -1.19172384, -1.99779345],
       [-0.53196173, -0.49579583, -0.24278715, -0.14507708, -0.23963401],
       ...,
       [ 0.31530818, -0.66616799, -0.29845617,  0.28355262, -0.42595642],
       [-0.92510535,  0.94208294,  0.99370471, -0.67097148,  1.49550116],
       [-0.24460886, -0.10556883, -0.76497016,  0.30103356, -0.72825385]])

In [45]:
# Number of principal components to keep.
# PCA requires n_components <= min(n_samples, n_features), so the requested 5
# is capped by the shape of the standardized return matrix. With the current
# five tickers this is still 5; with a shorter ticker list (or very few rows)
# the fit no longer fails on the hard-coded value.
n_components = min(5, *returns_scaled.shape)
pca = PCA(n_components=n_components)
# Fit on the standardized returns; as the cell's last expression, the fitted
# estimator is also what the cell displays.
pca.fit(returns_scaled)

In [46]:
# Share of total variance captured by each principal component, in component
# order. Despite the variable name, these are ratios, not raw variances.
explained_variance = pca.explained_variance_ratio_

In [47]:
# Table of component weights: one row per ticker, one column per principal
# component (PC1..PCn). `pca.components_` stores components as rows, hence the
# transpose.
factor_loadings = pd.DataFrame(
    data=pca.components_.T,
    index=returns.columns,
    columns=[f"PC{k}" for k in range(1, n_components + 1)],
)

In [48]:
# Report the variance share per component, then the ticker-by-component table.
for label, value in (
    ("Explained Variance Ratio:", explained_variance),
    ("Factor Loadings:\n", factor_loadings),
):
    print(label, value)

Explained Variance Ratio: [0.4076576  0.1753371  0.16784807 0.12984915 0.11930809]
Factor Loadings:
                    PC1       PC2       PC3       PC4       PC5
Ticker                                                        
HDFCBANK.NS   0.425774  0.586421 -0.427534  0.162626 -0.515358
JSWENERGY.NS  0.465935 -0.478615  0.239222 -0.485065 -0.511192
LICI.NS       0.473003 -0.421754  0.038511  0.763258  0.119776
RELIANCE.NS   0.488543 -0.026943 -0.458398 -0.393247  0.629149
ZOMATO.NS     0.373117  0.498433  0.740524 -0.032533  0.250826
