In [2]:
import numpy as np
import pandas as pd
import networkx as nx
from PMFG import PMFG

In [3]:
# Load the price history: one row per date (the 'Date' column becomes the index),
# one column per ticker.
raw_asset_prices_df = pd.read_csv("IVV_historical.csv", index_col='Date')

# Log-returns: difference of consecutive log-prices. The first row is always NaN
# (it has no predecessor), and any row touching a missing price is NaN as well.
log_returns_all = np.log(raw_asset_prices_df).diff()

# Keep only the rows where every column has a valid return, and apply the SAME row
# mask to the price frame. A positional `iloc[1:]` on the prices would only match
# the returns when the first row is the sole one dropped; sharing the mask keeps
# both frames the same shape even when the data contains gaps.
complete_rows = log_returns_all.notna().all(axis=1).to_numpy()
log_returns_df = log_returns_all.loc[complete_rows]
raw_asset_prices_df = raw_asset_prices_df.loc[complete_rows]

# (rows, columns) of the aligned price frame; reused by later cells.
df_shape = raw_asset_prices_df.shape
print(f"There are {df_shape[0]} rows and {df_shape[1]} columns in the dataset.")
print(f"Data timeperiod covers: {raw_asset_prices_df.index[0]} to {raw_asset_prices_df.index[-1]}")

raw_asset_prices_df.head()

There are 251 rows and 504 columns in the dataset.
Data timeperiod covers: 2020/6/22 to 2021/6/18


Unnamed: 0_level_0,AAPL,MSFT,AMZN,FB,GOOGL,GOOG,JPM,TSLA,JNJ,UNH,...,UNM,NOV,PRGO,RL,FOX,DISCA,HFC,UAA,UA,NWS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020/6/22,89.717499,200.570007,2713.820068,239.220001,1450.660034,1451.859985,96.75,198.863998,143.389999,292.670013,...,16.41,11.81,55.18,73.339996,27.6,21.84,31.73,9.51,8.58,11.99
2020/6/23,91.6325,201.910004,2764.409912,242.240005,1463.97998,1464.410034,97.93,200.356003,142.860001,297.600006,...,16.42,12.13,55.07,73.459999,27.809999,22.16,31.48,9.86,8.93,12.07
2020/6/24,90.014999,197.839996,2734.399902,234.020004,1432.699951,1431.969971,94.660004,192.169998,139.820007,289.179993,...,15.51,11.19,53.950001,70.650002,27.01,21.719999,29.58,9.47,8.58,11.69
2020/6/25,91.209999,200.339996,2754.580078,235.679993,1441.099976,1441.329956,97.959999,197.195999,139.669998,296.220001,...,16.35,12.29,54.73,70.32,26.75,21.26,29.639999,9.6,8.67,11.77
2020/6/26,88.407501,196.330002,2692.870117,216.080002,1362.540039,1359.900024,92.589996,191.947998,137.809998,286.880005,...,15.52,12.19,53.939999,67.699997,25.75,20.26,28.209999,9.11,8.26,11.54


In [4]:
# Sample correlation matrix of the log-returns (one row/column per DataFrame column).
raw_corr = log_returns_df.corr()

# Linear shrinkage of the sample correlations towards a target matrix:
#     shrunk = (1 - shr_coef) * sample + shr_coef * target
# With the identity as target, off-diagonal entries are scaled by (1 - shr_coef)
# while a unit diagonal stays at 1. The printed condition numbers show how much
# this small perturbation improves the conditioning of the matrix.
shr_coef = 1e-4
# The target is sized from raw_corr itself so it always matches the matrix it is
# blended with, independent of variables set in other cells. An all-ones matrix
# (np.ones(raw_corr.shape)) could be substituted as the target.
shr_target = np.eye(len(raw_corr))

correlation_matrix = raw_corr*(1-shr_coef) + shr_target*shr_coef
print('Condition number of sample correlation matrix: %.2e' %np.linalg.cond(raw_corr))
print('Condition number of shrunk correlation matrix: %.2e' %np.linalg.cond(correlation_matrix))
correlation_matrix.head()

Condition number of sample correlation matrix: 4.02e+19
Condition number of shrunk correlation matrix: 1.49e+06


Unnamed: 0,AAPL,MSFT,AMZN,FB,GOOGL,GOOG,JPM,TSLA,JNJ,UNH,...,UNM,NOV,PRGO,RL,FOX,DISCA,HFC,UAA,UA,NWS
AAPL,1.0,0.6942,0.691205,0.634493,0.522702,0.512583,0.029423,0.492938,0.220624,0.205805,...,0.025386,-0.016511,0.115755,-0.01563,0.113051,0.024639,0.014589,-0.048178,-0.046476,0.174052
MSFT,0.6942,1.0,0.741015,0.608564,0.717165,0.705709,0.040791,0.500559,0.284647,0.324833,...,-0.011969,-0.025407,0.099012,-0.058433,0.072387,-0.047254,-0.014123,-0.063651,-0.067568,0.198791
AMZN,0.691205,0.741015,1.0,0.688021,0.622603,0.626005,-0.070769,0.478916,0.109943,0.217174,...,-0.123943,-0.08961,-0.038883,-0.092398,0.002334,-0.070554,-0.108014,-0.055348,-0.060529,0.113214
FB,0.634493,0.608564,0.688021,1.0,0.658567,0.657867,0.020714,0.349706,0.187031,0.319269,...,-0.031311,-0.046235,0.007846,-0.03758,0.074092,0.007478,-0.002316,0.010992,0.011035,0.176672
GOOGL,0.522702,0.717165,0.622603,0.658567,1.0,0.993162,0.189587,0.377768,0.2816,0.377338,...,0.127267,0.16843,0.14018,0.083248,0.154922,0.043158,0.147497,0.181127,0.186247,0.266149


In [36]:
# Build the complete weighted graph of the correlation matrix: one node per column
# label and one undirected edge per unordered pair of columns, with the pair's
# correlation stored in the edge attribute 'weight'.
fully_connected = nx.Graph()
nodes_list = correlation_matrix.columns.values.tolist()
fully_connected.add_nodes_from(nodes_list)

# Extract the values once: indexing a NumPy array is far cheaper than a scalar
# DataFrame.iloc[i, j] lookup repeated for every pair.
corr_values = correlation_matrix.to_numpy()
n_nodes = len(nodes_list)

# Only the strict upper triangle (j > i) is visited, so each pair is added once
# and self-loops are never created. Pairs are produced in the same row-major
# order as a nested i/j loop.
fully_connected.add_weighted_edges_from(
    (nodes_list[i], nodes_list[j], corr_values[i, j])
    for i in range(n_nodes)
    for j in range(i + 1, n_nodes)
)

In [43]:
# Hand the complete correlation graph to the PMFG class imported from the PMFG module.
# NOTE(review): PMFG presumably stands for "Planar Maximally Filtered Graph"; whether the
# constructor already performs the filtering or only stores the input graph is not
# visible from this notebook — confirm against the PMFG module.
pmfg = PMFG(graph=fully_connected)