In [1]:
import sys
from pathlib import Path

# Load IPython's autoreload extension and set it to mode 2, so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Locate the project root: when the kernel's working directory is named
# 'notebooks', the root is its parent; otherwise the working directory itself
# is treated as the root.
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent if NOTEBOOK_DIR.name == 'notebooks' else NOTEBOOK_DIR

# Make modules under src/ importable. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
SRC_DIR = str(ROOT_DIR / 'src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Show the resulting search path so the src/ entry can be verified by eye.
print(f"Python will look in these locations:\n{sys.path}")


# --- Project helpers and data file locations ---
import utils

# Data files are read from / written to the data/ directory one level above
# the working directory. The paths are composed with pathlib instead of being
# written as backslash string literals: '\d' is not a valid escape sequence
# (Python warns about it) and a hard-coded backslash separator only works on
# Windows. They are converted back to str so the constants keep their
# original type; on Windows the resulting values are unchanged.
DATA_DIR = Path('..') / 'data'

SOURCE_PATH_OHLCV = str(DATA_DIR / 'df_OHLCV_2025-03-14_clean.pkl')
SOURCE_PATH = str(DATA_DIR / 'df_finviz_n_ratios.pkl')
PICKLE_PATH_COVARIANCE = str(DATA_DIR / 'df_covariance_matrix.pkl')
PICKLE_PATH_CORRELATION = str(DATA_DIR / 'df_correlation_matrix.pkl')

Python will look in these locations:
['C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5\\python310.zip', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5\\DLLs', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.10.5', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv', '', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages\\win32', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages\\win32\\lib', 'c:\\Users\\ping\\Files_win10\\python\\py310\\.venv\\lib\\site-packages\\Pythonwin', 'c:\\Users\\ping\\Files_win10\\python\\py310\\stocks\\src']


In [2]:
import pandas as pd

# Read the two pickled input DataFrames in one pass.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
df_ohlcv, df_finviz = (
    pd.read_pickle(pickle_path)
    for pickle_path in (SOURCE_PATH_OHLCV, SOURCE_PATH)
)

# Report the dimensions of each loaded frame as a quick sanity check.
print(f"Load df_ohlcv shape: {df_ohlcv.shape}")
print(f"Loaded df_finviz shape: {df_finviz.shape}")


Load df_ohlcv shape: (344750, 9)
Loaded df_finviz shape: (1379, 50)


In [3]:
# Unique symbols taken from the index of df_finviz (only this frame is used).
# Index.unique() keeps the order of first appearance, so the list is the same
# on every run, unlike list(set(...)), whose ordering is arbitrary.
symbols = df_finviz.index.unique().tolist()
print(f"Total unique symbols: {len(symbols)}")

Total unique symbols: 1379


In [4]:
# Pivot the 'Adj Close' column into a wide table: the first index level of
# df_ohlcv is moved into the columns and the remaining level stays as rows.
df_close = df_ohlcv.loc[:, 'Adj Close'].unstack(0)
print(f"Shape of df_close: {df_close.shape}")

# Preview only the first few rows of the wide table.
display(df_close.head())

Shape of df_close: (250, 1379)


Symbol,UBS,INTC,ADI,IBN,TD,SAN,MELI,SMFG,LRCX,KKR,...,WBA,DTM,EHC,BROS,ERJ,WIX,HESM,AOS,BEPC,PCVX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-03-14,32.73,24.05,208.75,29.29,58.93,6.77,2021.37,15.9,78.69,113.31,...,11.23,95.79,95.98,62.48,52.33,172.59,42.09,66.85,28.22,74.37
2025-03-13,31.71,23.7,203.8,28.95,57.67,6.45,1989.7,15.48,75.26,108.69,...,11.26,93.33,93.68,59.61,51.0,172.83,40.68,65.45,28.32,73.02
2025-03-12,31.94,20.68,206.62,28.79,58.33,6.5,2004.83,15.29,76.59,114.37,...,11.2,93.43,94.28,62.31,50.6,175.0,40.64,65.64,28.93,73.24
2025-03-11,31.38,19.78,209.08,28.52,57.67,6.39,1990.11,14.89,73.36,112.11,...,11.21,89.9,96.99,60.17,50.59,175.87,40.63,67.11,28.3,69.85
2025-03-10,31.88,19.93,216.26,28.09,58.75,6.39,1944.61,14.91,73.81,108.17,...,11.28,86.95,99.11,58.41,51.0,170.4,40.34,69.61,28.61,68.89


In [5]:
# Restrict the price table to the columns whose label appears in `symbols`.
common_symbols = df_close.columns.intersection(symbols)
df_filtered = df_close.loc[:, common_symbols]

# Symbols that were requested but have no column in the filtered table.
missing_symbols = list(set(symbols).difference(df_filtered.columns))

# Report coverage: either confirm everything was found or list what is absent.
if not missing_symbols:
    print("All symbols in your list are present in df_filtered.")
else:
    print(f"{len(missing_symbols)} symbols not found in df_filtered:")
    print(missing_symbols)

print(f"\nShape of filtered df_close: {df_filtered.shape}")
display(df_filtered)

All symbols in your list are present in df_filtered.

Shape of filtered df_close: (250, 1379)


Symbol,UBS,INTC,ADI,IBN,TD,SAN,MELI,SMFG,LRCX,KKR,...,WBA,DTM,EHC,BROS,ERJ,WIX,HESM,AOS,BEPC,PCVX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-03-14,32.73,24.05,208.75,29.29,58.93,6.77,2021.37,15.90,78.69,113.31,...,11.23,95.79,95.98,62.48,52.33,172.59,42.09,66.85,28.22,74.37
2025-03-13,31.71,23.70,203.80,28.95,57.67,6.45,1989.70,15.48,75.26,108.69,...,11.26,93.33,93.68,59.61,51.00,172.83,40.68,65.45,28.32,73.02
2025-03-12,31.94,20.68,206.62,28.79,58.33,6.50,2004.83,15.29,76.59,114.37,...,11.20,93.43,94.28,62.31,50.60,175.00,40.64,65.64,28.93,73.24
2025-03-11,31.38,19.78,209.08,28.52,57.67,6.39,1990.11,14.89,73.36,112.11,...,11.21,89.90,96.99,60.17,50.59,175.87,40.63,67.11,28.30,69.85
2025-03-10,31.88,19.93,216.26,28.09,58.75,6.39,1944.61,14.91,73.81,108.17,...,11.28,86.95,99.11,58.41,51.00,170.40,40.34,69.61,28.61,68.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-21,30.45,41.98,191.96,25.76,57.17,4.37,1555.69,11.71,95.55,101.05,...,19.45,57.85,76.74,35.62,24.73,139.89,33.25,88.35,22.87,67.64
2024-03-20,30.42,41.76,191.15,25.96,57.11,4.36,1528.84,11.67,92.29,98.03,...,19.72,56.85,76.11,34.24,25.48,141.79,32.80,86.15,22.86,69.13
2024-03-19,30.35,41.62,187.43,25.68,56.55,4.30,1519.44,11.54,90.54,95.21,...,19.36,57.07,76.70,33.71,25.07,136.52,32.84,85.30,22.26,67.60
2024-03-18,30.74,42.27,188.10,25.73,56.29,4.27,1509.05,11.63,89.98,94.60,...,19.34,55.98,75.44,34.15,23.48,138.52,32.78,84.40,22.27,68.04


In [6]:
# Compute the pairwise covariance between every pair of columns (symbols) in
# df_filtered; the result is a square symbol-by-symbol DataFrame.
# NOTE(review): df_filtered is derived from the 'Adj Close' values, so this is
# the covariance of price levels rather than of returns. Confirm that this is
# what downstream consumers of the matrix expect.
covariance_matrix = df_filtered.cov()

# Display the matrix (the whole frame is passed; the notebook truncates it)
display(covariance_matrix)

Symbol,UBS,INTC,ADI,IBN,TD,SAN,MELI,SMFG,LRCX,KKR,...,WBA,DTM,EHC,BROS,ERJ,WIX,HESM,AOS,BEPC,PCVX
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
UBS,3.514023,-4.838821,10.523315,1.059734,0.888931,0.506458,225.362045,1.782649,-5.283122,28.119065,...,-2.339480,21.922386,8.995748,18.664307,8.798726,48.004099,3.542561,-8.240084,1.542968,9.324018
INTC,-4.838821,41.931439,-30.613862,-7.175721,-3.226118,-0.818172,-1057.584253,-4.503653,47.940608,-97.022815,...,17.270051,-75.405558,-35.453597,-34.256649,-29.322482,-136.328118,-8.287450,29.382668,-7.563055,-60.146891
ADI,10.523315,-30.613862,184.578105,9.840432,3.554283,1.519106,1474.189736,7.209775,10.347198,77.701306,...,-23.768142,52.003589,37.721262,30.079545,24.562208,127.069344,8.082793,-8.773732,26.012048,100.584318
IBN,1.059734,-7.175721,9.840432,2.687374,0.530907,0.043873,240.777838,1.101896,-9.182897,26.272668,...,-4.633861,17.875178,9.179640,6.099886,5.585714,29.837366,1.300661,-4.813250,2.717335,19.899536
TD,0.888931,-3.226118,3.554283,0.530907,7.444197,0.581578,304.629101,0.230596,-6.692293,4.054826,...,-2.344558,2.630525,4.842879,2.979977,6.428793,-4.080552,2.226851,0.893146,-0.153296,17.151229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WIX,48.004099,-136.328118,127.069344,29.837366,-4.080552,4.938813,3462.931608,35.602278,-156.440177,620.877411,...,-59.840507,492.484437,174.139984,366.328292,158.561099,1124.774720,64.199481,-191.248096,21.754830,155.911131
HESM,3.542561,-8.287450,8.082793,1.300661,2.226851,0.877833,282.840024,2.724975,-8.176600,31.467666,...,-3.554317,30.295546,11.405219,30.733543,13.965713,64.199481,6.508338,-13.732434,0.150208,4.575581
AOS,-8.240084,29.382668,-8.773732,-4.813250,0.893146,-1.852950,-721.969568,-7.047116,39.239688,-103.753248,...,11.000505,-96.532788,-35.894663,-81.919186,-37.205567,-191.248096,-13.732434,49.437869,-2.016417,-15.493686
BEPC,1.542968,-7.563055,26.012048,2.717335,-0.153296,0.181490,351.195262,0.920280,-6.259255,21.891219,...,-5.070280,13.360712,10.223722,1.869795,5.256682,21.754830,0.150208,-2.016417,7.456425,26.446137


In [7]:
# Compute the pairwise correlation between every pair of columns (symbols) in
# df_filtered; the result is a square symbol-by-symbol DataFrame.
# NOTE(review): df_filtered is derived from the 'Adj Close' values, so these
# are correlations of price levels rather than of returns. Confirm intended.
correlation_matrix = df_filtered.corr()

# Display the matrix (the whole frame is passed, not just its top rows; the
# notebook truncates the rendering)
display(correlation_matrix)

Symbol,UBS,INTC,ADI,IBN,TD,SAN,MELI,SMFG,LRCX,KKR,...,WBA,DTM,EHC,BROS,ERJ,WIX,HESM,AOS,BEPC,PCVX
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
UBS,1.000000,-0.398628,0.413200,0.344850,0.173803,0.536320,0.552664,0.760604,-0.283884,0.700845,...,-0.381689,0.722564,0.659425,0.728848,0.721688,0.763560,0.740764,-0.625172,0.301432,0.307261
INTC,-0.398628,1.000000,-0.347983,-0.675977,-0.182600,-0.250817,-0.750807,-0.556276,0.745739,-0.700047,...,0.815675,-0.719489,-0.752352,-0.387260,-0.696246,-0.627744,-0.501668,0.645344,-0.427722,-0.573787
ADI,0.413200,-0.347983,1.000000,0.441835,0.095886,0.221963,0.498823,0.424450,0.076716,0.267215,...,-0.535055,0.236502,0.381528,0.162072,0.277977,0.278880,0.233204,-0.091847,0.701163,0.457349
IBN,0.344850,-0.675977,0.441835,1.000000,0.118699,0.053127,0.675205,0.537616,-0.564246,0.748796,...,-0.864514,0.673716,0.769471,0.272386,0.523898,0.542705,0.311003,-0.417585,0.607035,0.749870
TD,0.173803,-0.182600,0.095886,0.118699,1.000000,0.423138,0.513269,0.067599,-0.247069,0.069436,...,-0.262812,0.059570,0.243908,0.079952,0.362287,-0.044594,0.319924,0.046557,-0.020576,0.388323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WIX,0.763560,-0.627744,0.278880,0.542705,-0.044594,0.292330,0.474673,0.849063,-0.469860,0.864961,...,-0.545702,0.907299,0.713504,0.799586,0.726935,1.000000,0.750350,-0.811025,0.237551,0.287178
HESM,0.740764,-0.501668,0.233204,0.311003,0.319924,0.683061,0.509670,0.854324,-0.322842,0.576305,...,-0.426103,0.733727,0.614327,0.881871,0.841705,0.750350,1.000000,-0.765566,0.021562,0.110795
AOS,-0.625172,0.645344,-0.091847,-0.417585,0.046557,-0.523138,-0.472033,-0.801635,0.562145,-0.689438,...,0.478493,-0.848273,-0.701505,-0.852871,-0.813598,-0.811025,-0.765566,1.000000,-0.105023,-0.136123
BEPC,0.301432,-0.427722,0.701163,0.607035,-0.020576,0.131938,0.591243,0.269556,-0.230893,0.374566,...,-0.567884,0.302312,0.514487,0.050125,0.295991,0.237551,0.021562,-0.105023,1.000000,0.598279


In [8]:
# Persist both matrices as pickle files at the configured output paths.
pd.to_pickle(covariance_matrix, PICKLE_PATH_COVARIANCE)
pd.to_pickle(correlation_matrix, PICKLE_PATH_CORRELATION)

In [9]:
# Print a structural summary (index, columns, dtypes, memory) of each matrix.
# DataFrame.info() writes its report directly to stdout and returns None, so
# it is called on its own; wrapping it in display() only rendered an extra
# "None" after each summary.
covariance_matrix.info()
correlation_matrix.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1379 entries, UBS to PCVX
Columns: 1379 entries, UBS to PCVX
dtypes: float64(1379)
memory usage: 14.6+ MB


None

<class 'pandas.core.frame.DataFrame'>
Index: 1379 entries, UBS to PCVX
Columns: 1379 entries, UBS to PCVX
dtypes: float64(1379)
memory usage: 14.6+ MB


None