## In the beginning...

#### Load the data

In [1]:
import pickle
import pandas as pd

In [2]:
# Read the industry dictionary written by Clean File Part 1 (Black Litterman page).
# NOTE: pickle.load can execute arbitrary code -- only open pickles you produced yourself.
with open('9782_industries.pkl', 'rb') as industries_file:
    industry_lists = pickle.load(industries_file)

# Keys are industry names; each value is the list of tickers assigned to that
# industry (see the other project for how the classification was made).
print(industry_lists.keys())

dict_keys(['Energy', 'Manufacturing', 'Industrial Applications & Services', 'Financials', 'Life Sciences', 'Real Estate & Construction', 'Trade & Services', 'Technology', 'No Industry'])


In [3]:
# Read the covariance matrix written by Clean File Part 2 (Black Litterman page).
# NOTE: pickle.load can execute arbitrary code -- only open pickles you produced yourself.
with open('universe_covariance.pkl', 'rb') as covariance_file:
    sigma = pickle.load(covariance_file)

# Display the (unsorted) covariance matrix
sigma

Unnamed: 0,JJSF,DGSE,ELA,PLXS,RMCF,HNGR,ADX,ORCL,MSFT,AADR,...,QEP,SIX,CORN,BNO,VPG,FN,BSFT,CBOE,SANW,TSLA
JJSF,0.000700,-1.064610e-06,0.000162,0.000315,0.000133,0.000400,0.000256,0.000218,0.000225,0.000216,...,0.000788,0.000553,0.000040,0.000232,3.168733e-04,0.000246,0.0,2.191917e-04,1.756365e-04,0.000275
DGSE,-0.000001,1.455986e-03,0.000001,0.000003,-0.000008,-0.000004,0.000004,0.000004,0.000013,0.000007,...,-0.000035,-0.000012,0.000002,-0.000006,8.199031e-07,0.000005,0.0,-1.021150e-07,6.340566e-08,0.000013
ELA,0.000162,1.229681e-06,0.001561,0.000201,0.000069,0.000192,0.000180,0.000180,0.000252,0.000178,...,0.000454,0.000239,0.000023,0.000177,1.849271e-04,0.000210,0.0,1.303568e-04,6.044076e-05,0.000334
PLXS,0.000315,3.372086e-06,0.000201,0.000721,0.000140,0.000382,0.000268,0.000242,0.000278,0.000236,...,0.000857,0.000548,0.000043,0.000262,3.354600e-04,0.000290,0.0,2.031077e-04,1.533405e-04,0.000380
RMCF,0.000133,-7.714566e-06,0.000069,0.000140,0.001175,0.000163,0.000095,0.000075,0.000075,0.000079,...,0.000393,0.000247,0.000014,0.000107,1.355746e-04,0.000098,0.0,7.579879e-05,6.746525e-05,0.000140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FN,0.000246,5.198962e-06,0.000210,0.000290,0.000098,0.000287,0.000244,0.000255,0.000309,0.000231,...,0.000630,0.000383,0.000034,0.000226,2.716795e-04,0.000715,0.0,1.630896e-04,1.287351e-04,0.000397
BSFT,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000e+00,0.000000e+00,0.000000
CBOE,0.000219,-1.021150e-07,0.000130,0.000203,0.000076,0.000259,0.000183,0.000155,0.000186,0.000155,...,0.000454,0.000349,0.000026,0.000145,2.024563e-04,0.000163,0.0,4.392968e-04,9.260510e-05,0.000209
SANW,0.000176,6.340566e-08,0.000060,0.000153,0.000067,0.000209,0.000122,0.000116,0.000097,0.000103,...,0.000442,0.000270,0.000019,0.000130,1.611557e-04,0.000129,0.0,9.260510e-05,1.949239e-03,0.000089


#### Sort all the data

Sort each industry's ticker list alphabetically, then re-order the rows and columns of the covariance matrix alphabetically by ticker.

In [4]:
# Alphabetise every industry's ticker list in place
for ticker_list in industry_lists.values():
    ticker_list.sort()

print(f"\033[1mGeneral info regarding industry classifications:\033[0m\n")

# Summarise each industry: its name, how many tickers it holds, and the first five
divider = "-" * 40
for industry, tickers in industry_lists.items():
    print(f"Industry: {industry}")
    print(f"Number of tickers: {len(tickers)}")
    print(f"Sample tickers: {tickers[:5]}")
    print(divider)

[1mGeneral info regarding industry classifications:[0m

Industry: Energy
Number of tickers: 726
Sample tickers: ['AAL', 'AAU', 'AAV', 'AAWW', 'ABX']
----------------------------------------
Industry: Manufacturing
Number of tickers: 830
Sample tickers: ['AA', 'AAOI', 'AAPL', 'AAXN', 'ABB']
----------------------------------------
Industry: Industrial Applications & Services
Number of tickers: 541
Sample tickers: ['A', 'AAC', 'ABAC', 'ABAX', 'ABIO']
----------------------------------------
Industry: Financials
Number of tickers: 450
Sample tickers: ['AAMC', 'AAME', 'AAN', 'AC', 'ACFC']
----------------------------------------
Industry: Life Sciences
Number of tickers: 925
Sample tickers: ['AAAP', 'ABBV', 'ABCL', 'ABEO', 'ABUS']
----------------------------------------
Industry: Real Estate & Construction
Number of tickers: 774
Sample tickers: ['AACQ', 'AAIC', 'AAT', 'AB', 'ABIL']
----------------------------------------
Industry: Trade & Services
Number of tickers: 714
Sample tickers:

In [5]:
# Put both axes of the covariance matrix into alphabetical ticker order,
# selecting rows and columns with the same sorted label list.
ordered_tickers = sorted(sigma.columns)
sigma_sorted = sigma.loc[ordered_tickers, ordered_tickers]

sigma_sorted

Unnamed: 0,A,AA,AAA,AAAP,AAAU,AABA,AAC,AACG,AACQ,AADR,...,ZTO,ZTR,ZTS,ZUMZ,ZUO,ZVO,ZX,ZYME,ZYNE,ZYXI
A,3.782604e-04,3.411576e-04,-3.154595e-08,0.0,1.386239e-05,2.575238e-05,9.207180e-05,1.907090e-05,1.670327e-06,2.057216e-04,...,1.259930e-04,2.019596e-04,2.415607e-04,2.232941e-04,2.574872e-04,3.005062e-04,0.0,2.208862e-04,1.876770e-04,2.300076e-04
AA,3.411576e-04,1.620141e-03,5.419161e-08,0.0,1.565142e-05,3.522545e-05,1.337583e-04,7.336827e-05,6.756444e-06,3.386056e-04,...,1.654733e-04,4.471540e-04,2.984082e-04,5.845925e-04,3.144021e-04,5.411544e-04,0.0,3.035570e-04,4.148685e-04,3.320878e-04
AAA,-3.154595e-08,5.419161e-08,6.444340e-08,0.0,-6.047095e-08,-1.818307e-08,3.478699e-08,2.330716e-08,-8.144443e-09,-6.408813e-08,...,-1.099783e-07,-4.566062e-08,-6.836731e-08,6.063322e-09,-6.117898e-08,-1.847619e-07,0.0,-5.946139e-08,5.598563e-08,3.188648e-08
AAAP,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.000000e+00
AAAU,1.386239e-05,1.565142e-05,-6.047095e-08,0.0,1.064761e-04,2.076477e-06,2.259957e-06,9.949352e-06,1.816590e-07,1.936826e-05,...,1.718002e-05,1.996621e-05,1.758020e-05,2.607238e-06,2.584129e-05,4.063646e-05,0.0,1.542429e-05,3.957138e-05,2.815634e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZVO,3.005062e-04,5.411544e-04,-1.847619e-07,0.0,4.063646e-05,3.277571e-05,1.285233e-04,1.061032e-04,5.853913e-06,2.918073e-04,...,1.792483e-04,3.385109e-04,3.151356e-04,3.089164e-04,3.250365e-04,4.839570e-03,0.0,3.300017e-04,3.534790e-04,3.581224e-04
ZX,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.000000e+00
ZYME,2.208862e-04,3.035570e-04,-5.946139e-08,0.0,1.542429e-05,2.279978e-05,9.909730e-05,3.573558e-05,3.143606e-06,1.927900e-04,...,1.192612e-04,1.987061e-04,2.226742e-04,1.955565e-04,2.308641e-04,3.300017e-04,0.0,1.312699e-03,1.972736e-04,2.337508e-04
ZYNE,1.876770e-04,4.148685e-04,5.598563e-08,0.0,3.957138e-05,2.400661e-05,8.253278e-05,8.659198e-05,6.085250e-07,2.202875e-04,...,1.301432e-04,2.507770e-04,1.906399e-04,2.136862e-04,2.774647e-04,3.534790e-04,0.0,1.972736e-04,3.063507e-03,2.882663e-04


In [6]:
## Ensure sorting was done correctly

# Get the list of all tickers from the columns of the sigma matrix
tickers = sigma.columns.tolist()
expected_order = sorted(tickers)

# The per-ticker lookups further down are label-based (.loc), so they return
# the same value no matter what order the rows/columns are in. They cannot
# detect a failed sort on their own, so check the ordering itself explicitly.
assert sigma_sorted.columns.tolist() == expected_order, \
    "sigma_sorted columns are not in alphabetical ticker order"
assert sigma_sorted.index.tolist() == expected_order, \
    "sigma_sorted rows are not in alphabetical ticker order"

# Initialize counters for matches and mismatches
match_count = 0
mismatch_count = 0

# Confirm that re-ordering did not alter the data: compare each ticker's
# covariance with itself (the diagonal entry) before and after sorting.
for ticker in tickers:
    # Covariance between ticker and itself in the original sigma matrix
    cov_original = sigma.loc[ticker, ticker]

    # Covariance between ticker and itself in the sorted sigma matrix
    cov_sorted = sigma_sorted.loc[ticker, ticker]

    # Exact equality is intended: sorting only moves values, it never recomputes them
    if cov_original == cov_sorted:
        match_count += 1
    else:
        mismatch_count += 1

# After the loop, print the results
print(f"Number of matching covariances: {match_count}")
print(f"Number of mismatching covariances: {mismatch_count}")

Number of matching covariances: 9782
Number of mismatching covariances: 0


### Begin testing plotter

In [7]:
# Bring in the plotting helper from the new module
from new_plotter import plot_industries_with_zoom

# Mean and standard deviation taken over every entry of the sorted covariance matrix
cov_values = sigma_sorted.values
overall_mean = cov_values.mean()
overall_std = cov_values.std()

# Display range: two standard deviations either side of the overall mean
vmin, vmax = overall_mean - 2 * overall_std, overall_mean + 2 * overall_std

In [8]:
# # Define the industries and covariance matrix you want to plot
# industries_to_plot = ["Energy", "Manufacturing"]

# # Test the plot function
# plot_industries_with_zoom(
#     industries=industries_to_plot,
#     industry_tickers=industry_lists,  # Use industry tickers dictionary
#     covariance_matrix=sigma_sorted,  # Covariance matrix sorted by tickers
#     vmin=vmin,
#     vmax=vmax
# )

In [9]:
# # Define the industries you want to plot
# industries_to_plot = ["Energy", "Manufacturing", "Technology", "Trade & Services"]

# # Test the plot function with 4 industries
# plot_industries_with_zoom(
#     industries=industries_to_plot,
#     industry_tickers=industry_lists,  # Use industry tickers dictionary
#     covariance_matrix=sigma_sorted,  # Covariance matrix sorted by tickers
#     vmin=vmin,
#     vmax=vmax
# )


In [18]:
# Save the sorted industry dictionary with the pickle module
with open("sorted_industries.pkl", "wb") as industries_out:
    pickle.dump(industry_lists, industries_out)
print("Sorted industry lists have been pickled as 'sorted_industries.pkl'")

# Save the sorted covariance DataFrame with pandas' own to_pickle
sigma_sorted.to_pickle("sorted_covariance.pkl")
print("Sorted covariance dataFrame has been pickled as 'sorted_covariance.pkl'")

Sorted industry lists have been pickled as 'sorted_industries.pkl'
Sorted covariance dataFrame has been pickled as 'sorted_covariance.pkl'
