In [4]:
import scipy.cluster.hierarchy as sch
import numpy as np

def plot_clustered_correlation_heatmap(combined_pct_change_df):
    # Calculate the correlation matrix
    corr_matrix = combined_pct_change_df.corr()
    
    # Convert correlation matrix to distance matrix (1 - absolute correlation)
    distance_matrix = 1 - np.abs(corr_matrix)
    
    # Perform hierarchical clustering
    linkage = sch.linkage(distance_matrix, method='complete')
    dendro = sch.dendrogram(linkage, no_plot=True)
    
    # Reorder the correlation matrix according to the clustering
    reordered_corr_matrix = corr_matrix.iloc[dendro['leaves'], dendro['leaves']]
    
    # Plot the reordered correlation heatmap
    plt.figure(figsize=(48, 48))  # Increase figure size for better readability
    sns.heatmap(reordered_corr_matrix, annot=True, cmap='RdBu', center=0, 
                annot_kws={"size": 6}, fmt='.2f', cbar_kws={"shrink": 0.5}, 
                linewidths=0.1, square=True)
    
    # Set axis labels and title
    plt.xticks(rotation=45, ha="right", fontsize=14)
    plt.yticks(fontsize=14)
    plt.title('Clustered Correlation Heatmap of Percentage Changes', fontsize=18)
    plt.tight_layout()  # Adjust layout to fit everything
    
    # Save the plot to a PDF
    pdf.savefig()  # Save the heatmap to the PDF
    plt.close()

In [5]:
import os
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from matplotlib.backends.backend_pdf import PdfPages

# Create a folder for today's date
today = datetime.today().strftime('%Y-%m-%d')
data_folder = f"data/{today}"
output_folder = f"output/{today}"
os.makedirs(data_folder, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)

# Function to download or load cached data
def get_data(ticker):
    file_path = f"{data_folder}/{ticker}.csv"
    
    # Check if file already exists
    if os.path.exists(file_path):
        print(f"Loading {ticker} data from {file_path}")
        try:
            return pd.read_csv(file_path, index_col=0, parse_dates=True)
        except Exception as e:
            print(f"Error loading cached file for {ticker}: {e}")
            return None
    else:
        print(f"Downloading {ticker} data and saving to {file_path}")
        try:
            df = yf.download(ticker, period='5y')
            if not df.empty:
                df.to_csv(file_path)
                return df
            else:
                print(f"Downloaded data for {ticker} is empty, skipping.")
                return None
        except Exception as e:
            print(f"Error downloading {ticker}: {e}")
            return None

all_assets = current_assets
'''
all_assets = [
    'SPY', 'USO', 'GLD', 'TLT', 'XLU', 'XLK', 'XLY', 'DBC', 'EEM', 'ZROZ',
    'IEF', 'XLP', 'XLRE', 'PDBC', 'GSG', 'DBA', 'JJM', 'COPX', 'TIP', 'UUP',
    'ITB', 'EFA', 'VWO', 'XLC', 'XLV', 'XLF', 'XLI', 'XLB', 'XLE', 'XLRE',
    'XLP', 'XLY', 'XLK', 'XLU', 'FXE', 'FXY', 'FXB', 'FXC', 'FXA', 'FXF',
    'CNY=X', 'CL=F', 'BZ=F', 'NG=F', 'RB=F', 'HO=F', 'GC=F', 'SI=F', 'HG=F',
    'PL=F', 'PA=F', 'ALI=F', 'ZC=F', 'ZW=F', 'ZS=F', 'KC=F', 'SB=F',
    'CT=F', 'CC=F', 'LB=F', 'LE=F', 'HE=F', 'GF=F']
'''

'''
all_assets = [
    'SPY', 'USO', 'GC.F', 'TLT', 'XLU', 'XLK', 'XLY', 'DBC', 'EEM', 'ZROZ', 
    'IEF', 'XLP', 'XLRE', 'PDBC', 'GSG', 'DBA', 'JJM', 'COPX', 'TIP', 'UUP', 
    'ITB', 'EFA', 'VWO', 'XLC', 'XLV', 'XLF', 'XLI', 'XLB', 'XLE', 'XLRE', 
    'XLP', 'XLY', 'XLK', 'XLU', 'FXE', 'FXY', 'FXB', 'FXC', 'FXA', 'FXF', 
    'CNY', 'CL=F', 'BZ=F', 'NG=F', 'RB=F', 'HO=F', 'GC=F', 'SI=F', 'HG=F', 
    'PL=F', 'PA=F', 'ALI=F', 'ZC=F', 'ZW=F', 'ZS=F', 'KC=F', 'SB=F', 
    'CT=F', 'CC=F', 'LB=F', 'LE=F', 'HE=F', 'GF=F']
''' 
# List of assets
#all_assets = [
#    'SPY', 'USO', 'GC=F', 'TLT', 'XLU', 'XLK', 'XLY', 'DBC', 'EEM', 'ZROZ', 
#    'IEF', 'XLP', 'XLRE', 'PDBC', 'GSG', 'DBA', 'JJM', 'COPX', 'TIP', 'UUP', 
#    'ITB', 'EFA', 'VWO', 'XLC', 'XLV', 'XLF', 'XLI', 'XLB', 'XLE', 'XLRE', 
#    'XLP', 'XLY', 'XLK', 'XLU', 'FXE', 'FXY', 'FXB', 'FXC', 'FXA', 'FXF', 
#    'CNY', 'CL=F', 'BZ=F', 'NG=F', 'RB=F', 'HO=F', 'GC=F', 'SI=F', 'HG=F', 
#    'PL=F', 'PA=F', 'ALI=F', 'ZC=F', 'ZW=F', 'ZS=F', 'KC=F', 'SB=F', 
#    'CT=F', 'CC=F', 'LB=F', 'LE=F', 'HE=F', 'GF=F'
#]

# Function to download or load data for all assets in parallel
def download_data_parallel(assets):
    df_dict = {}
    failed_tickers = []
    with ThreadPoolExecutor(max_workers=1) as executor:
        future_to_ticker = {executor.submit(get_data, asset): asset for asset in assets}
        for future in as_completed(future_to_ticker):
            asset = future_to_ticker[future]
            try:
                df = future.result()
                if df is not None and 'Adj Close' in df.columns:
                    df_dict[asset] = df['Adj Close']
                else:
                    print(f"Data for {asset} is invalid or missing 'Adj Close', skipping.")
                    failed_tickers.append(asset)
            except Exception as e:
                print(f"Error processing {asset}: {e}")
                failed_tickers.append(asset)
    if failed_tickers:
        print(f"Failed to download data for: {', '.join(failed_tickers)}")
    return df_dict

df_dict = download_data_parallel(all_assets)

# Ensure we have valid data before continuing
if len(df_dict) == 0:
    print("No valid data was downloaded, exiting...")
else:
    # Calculate percentage change for all assets
    pct_change_dict = {}
    for asset, df in df_dict.items():
        if not df.empty:
            pct_change = df.pct_change().dropna()
            if not pct_change.empty:
                pct_change_dict[asset] = pct_change

    # Convert to DataFrame for easier handling
    combined_pct_change_df = pd.DataFrame(pct_change_dict).dropna()

    # Check if the combined DataFrame is still valid
    if combined_pct_change_df.empty:
        print("No valid percentage change data, exiting...")
    else:
        # Save correlation heatmap to a PDF
        
        with PdfPages(f"{output_folder}/correlation_matrix.pdf") as pdf:
            
            plot_clustered_correlation_heatmap(combined_pct_change_df)

            

        print(f"Correlation matrix saved to {output_folder}/correlation_matrix.pdf")

NameError: name 'current_assets' is not defined

In [6]:
import os
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from matplotlib.backends.backend_pdf import PdfPages

# Create a folder for today's date
today = datetime.today().strftime('%Y-%m-%d')
data_folder = f"data/{today}"
output_folder = f"output/{today}"
os.makedirs(data_folder, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)

# Function to download or load cached data
def get_data(ticker):
    file_path = f"{data_folder}/{ticker}.csv"
    
    # Check if file already exists
    if os.path.exists(file_path):
        print(f"Loading {ticker} data from {file_path}")
        try:
            return pd.read_csv(file_path, index_col=0, parse_dates=True)
        except Exception as e:
            print(f"Error loading cached file for {ticker}: {e}")
            return None
    else:
        print(f"Downloading {ticker} data and saving to {file_path}")
        try:
            df = yf.download(ticker, period='5y')
            if not df.empty:
                df.to_csv(file_path)
                return df
            else:
                print(f"Downloaded data for {ticker} is empty, skipping.")
                return None
        except Exception as e:
            print(f"Error downloading {ticker}: {e}")
            return None

all_assets = [
    'SPY', 'USO', 'GLD', 'TLT', 'XLU', 'XLK', 'XLY', 'DBC', 'EEM', 'ZROZ',
    'IEF', 'XLP', 'XLRE', 'PDBC', 'GSG', 'DBA', 'JJM', 'COPX', 'TIP', 'UUP',
    'ITB', 'EFA', 'VWO', 'XLC', 'XLV', 'XLF', 'XLI', 'XLB', 'XLE', 'XLRE',
    'XLP', 'XLY', 'XLK', 'XLU', 'FXE', 'FXY', 'FXB', 'FXC', 'FXA', 'FXF',
    'CNY=X', 'CL=F', 'BZ=F', 'NG=F', 'RB=F', 'HO=F', 'GC=F', 'SI=F', 'HG=F',
    'PL=F', 'PA=F', 'ALI=F', 'ZC=F', 'ZW=F', 'ZS=F', 'KC=F', 'SB=F',
    'CT=F', 'CC=F', 'LB=F', 'LE=F', 'HE=F', 'GF=F',
    'TNX', 'IRX', 'FVX', 'TYX', 'DGS10', 'DGS2', 'DGS5', 'DGS30',
    'FEDFUNDS', 'GDPC1', 'CPIAUCSL', 'UMCSENT', 'UNRATE', 'INDPRO',
    'HOUST', 'PERMIT', 'NAPM', 'NAPMNOI', 'DCOILWTICO',
    'XAR', 'XTN', 'XRT', 'XHB', 'XSD', 'XBI',
    'VIX',
    '^N225', 'HSI', '^STOXX50E', '^FTSE', '^GDAXI', '^FCHI', '^IBEX',
    'SSMI', 'BVSP', 'MXX', 'MERV', 'GSPTSE', 'AXJO', 'NZSE50FG'
]
'''
all_assets = [
    'SPY', 'USO', 'GC.F', 'TLT', 'XLU', 'XLK', 'XLY', 'DBC', 'EEM', 'ZROZ', 
    'IEF', 'XLP', 'XLRE', 'PDBC', 'GSG', 'DBA', 'JJM', 'COPX', 'TIP', 'UUP', 
    'ITB', 'EFA', 'VWO', 'XLC', 'XLV', 'XLF', 'XLI', 'XLB', 'XLE', 'XLRE', 
    'XLP', 'XLY', 'XLK', 'XLU', 'FXE', 'FXY', 'FXB', 'FXC', 'FXA', 'FXF', 
    'CNY', 'CL=F', 'BZ=F', 'NG=F', 'RB=F', 'HO=F', 'GC=F', 'SI=F', 'HG=F', 
    'PL=F', 'PA=F', 'ALI=F', 'ZC=F', 'ZW=F', 'ZS=F', 'KC=F', 'SB=F', 
    'CT=F', 'CC=F', 'LB=F', 'LE=F', 'HE=F', 'GF=F',
    'TNX', 'IRX', 'FVX', 'TYX', 'DGS10', 'DGS2', 'DGS5', 'DGS30', 
    'FEDFUNDS', 'GDPC1', 'CPIAUCSL', 'UMCSENT', 'UNRATE', 'INDPRO', 
    'HOUST', 'PERMIT', 'NAPM', 'NAPMNOI', 'DCOILWTICO',
    'XAR', 'XTN', 'XRT', 'XHB', 'XSD', 'XBI',
    'VIX', 
    'N225', 'HSI', 'STOXX50E', 'FTSE', 'GDAXI', 'FCHI', 'IBEX', 
    'SSMI', 'BVSP', 'MXX', 'MERV', 'GSPTSE', 'AXJO', 'NZSE50FG'
]
''' 
# List of assets
#all_assets = [
#    'SPY', 'USO', 'GC=F', 'TLT', 'XLU', 'XLK', 'XLY', 'DBC', 'EEM', 'ZROZ', 
#    'IEF', 'XLP', 'XLRE', 'PDBC', 'GSG', 'DBA', 'JJM', 'COPX', 'TIP', 'UUP', 
#    'ITB', 'EFA', 'VWO', 'XLC', 'XLV', 'XLF', 'XLI', 'XLB', 'XLE', 'XLRE', 
#    'XLP', 'XLY', 'XLK', 'XLU', 'FXE', 'FXY', 'FXB', 'FXC', 'FXA', 'FXF', 
#    'CNY', 'CL=F', 'BZ=F', 'NG=F', 'RB=F', 'HO=F', 'GC=F', 'SI=F', 'HG=F', 
#    'PL=F', 'PA=F', 'ALI=F', 'ZC=F', 'ZW=F', 'ZS=F', 'KC=F', 'SB=F', 
#    'CT=F', 'CC=F', 'LB=F', 'LE=F', 'HE=F', 'GF=F'
#]

# Function to download or load data for all assets in parallel
def download_data_parallel(assets):
    df_dict = {}
    failed_tickers = []
    with ThreadPoolExecutor(max_workers=1) as executor:
        future_to_ticker = {executor.submit(get_data, asset): asset for asset in assets}
        for future in as_completed(future_to_ticker):
            asset = future_to_ticker[future]
            try:
                df = future.result()
                if df is not None and 'Adj Close' in df.columns:
                    df_dict[asset] = df['Adj Close']
                else:
                    print(f"Data for {asset} is invalid or missing 'Adj Close', skipping.")
                    failed_tickers.append(asset)
            except Exception as e:
                print(f"Error processing {asset}: {e}")
                failed_tickers.append(asset)
    if failed_tickers:
        print(f"Failed to download data for: {', '.join(failed_tickers)}")
    return df_dict

df_dict = download_data_parallel(all_assets)

# Ensure we have valid data before continuing
if len(df_dict) == 0:
    print("No valid data was downloaded, exiting...")
else:
    # Calculate percentage change for all assets
    pct_change_dict = {}
    for asset, df in df_dict.items():
        if not df.empty:
            pct_change = df.pct_change().dropna()
            if not pct_change.empty:
                pct_change_dict[asset] = pct_change

    # Convert to DataFrame for easier handling
    combined_pct_change_df = pd.DataFrame(pct_change_dict).dropna()

    # Check if the combined DataFrame is still valid
    if combined_pct_change_df.empty:
        print("No valid percentage change data, exiting...")
    else:
        # Save correlation heatmap to a PDF
        with PdfPages(f"{output_folder}/correlation_matrix.pdf") as pdf:
            import scipy.cluster.hierarchy as sch

            def plot_clustered_correlation_heatmap(combined_pct_change_df):
                # Calculate the correlation matrix
                corr_matrix = combined_pct_change_df.corr()
    
                # Perform hierarchical clustering to reorder the correlation matrix
                pairwise_dists = sch.distance.pdist(corr_matrix)  # Get pairwise distances
                linkage = sch.linkage(pairwise_dists, method='complete')  # Perform clustering
                dendro = sch.dendrogram(linkage, no_plot=True)  # Get the reordering from the clustering
    
                # Reorder the correlation matrix according to the clustering
                reordered_corr_matrix = corr_matrix.iloc[dendro['leaves'], dendro['leaves']]
    
                # Plot the reordered correlation heatmap
                plt.figure(figsize=(24, 24))  # Increase figure size for better readability
                sns.heatmap(reordered_corr_matrix, annot=True, cmap='RdBu', center=0, 
                            annot_kws={"size": 10}, fmt='.2f', cbar_kws={"shrink": 0.5}, 
                            linewidths=0.1, square=True)
    
                # Set axis labels and title
                plt.xticks(rotation=45, ha="right", fontsize=14)
                plt.yticks(fontsize=14)
                plt.title('Clustered Correlation Heatmap of Percentage Changes', fontsize=18)
                plt.tight_layout()  # Adjust layout to fit everything
    
                # Save the plot to a PDF
                pdf.savefig()  # Save the heatmap to the PDF
                plt.close()
            def plot_correlation_heatmapWorks(combined_pct_change_df):
                corr_matrix = combined_pct_change_df.corr()
                plt.figure(figsize=(24, 24))  # Adjust the figure size to focus more on block size
                sns.heatmap(corr_matrix, annot=True, cmap='RdBu', center=0, 
                            annot_kws={"size": 8}, fmt='.2f', cbar_kws={"shrink": 0.5}, 
                linewidths=0.5, square=True)  # Add linewidths and make the cells square
                # Increase font sizes for readability
                plt.xticks(rotation=45, ha="right", fontsize=12)  # Increase x-axis font size
                plt.yticks(fontsize=12)  # Increase y-axis font size
                plt.title('Correlation Heatmap of Percentage Changes Across All Assets', fontsize=16)  # Title font size
                plt.tight_layout()  # Adjust layout to fit everything
                pdf.savefig()  # Save the heatmap to the PDF
                plt.close()
            
            #plot_correlation_heatmap(combined_pct_change_df)
            plot_clustered_correlation_heatmap(combined_pct_change_df)
         

        print(f"Correlation matrix saved to {output_folder}/correlation_matrix.pdf")

Loading SPY data from data/2024-11-24/SPY.csv
Loading USO data from data/2024-11-24/USO.csv
Loading GLD data from data/2024-11-24/GLD.csv
Loading TLT data from data/2024-11-24/TLT.csv
Loading XLU data from data/2024-11-24/XLU.csv
Loading XLK data from data/2024-11-24/XLK.csv
Loading XLY data from data/2024-11-24/XLY.csv
Loading DBC data from data/2024-11-24/DBC.csv
Loading EEM data from data/2024-11-24/EEM.csv
Loading ZROZ data from data/2024-11-24/ZROZ.csv
Loading IEF data from data/2024-11-24/IEF.csv
Loading XLP data from data/2024-11-24/XLP.csv
Loading XLRE data from data/2024-11-24/XLRE.csv
Loading PDBC data from data/2024-11-24/PDBC.csv
Loading GSG data from data/2024-11-24/GSG.csv
Loading DBA data from data/2024-11-24/DBA.csv
Loading JJM data from data/2024-11-24/JJM.csv
Loading COPX data from data/2024-11-24/COPX.csv
Loading TIP data from data/2024-11-24/TIP.csv
Loading UUP data from data/2024-11-24/UUP.csv
Loading ITB data from data/2024-11-24/ITB.csv
Loading EFA data from data

[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['TNX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (period=5y)')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['IRX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (period=5y)')


$TNX: possibly delisted; No price data found  (period=5y)
Downloaded data for TNX is empty, skipping.
Downloading IRX data and saving to data/2024-11-24/IRX.csv
Data for TNX is invalid or missing 'Adj Close', skipping.
$IRX: possibly delisted; No price data found  (period=5y)
Downloaded data for IRX is empty, skipping.
Downloading FVX data and saving to data/2024-11-24/FVX.csv
Data for IRX is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['FVX']: YFInvalidPeriodError("%ticker%: Period '5y' is invalid, must be one of ['1d', '5d']")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['TYX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (period=5y)')


Downloaded data for FVX is empty, skipping.
Data for FVX is invalid or missing 'Adj Close', skipping.
Downloading TYX data and saving to data/2024-11-24/TYX.csv
$TYX: possibly delisted; No price data found  (period=5y)
Downloaded data for TYX is empty, skipping.
Downloading DGS10 data and saving to data/2024-11-24/DGS10.csv
Data for TYX is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['DGS10']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for DGS10 is empty, skipping.
Downloading DGS2 data and saving to data/2024-11-24/DGS2.csv
Data for DGS10 is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['DGS2']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for DGS2 is empty, skipping.
Data for DGS2 is invalid or missing 'Adj Close', skipping.
Downloading DGS5 data and saving to data/2024-11-24/DGS5.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['DGS5']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for DGS5 is empty, skipping.
Downloading DGS30 data and saving to data/2024-11-24/DGS30.csv
Data for DGS5 is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['DGS30']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for DGS30 is empty, skipping.
Data for DGS30 is invalid or missing 'Adj Close', skipping.
Downloading FEDFUNDS data and saving to data/2024-11-24/FEDFUNDS.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['FEDFUNDS']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for FEDFUNDS is empty, skipping.
Downloading GDPC1 data and saving to data/2024-11-24/GDPC1.csv
Data for FEDFUNDS is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GDPC1']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for GDPC1 is empty, skipping.
Downloading CPIAUCSL data and saving to data/2024-11-24/CPIAUCSL.csv
Data for GDPC1 is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['CPIAUCSL']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for CPIAUCSL is empty, skipping.
Downloading UMCSENT data and saving to data/2024-11-24/UMCSENT.csv
Data for CPIAUCSL is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['UMCSENT']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for UMCSENT is empty, skipping.
Downloading UNRATE data and saving to data/2024-11-24/UNRATE.csv
Data for UMCSENT is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['UNRATE']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for UNRATE is empty, skipping.
Downloading INDPRO data and saving to data/2024-11-24/INDPRO.csv
Data for UNRATE is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['INDPRO']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for INDPRO is empty, skipping.
Data for INDPRO is invalid or missing 'Adj Close', skipping.
Downloading HOUST data and saving to data/2024-11-24/HOUST.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['HOUST']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for HOUST is empty, skipping.
Downloading PERMIT data and saving to data/2024-11-24/PERMIT.csv
Data for HOUST is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['PERMIT']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for PERMIT is empty, skipping.
Data for PERMIT is invalid or missing 'Adj Close', skipping.
Downloading NAPM data and saving to data/2024-11-24/NAPM.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['NAPM']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for NAPM is empty, skipping.
Downloading NAPMNOI data and saving to data/2024-11-24/NAPMNOI.csv
Data for NAPM is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['NAPMNOI']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for NAPMNOI is empty, skipping.
Data for NAPMNOI is invalid or missing 'Adj Close', skipping.
Downloading DCOILWTICO data and saving to data/2024-11-24/DCOILWTICO.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['DCOILWTICO']: YFChartError('%ticker%: No data found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['VIX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (period=5y)')


Downloaded data for DCOILWTICO is empty, skipping.
Loading XAR data from data/2024-11-24/XAR.csv
Data for DCOILWTICO is invalid or missing 'Adj Close', skipping.
Loading XTN data from data/2024-11-24/XTN.csv
Loading XRT data from data/2024-11-24/XRT.csv
Loading XHB data from data/2024-11-24/XHB.csv
Loading XSD data from data/2024-11-24/XSD.csv
Loading XBI data from data/2024-11-24/XBI.csv
Downloading VIX data and saving to data/2024-11-24/VIX.csv
$VIX: possibly delisted; No price data found  (period=5y)
Downloaded data for VIX is empty, skipping.
Data for VIX is invalid or missing 'Adj Close', skipping.
Loading ^N225 data from data/2024-11-24/^N225.csv
Loading HSI data from data/2024-11-24/HSI.csv
Loading ^STOXX50E data from data/2024-11-24/^STOXX50E.csv
Loading ^FTSE data from data/2024-11-24/^FTSE.csv
Loading ^GDAXI data from data/2024-11-24/^GDAXI.csv
Loading ^FCHI data from data/2024-11-24/^FCHI.csv
Loading ^IBEX data from data/2024-11-24/^IBEX.csv
Downloading SSMI data and saving 

[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['SSMI']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for SSMI is empty, skipping.
Data for SSMI is invalid or missing 'Adj Close', skipping.
Downloading BVSP data and saving to data/2024-11-24/BVSP.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BVSP']: YFChartError('%ticker%: No data found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['MXX']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (period=5y)')


Downloaded data for BVSP is empty, skipping.
Downloading MXX data and saving to data/2024-11-24/MXX.csv
Data for BVSP is invalid or missing 'Adj Close', skipping.
$MXX: possibly delisted; No price data found  (period=5y)
Downloaded data for MXX is empty, skipping.
Data for MXX is invalid or missing 'Adj Close', skipping.
Downloading MERV data and saving to data/2024-11-24/MERV.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['MERV']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for MERV is empty, skipping.
Downloading GSPTSE data and saving to data/2024-11-24/GSPTSE.csv
Data for MERV is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GSPTSE']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for GSPTSE is empty, skipping.
Data for GSPTSE is invalid or missing 'Adj Close', skipping.
Downloading AXJO data and saving to data/2024-11-24/AXJO.csv


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AXJO']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for AXJO is empty, skipping.
Downloading NZSE50FG data and saving to data/2024-11-24/NZSE50FG.csv
Data for AXJO is invalid or missing 'Adj Close', skipping.


[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['NZSE50FG']: YFChartError('%ticker%: No data found, symbol may be delisted')


Downloaded data for NZSE50FG is empty, skipping.
Data for NZSE50FG is invalid or missing 'Adj Close', skipping.
Failed to download data for: TNX, IRX, FVX, TYX, DGS10, DGS2, DGS5, DGS30, FEDFUNDS, GDPC1, CPIAUCSL, UMCSENT, UNRATE, INDPRO, HOUST, PERMIT, NAPM, NAPMNOI, DCOILWTICO, VIX, SSMI, BVSP, MXX, MERV, GSPTSE, AXJO, NZSE50FG
Correlation matrix saved to output/2024-11-24/correlation_matrix.pdf


In [13]:
xlu_tickers = [
    "NEE", # NextEra Energy
    "DUK", # Duke Energy
    "SO",  # Southern Company
    "D",   # Dominion Energy
    "AEP", # American Electric Power
    "EXC", # Exelon
    "PEG", # Public Service Enterprise Group
    "ED",  # Consolidated Edison
    "SRE", # Sempra Energy
    "WEC", # WEC Energy Group
]

In [32]:
xlb_tickers = [
    "LIN",  # Linde plc
    "APD",  # Air Products and Chemicals
    "SHW",  # Sherwin-Williams
    "PPG",  # PPG Industries
    "DOW",  # Dow Inc.
    "LYB",  # LyondellBasell Industries
    "ECL",  # Ecolab
    "FCX",  # Freeport-McMoRan
    "NEM",  # Newmont
    "IFF",  # International Flavors & Fragrances
]

In [5]:
xlre_tickers = [
    "PLD",  # Prologis
    "EQIX", # Equinix
    "AMT",  # American Tower
    "CCI",  # Crown Castle International
    "WELL", # Welltower
    "AVB",  # AvalonBay Communities
    "PSA",  # Public Storage
    "SPG",  # Simon Property Group
    "VTR",  # Ventas
    "WY",   # Weyerhaeuser
]

In [6]:
xli_tickers = [
    "UNP",  # Union Pacific
    "HON",  # Honeywell International
    "RTX",  # Raytheon Technologies
    "BA",   # Boeing
    "CAT",  # Caterpillar
    "GE",   # General Electric
    "MMM",  # 3M
    "CSX",  # CSX Corporation
    "DE",   # Deere & Company
    "LMT",  # Lockheed Martin
]

In [7]:
xlv_tickers = [
    "JNJ",  # Johnson & Johnson
    "UNH",  # UnitedHealth Group
    "LLY",  # Eli Lilly and Company
    "MRK",  # Merck & Co.
    "ABBV", # AbbVie
    "PFE",  # Pfizer
    "TMO",  # Thermo Fisher Scientific
    "ABT",  # Abbott Laboratories
    "BMY",  # Bristol-Myers Squibb
    "MDT",  # Medtronic
]

In [22]:
xlf_tickers = [
    "BRK-B", # Berkshire Hathaway
    "JPM",   # JPMorgan Chase
    "BAC",   # Bank of America
    "WFC",   # Wells Fargo
    "BLK",   # BlackRock
    "MS",    # Morgan Stanley
    "GS",    # Goldman Sachs
    "C",     # Citigroup
    "USB",   # U.S. Bancorp
    "PNC",   # PNC Financial Services
]

In [9]:
xlk_tickers = [
    "AAPL",  # Apple
    "MSFT",  # Microsoft
    "NVDA",  # NVIDIA
    "TSM",   # Taiwan Semiconductor
    "META",  # Meta Platforms (formerly Facebook)
    "AVGO",  # Broadcom
    "ADBE",  # Adobe
    "TXN",   # Texas Instruments
    "CRM",   # Salesforce
    "INTC",  # Intel
]

In [45]:
xle_tickers = [
    "LNG",   # Nat Gas
    "XOM",   # Exxon Mobil
    "CVX",   # Chevron
    "COP",   # ConocoPhillips
    "EOG",   # EOG Resources
    "SLB",   # Schlumberger
    "VLO",   # Valero Energy
    "MPC",   # Marathon Petroleum
    "PSX",   # Phillips 66
    "OXY",   # Occidental Petroleum
    "HAL",   # Halliburton
]

In [11]:
xlp_tickers = [
    "PG",    # Procter & Gamble
    "KO",    # Coca-Cola
    "PEP",   # PepsiCo
    "COST",  # Costco Wholesale
    "WMT",   # Walmart
    "MDLZ",  # Mondelez International
    "PM",    # Philip Morris International
    "K",     # Kellogg
    "CL",    # Colgate-Palmolive
    "MO",    # Altria Group
]

In [12]:
xly_tickers = [
    "AMZN", # Amazon
    "TSLA", # Tesla
    "HD",   # Home Depot
    "MCD",  # McDonald's
    "NKE",  # Nike
    "LOW",  # Lowe's
    "SBUX", # Starbucks
    "CMG",  # Chipotle Mexican Grill
    "TJX",  # TJX Companies
    "BKNG", # Booking Holdings
]

In [13]:
xlc_tickers = [
    "GOOGL", # Alphabet (Google) Class A
    "GOOG",  # Alphabet (Google) Class C
    "META",  # Meta Platforms (formerly Facebook)
    "T",     # AT&T
    "CMCSA", # Comcast
    "VZ",    # Verizon
    "TMUS",  # T-Mobile US
    "NFLX",  # Netflix
    "CHTR",  # Charter Communications
    "DIS",   # Walt Disney
]


In [31]:
sp500_tickers = [
    "AAPL", "MSFT", "AMZN", "GOOGL", "GOOG", "META", "TSLA", "NVDA", "BRK-B", "JPM", 
    "V", "UNH", "XOM", "JNJ", "WMT", "PG", "MA", "LLY", "AVGO", "HD", "CVX", "MRK", 
    "PEP", "KO", "ABBV", "COST", "PFE", "TMO", "MCD", "ACN", "DHR", "DIS", "CSCO", 
    "ADBE", "ABT", "NFLX", "LIN", "CMCSA", "PM", "VZ", "NEE", "TXN", "NKE", "CRM", 
    "WFC", "BMY", "AMGN", "QCOM", "SCHW", "UPS", "HON", "ORCL", "MS", "RTX", "AMT", 
    "GS", "C", "AMD", "COP", "IBM", "MDT", "INTC", "BA", "BLK", "GE", "SBUX", "LMT", 
    "SPGI", "MMM", "ISRG", "CAT", "T", "CVS", "AXP", "DE", "ELV", "USB", "INTU", 
    "CB", "ZTS", "DUK", "TGT", "CL", "NOW", "MO", "GM", "BKNG", "LOW", "PNC", "SYK", 
    "GILD", "PLD", "BDX", "F", "CI", "NSC", "SO", "ADP", "CCI", "ITW", "ADI", "EW", 
    "TFC", "MU", "CME", "REGN", "SHW", "HUM", "EQIX", "APD", "MAR", "WM", "LRCX", 
    "PGR", "AON", "MMC", "PSA", "COF", "CHTR", "AIG", "AFL", "ORLY", "TJX", "MCO", 
    "HCA", "MET", "KLAC", "TRV", "FDX", "MRNA", "LHX", "FISV", "PRU", "EMR", "IDXX", 
    "CTAS", "CSX", "D", "SRE", "OTIS", "DG", "RMD", "ETN", "PSX", "ROP", "KMB", 
    "ECL", "DHI", "NOC", "EXC", "OXY", "STZ", "FCX", "SLB", "CMG", "TDG", "MSCI", 
    "ADM", "PH", "JCI", "BK", "KMI", "GLW", "WBA", "DFS", "BKR", "DOW", "ALL", 
    "VLO", "WMB", "VRSK", "EOG", "SPG", "KHC", "MPC", "STT", "KR", "CTSH", "MCHP", 
    "KDP", "SYY", "GPN", "PPG", "NUE", "CNC", "FTNT", "YUM", "IQV", "HLT", "AWK", 
    "PAYX", "HSY", "SWK", "WELL", "MCK", "FTV", "MTD", "XYL", "AJG", "TT", "PCAR", 
    "HES", "AZO", "SBAC", "ES", "MSI", "DLR", "VICI", "CDNS", "MLM", "ROST", "PXD", 
    "CBRE", "BAX", "BF.B", "KEYS", "ODFL", "RSG", "ZBH", "LEN", "NDAQ", "CARR", 
    "EFX", "LUV", "AVB", "TTWO", "SNPS", "A", "SIVB", "BXP", "NTRS", "HBAN", 
    "BILL", "EXPE", "MTB", "ARE", "STE", "PFG", "QRVO", "DLTR", "J", "AKAM", "DRI", 
    "IP", "BKR", "DXC", "CTRA", "BENE", "ALB", "GRMN", "PPL", "HIG", "SWKS", "TER", 
    "FE", "AMCR", "CE", "HOLX", "HPE", "VRSN", "ZION", "RCL", "PWR", "ESS", "HAS", 
    "IR", "BRO", "GPC", "FMC", "APA", "TYL", "JXN", "GL", "DVA", "PKI", "CTVA", 
    "HII", "L", "SEE", "PENN", "AAL", "NRG", "SBNY", "FOXA", "RE", "WDC", "LYV", 
    "FANG", "MOS", "IPG", "ATO", "EMN", "WY", "RJF", "FFIV", "LNC", "NWSA", "PVH", 
    "FRC", "DOV", "ZBRA", "NVR", "VTRS", "NLOK", "TPX", "O", "NCLH", "AES", "ALGN", 
    "UAL", "ROL", "MRO", "MGM", "PEAK", "WHR", "LKQ", "CAG", "FLS", "UAL", "CPB", 
    "CHRW", "FDS", "WAB", "AOS", "ALK", "WRB", "TAP", "VTR", "RF", "TXT", "PKG", 
    "GNRC", "SIVB", "NWS", "TDY", "CNP", "LW", "LW", "WU", "RKT", "IVZ", "TRGP", 
    "TOL", "CEG", "ATO", "PENN", "FOXA", "VFC"
]


In [32]:
sectors_list_a = [ "XLU", "XLY","XLP","XLRE","VNQ","KIE","XLF","MOO","XLV","XLI","XLC","XLB","IBB","TAN","KRE","ITB","GDX","XLE","PBW","XLK","XME","SMH"] 

In [33]:
current_assets = sp500_tickers

In [16]:
!pip install scipy


Collecting scipy
  Downloading scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 MB[0m [31m410.6 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: scipy
Successfully installed scipy-1.14.1
