In [None]:
#Notebook description:

# This notebook evaluates the market of assets to find potential investments
# with the best risk/reward characteristics. It is intended to analyze a broad group of market assets
# and does NOT focus on any particular asset. Computing data for a large number of assets is computationally expensive, and thus
# in-depth analysis of individual assets is relegated to other notebooks.

In [None]:
# Load libraries
# Silence the 'yfinance' logger before anything is downloaded: the logger is
# disabled outright and also stopped from propagating records to parent loggers.
import logging
logger = logging.getLogger('yfinance')
logger.disabled = True
logger.propagate = False
# Make the local Quantapp package importable.
# NOTE(review): this is a machine-specific absolute Windows path; the imports
# below will fail on any machine where the project lives elsewhere.
import sys
sys.path.append(r"e:\Coding Projects\Investment Analysis")
from Quantapp.Plotter import Plotter
from Quantapp.Computation import Computation
from Quantapp.EconomicData import EconomicData

import numpy as np
import json
import os
import pandas as pd
import yfinance as yf
from statsmodels.tsa.stattools import coint
from IPython.display import display
from plotly.subplots import make_subplots
from datetime import datetime
import statsmodels.api as sm
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
import plotly.graph_objects as go
import pandas as pd
import holidays
import plotly.express as px
import concurrent.futures
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Shut down warnings.
# NOTE(review): this suppresses *every* warning category for the whole kernel
# session, including deprecation notices raised by pandas / yfinance.
import warnings
warnings.filterwarnings("ignore")



# Shared Quantapp helper instances used by every cell below:
#   qc - Computation  (spreads, rolling Sortino ratios, z-scores)
#   qp - Plotter      (all figures)
#   qe - EconomicData (asset lists and price series)
qc = Computation()
qp = Plotter()
qe = EconomicData()

                

In [None]:
# Parameters: everything a reader might tune lives in this cell.
# Rolling-window lengths used by the spread / Sortino computations below.
time_frame_week = 7
time_frame_short = 21
time_frame_mid   = 50
time_frame_long = 200
# Bar interval and look-back period passed to every price download.
interval = '1d'
period     = '10y'
# NOTE(review): this scalar is rebound to the '^IRX' download result in the
# data-loading cell, so the value assigned here does not survive that cell.
risk_free_rate = 0.02 / 252  # 2% annual risk-free rate divided by 252 trading days
# Column of the price frame that every asset is compared against.
benchmark = 'SPY'
# Forwarded as `mode=` to qc.create_and_concat_spreads.
mode='standard'
asset_class = 'broad' # supported: 'broad', 'equity'
# Only consulted when asset_class == 'equity'.
sector = 'healthcare' # supported: 'all', 'energy', 'materials', 'industrials', 'consumer_discretionary', 'consumer_staples', 'healthcare', 'financials', 'information_technology', 'communication_services', 'utilities', 'real_estate'



In [None]:
#Load: retrieve all tickers / prices 
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Load: retrieve all tickers / prices / spreads for major markets
# SPY history; used further down to compute the benchmark's daily / weekly / monthly returns.
sp500 = yf.download('SPY', period=period, interval=interval,progress=False)
# NOTE(review): this rebinds `risk_free_rate` from the scalar set in the
# parameters cell to the yfinance download result for '^IRX'.
risk_free_rate = yf.download('^IRX', period=period, interval=interval, progress=False)
# Mapping of asset-group name (e.g. 'INDICES', 'SECTORS', 'XLK_HOLDINGS') to the
# value handed to qe.generate_series for that group.
market_assets = qe.get_market_assets()

def load_close_prices(asset_key):
    """Fetch the 'Close' series for one entry of ``market_assets``.

    Thin wrapper around ``qe.generate_series`` that always passes the
    notebook-wide ``period`` and ``interval`` so that every asset group is
    requested with identical settings.

    Parameters
    ----------
    asset_key : str
        Key into ``market_assets`` (e.g. ``'INDICES'`` or ``'XLK_HOLDINGS'``).

    Returns
    -------
    Whatever ``qe.generate_series`` returns; the rest of this cell treats it
    as a DataFrame (it is concatenated column-wise and ``.corr()`` is called).
    """
    return qe.generate_series(market_assets[asset_key], columns=['Close'], period=period, interval=interval)


def drop_duplicate_columns(frame):
    """Return ``frame`` keeping only the first occurrence of each column label."""
    return frame.loc[:, ~frame.columns.duplicated()]


if asset_class == 'broad':
    #Load: retrieve prices
    # NOTE: market_assets['CRYPTO'] is currently not loaded / analysed.
    indices_df = load_close_prices('INDICES')
    sectors_df = load_close_prices('SECTORS')
    industries_df = load_close_prices('INDUSTRIES')
    bonds_df = load_close_prices('BONDS')
    precious_metals_df = load_close_prices('PRECIOUS_METALS')
    energy_df = load_close_prices('ENERGY')
    foreign_markets_df = load_close_prices('FOREIGN_MARKETS')
    primary_sector_etfs_df = load_close_prices('PRIMARY_SECTORS')
    capitalizations_df = load_close_prices('CAPITALIZATIONS')
    innovation_df = load_close_prices('INNOVATION')
    long_leveraged_df = load_close_prices('LONG_LEVERAGE')
    short_leveraged_df = load_close_prices('SHORT_LEVERAGE')
    single_factor_df = load_close_prices('SINGLE_FACTOR')
    multi_factor_df = load_close_prices('MULTI_FACTOR')
    minimum_volatility_df = load_close_prices('MINIMUM_VOLATILITY')

    # Category name -> price frame. Insertion order is significant: it fixes
    # the column order of etf_prices and which copy of a duplicated ticker wins.
    etf_dataframes = {
        "indices": indices_df,
        "sectors": sectors_df,
        "industries": industries_df,
        "bonds": bonds_df,
        "precious_metals": precious_metals_df,
        "energy": energy_df,
        "foreign_markets": foreign_markets_df,
        "primary_sector_etfs": primary_sector_etfs_df,
        "capitalizations": capitalizations_df,
        "innovation": innovation_df,
        "long_leveraged": long_leveraged_df,
        "short_leveraged": short_leveraged_df,
        "single_factor": single_factor_df,
        "multi_factor": multi_factor_df,
        "minimum_volatility": minimum_volatility_df
    }

    # One wide frame with every asset exactly once.
    etf_prices = drop_duplicate_columns(pd.concat(list(etf_dataframes.values()), axis=1))

    # Small subset (indices + sectors) kept available for quick experiments.
    test_prices = drop_duplicate_columns(pd.concat([indices_df, sectors_df], axis=1))

    benchmark_series = etf_prices[benchmark]

    print('Computing the correlation matrix...')
elif asset_class == 'equity':
    # Sector name -> etf_dataframes key of the sector ETF whose holdings represent it.
    sector_keys = {
        'energy': 'xle_holdings',
        'materials': 'xlb_holdings',
        'industrials': 'xli_holdings',
        'consumer_discretionary': 'xly_holdings',
        'consumer_staples': 'xlp_holdings',
        'healthcare': 'xlv_holdings',
        'financials': 'xlf_holdings',
        'information_technology': 'xlk_holdings',
        'communication_services': 'xlc_holdings',
        'utilities': 'xlu_holdings',
        'real_estate': 'xlre_holdings'
    }

    # Fail fast: reject a bad `sector` before any holdings are downloaded
    # (previously the error only surfaced after every download had finished).
    if sector != 'all' and sector not in sector_keys:
        supported_values = "'all', " + ", ".join([f"'{s}'" for s in sector_keys.keys()])
        raise ValueError(f"Unsupported sector. Supported values are {supported_values}.")

    xlk_holdings_df = load_close_prices('XLK_HOLDINGS')
    xlf_holdings_df = load_close_prices('XLF_HOLDINGS')
    xli_holdings_df = load_close_prices('XLI_HOLDINGS')
    xlb_holdings_df = load_close_prices('XLB_HOLDINGS')
    xlv_holdings_df = load_close_prices('XLV_HOLDINGS')
    xlu_holdings_df = load_close_prices('XLU_HOLDINGS')
    xly_holdings_df = load_close_prices('XLY_HOLDINGS')
    xlc_holdings_df = load_close_prices('XLC_HOLDINGS')
    xlp_holdings_df = load_close_prices('XLP_HOLDINGS')
    xle_holdings_df = load_close_prices('XLE_HOLDINGS')
    xlre_holdings_df = load_close_prices('XLRE_HOLDINGS')
    indices_df = load_close_prices('INDICES')

    # The benchmark column is always taken from the indices frame alone.
    etf_prices = drop_duplicate_columns(pd.concat([indices_df], axis=1))
    benchmark_series = etf_prices[benchmark]

    etf_dataframes = {
        "indices": indices_df,
        "xlk_holdings": xlk_holdings_df,
        "xlf_holdings": xlf_holdings_df,
        "xli_holdings": xli_holdings_df,
        "xlb_holdings": xlb_holdings_df,
        "xlv_holdings": xlv_holdings_df,
        "xlu_holdings": xlu_holdings_df,
        "xly_holdings": xly_holdings_df,
        "xlc_holdings": xlc_holdings_df,
        "xlp_holdings": xlp_holdings_df,
        "xle_holdings": xle_holdings_df,
        "xlre_holdings": xlre_holdings_df
    }
    # Mapping of sectors to their respective ETF holdings dataframes
    sector_mapping = {name: (key, etf_dataframes[key]) for name, key in sector_keys.items()}

    if sector != 'all':
        # Include only the specific sector (plus the indices).
        key, df = sector_mapping[sector]
        etf_dataframes = {
            "indices": indices_df,
            key: df
        }
    # For sector == 'all', etf_dataframes already holds every sector.

    # etf_prices must cover exactly the frames kept in etf_dataframes. Before
    # this fix, sector == 'all' left etf_prices with the indices only, so the
    # Sortino / z-score analysis silently skipped every holding.
    etf_prices = drop_duplicate_columns(pd.concat(list(etf_dataframes.values()), axis=1))
else:
    raise ValueError("Unsupported asset class. Supported values are 'broad' and 'equity'.")

# Per-category correlation matrix of the price columns.
etf_dataframes_correlation_matrices = {key: value.corr() for key, value in etf_dataframes.items()}




In [None]:
#Computations...

#1. return spreads between benchmark and all assets
#2. the sortino ratios for all assets
#3. the spreads between the sortino ratios of all assets and the benchmark
#4. pairwise spreads between assets within each category
#
# Every rolling window and the benchmark ticker are taken from the parameters
# cell (time_frame_* / benchmark). Previously the Sortino calls hard-coded
# 21 / 50 / 200 and 'SPY', so editing a parameter changed the progress messages
# and the return spreads but not the Sortino results.
# NOTE: the _21 / _50 / _200 suffixes in the variable names are kept because
# later cells refer to them; they mean short / mid / long time frame.

print("Computing spreads between benchmark and all assets...")
#compute the monthly, weekly and daily returns for the benchmark
sp500_monthly_returns = qc.calculate_returns(sp500,frequency='monthly')
sp500_weekly_returns = qc.calculate_returns(sp500,frequency='weekly')
sp500_daily_returns = qc.calculate_returns(sp500,frequency='daily')


#-----------------------------------------------------------
#the spreads between the benchmark and all assets
#Calculate: spreads

# The category frames are passed as a list, in etf_dataframes insertion order.
category_frames = list(etf_dataframes.values())

print("Computing spreads between benchmark and all assets...")
# Create and concatenate spreads for the weekly time frame
benchmark_minus_etf_week = qc.create_and_concat_spreads(
    category_frames, benchmark_series, time_frame=time_frame_week, mode=mode
)

print(f"Computing spreads between benchmark and all assets for the short time frame ({time_frame_short} days)...")
# Create and concatenate spreads for the short time frame
benchmark_minus_etf_short = qc.create_and_concat_spreads(
    category_frames, benchmark_series, time_frame=time_frame_short, mode=mode
)

print(f"Computing spreads between benchmark and all assets for the mid time frame ({time_frame_mid} days)...")
# Create and concatenate spreads for the mid time frame
benchmark_minus_etf_mid = qc.create_and_concat_spreads(
    category_frames, benchmark_series, time_frame=time_frame_mid, mode=mode
)

print(f"Computing spreads between benchmark and all assets for the long time frame ({time_frame_long} days)...")
# Create and concatenate spreads for the long time frame
benchmark_minus_etf_long = qc.create_and_concat_spreads(
    category_frames, benchmark_series, time_frame=time_frame_long, mode=mode
)
print(" ")
print("-----------------------------------------------------------------------")


#-----------------------------------------------------------

#the sortino ratios for all assets
print(f"computing the rolling sortino ratios for all assets for the short time frame ({time_frame_short} days)...")
rolling_sortino_ratios_etf_21 = qc.compute_rolling_sortino_ratios(etf_prices, n=time_frame_short)

print(f"computing the rolling sortino ratios for all assets for the mid time frame ({time_frame_mid} days)...")
rolling_sortino_ratios_etf_50 = qc.compute_rolling_sortino_ratios(etf_prices, n=time_frame_mid)

print(f"computing the rolling sortino ratios for all assets for the long time frame ({time_frame_long} days)...")
rolling_sortino_ratios_etf_200 = qc.compute_rolling_sortino_ratios(etf_prices, n=time_frame_long)

print(" ")
print("-----------------------------------------------------------------------")

#the spreads between the sortino ratios of all assets and the benchmark
print(f"computing the rolling sortino ratios for all assets minus the benchmark for the short time frame ({time_frame_short} days)...")
rolling_sortino_ratios_benchmark_minus_etf_21  = qc.compute_rolling_sortino_ratios_benchmark_minus_asset(etf_prices, benchmark, n=time_frame_short)

print(f"computing the rolling sortino ratios for all assets minus the benchmark for the mid time frame ({time_frame_mid} days)...")
rolling_sortino_ratios_benchmark_minus_etf_50  = qc.compute_rolling_sortino_ratios_benchmark_minus_asset(etf_prices, benchmark, n=time_frame_mid)

print(f"computing the rolling sortino ratios for all assets minus the benchmark for the long time frame ({time_frame_long} days)...")
rolling_sortino_ratios_benchmark_minus_etf_200  = qc.compute_rolling_sortino_ratios_benchmark_minus_asset(etf_prices, benchmark, n=time_frame_long)

print(" ")
print("-----------------------------------------------------------------------")

# Pairwise spreads take the whole category dict (not the list), one result per time frame.
print("Computing pairwise spreads between assets within each category...")
print(f"Computing pairwise spreads for the short time frame ({time_frame_short} days)...")
pairwise_spreads_21 = qc.create_pairwise_spreads(etf_dataframes, window=time_frame_short)

print(f"Computing pairwise spreads for the mid time frame ({time_frame_mid} days)...")
pairwise_spreads_50 = qc.create_pairwise_spreads(etf_dataframes, window=time_frame_mid)

print(f"Computing pairwise spreads for the long time frame ({time_frame_long} days)...")
pairwise_spreads_200 = qc.create_pairwise_spreads(etf_dataframes, window=time_frame_long)


In [None]:
# Build one z-score table per time frame, then merge them for plotting.
# Each table has two columns: the z-score of the asset's rolling Sortino ratio
# and the z-score of the benchmark-minus-asset Sortino spread.

def z_score_column_sort_key(column_name):
    """Sort key for the combined z-score columns: (time frame in days, metric).

    Column names look like '21 day Sortino Ratio (z score)'. The leading token
    is converted to int so the frames order numerically (21, 50, 200); as plain
    strings they would order as '200' < '21' < '50'. The third token
    ('Sortino' / 'Benchmark') separates the two metrics within a time frame.
    """
    tokens = column_name.split()
    return int(tokens[0]), tokens[2]


# Create DataFrames for each time frame
z_score_21 = pd.DataFrame()
z_score_50 = pd.DataFrame()
z_score_200 = pd.DataFrame()

# Calculate z-scores for 21-day time frame
z_score_sortino_ratio_21 = qc.calculate_z_scores(rolling_sortino_ratios_etf_21)
z_score_benchmark_minus_etf_21 = qc.calculate_z_scores(rolling_sortino_ratios_benchmark_minus_etf_21)

# The first column assigned to an empty frame defines its index; the second is aligned to it.
z_score_21['21 day Sortino Ratio (z score)'] = z_score_sortino_ratio_21
z_score_21['21 day Benchmark Minus ETF Sortino Ratio (z score)'] = z_score_benchmark_minus_etf_21
z_score_21 = z_score_21.sort_values(by='21 day Sortino Ratio (z score)', ascending=True)

# Calculate z-scores for 50-day time frame
z_score_sortino_ratio_50 = qc.calculate_z_scores(rolling_sortino_ratios_etf_50)
z_score_benchmark_minus_etf_50 = qc.calculate_z_scores(rolling_sortino_ratios_benchmark_minus_etf_50)

z_score_50['50 day Sortino Ratio (z score)'] = z_score_sortino_ratio_50
z_score_50['50 day Benchmark Minus ETF Sortino Ratio (z score)'] = z_score_benchmark_minus_etf_50
z_score_50 = z_score_50.sort_values(by='50 day Sortino Ratio (z score)', ascending=True)

# Calculate z-scores for 200-day time frame
z_score_200['200 day Sortino Ratio (z score)'] = qc.calculate_z_scores(rolling_sortino_ratios_etf_200)
z_score_200['200 day Benchmark Minus ETF Sortino Ratio (z score)'] = qc.calculate_z_scores(rolling_sortino_ratios_benchmark_minus_etf_200)
z_score_200 = z_score_200.sort_values(by='200 day Sortino Ratio (z score)', ascending=True)

# Round decimal values
z_score_21 = z_score_21.round(2)
z_score_50 = z_score_50.round(2)
z_score_200 = z_score_200.round(2)

# Combine all time frames into one DataFrame (rows are aligned on the asset index)
z_score_combined = pd.concat([z_score_21, z_score_50, z_score_200], axis=1)

# Sort columns by time frame (numerically) and then by metric
z_score_combined = z_score_combined.reindex(sorted(z_score_combined.columns, key=z_score_column_sort_key), axis=1)

# Plot the combined z-scores
qp.plot_z_score_combined(z_score_combined)


In [None]:
# Interactive pairwise-spread figure
#-----------------------------------------------------------
# One row per time frame: (label, pairwise spreads computed above, window length).
# The two dictionaries handed to the plotter are both derived from this table
# so their keys can never drift apart.
spread_time_frames = (
    ('short', pairwise_spreads_21, time_frame_short),
    ('mid', pairwise_spreads_50, time_frame_mid),
    ('long', pairwise_spreads_200, time_frame_long),
)

# label -> pairwise spreads for that time frame
pairwise_spreads_by_timeframe = {label: spreads for label, spreads, _ in spread_time_frames}

# label -> window length, passed to the plotter alongside the spreads
time_frames_mapping = {label: window for label, _, window in spread_time_frames}

# Build a single figure covering every time frame / category and render it.
print("Creating interactive pairwise spreads plot with time frame and category selection...")
fig_combined = qp.plot_pairwise_spreads(
    pairwise_spreads_by_timeframe,
    "Pairwise Return Spreads",
    time_frames_mapping,
)
fig_combined.show()

In [None]:
# Usage:
# Hand the category -> price-frame dict to the Plotter's correlation /
# cointegration view (the computation and rendering live in Quantapp.Plotter).
qp.plot_etf_correlation_cointegration(etf_dataframes) 
# NOTE(review): this message is printed here although two more plotting cells follow.
print("Analysis complete.")

In [None]:
# Prices-and-returns view for the category frames in etf_dataframes
# (rendering is delegated entirely to Quantapp.Plotter).
qp.plot_prices_and_returns(etf_dataframes)

In [None]:

# Difference-from-average view for the category frames in etf_dataframes
# (which average is used is decided inside Quantapp.Plotter - not visible here).
qp.plot_diff_from_average(etf_dataframes)