In [None]:
#Parameters
#Retrieve Asset Information
# --- Data sources: Wikipedia pages listing index constituents ---
sp500_url  = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
dow_url    = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
nasdaq_url = 'https://en.wikipedia.org/wiki/NASDAQ-100'

# --- Look-back windows, in rows of the price frame ---
# Used as pct_change periods and rolling-window sizes by the helpers below.
time_frame_week  = 7
time_frame_short = 21
time_frame_mid   = 50
time_frame_long  = 200

# Length of price history requested from yfinance.
period = '10y'

# Per-day risk-free rate: a 2% annual rate spread over 252 trading days.
risk_free_rate = 0.02 / 252

# Ticker every spread is measured against.
benchmark = 'SPY'


In [2]:
#Methods & Classes
#Imports and helper functions used by the cells below

import numpy as np
import pandas as pd

import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from IPython.display import display

import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import plotly.graph_objects as go
import yfinance as yf
'''from Quantapp.Computation import Computation
from Quantapp.Algorithm   import Algorithm
comp = Computation()
algorithm = Algorithm()'''
#plt.rcParams["figure.figsize"] = (20, 7)


def calculate_sortino_ratio(returns, risk_free=None):
    """Return the Sortino ratio of a series of periodic returns.

    The ratio is the mean excess return divided by the standard deviation of
    the negative excess returns only (the "downside deviation").

    Parameters
    ----------
    returns : pandas.Series
        Periodic returns, in the same period as ``risk_free``.
    risk_free : float, optional
        Per-period risk-free rate subtracted from ``returns``. When omitted,
        the notebook-level ``risk_free_rate`` is used, so existing
        one-argument callers (e.g. ``rolling(...).apply``) are unaffected.

    Returns
    -------
    float
        The Sortino ratio, or ``nan`` when the downside deviation is zero or
        undefined (fewer than two negative excess returns).
    """
    if risk_free is None:
        risk_free = risk_free_rate

    excess_returns = returns - risk_free
    downside_deviation = excess_returns[excess_returns < 0].std()

    # std() is NaN for fewer than two observations; treat that like a zero
    # deviation instead of relying on NaN propagating through the division.
    if pd.isna(downside_deviation) or downside_deviation == 0:
        return np.nan
    return excess_returns.mean() / downside_deviation

def calculate_risk_adjusted_returns(df, time_frame):
    """Rolling Sortino ratio of the one-period percentage changes of ``df``.

    ``df`` is a price Series or DataFrame; ``time_frame`` is the rolling
    window size in rows. Each window is scored by
    ``calculate_sortino_ratio``.
    """
    windows = df.pct_change().rolling(window=time_frame)
    return windows.apply(calculate_sortino_ratio)

def generate_series(tickers):
    """Download closing prices for ``tickers`` and return them as a DataFrame.

    Dots in ticker symbols are swapped for dashes before the request (e.g.
    ``BRK.B`` -> ``BRK-B``) and swapped back in the returned column names.

    Parameters
    ----------
    tickers : iterable of str
        Symbols to download; history length comes from the notebook-level
        ``period``.

    Returns
    -------
    pandas.DataFrame
        One column of closing prices per ticker. An empty DataFrame is
        returned when ``tickers`` is empty or the download raises.
    """
    tickers = [ticker.replace('.', '-') for ticker in tickers]
    if not tickers:
        # Nothing to request; skip the network call entirely.
        return pd.DataFrame()

    try:
        df = yf.download(tickers, period=period)['Close']
    except Exception as e:
        print(f"An error occurred while fetching data: {e}")
        return pd.DataFrame()

    # NOTE(review): some yfinance versions appear to return a Series for a
    # single ticker; normalise so the column handling below always works.
    if isinstance(df, pd.Series):
        df = df.to_frame(name=tickers[0])

    df.columns = [col.replace('-', '.') for col in df.columns]
    return df


def plot_returns(returns, time_frame):
    """Show a line chart of every column in ``returns`` with a zero reference line.

    Parameters
    ----------
    returns : pandas.DataFrame
        Return series to plot; the index is used as the x axis.
    time_frame : int
        Currently unused; kept so existing callers keep working.
    """
    threshold = 0
    fig = px.line(returns, x=returns.index, y=returns.columns)
    # The reference line was previously added twice, stacking two identical
    # lines and annotations on top of each other.
    fig.add_hline(y=threshold, line_dash="dash", line_color="red", annotation_text=f"y={threshold}")
    fig.show()

def create_spreads(asset_series, benchmark_series, time_frame, mode='standard'):
    """Return benchmark-minus-asset spreads for every column of ``asset_series``.

    Parameters
    ----------
    asset_series : pandas.DataFrame
        Price series, one column per asset.
    benchmark_series : pandas.Series
        Price series of the benchmark, aligned on the same index.
    time_frame : int
        Number of rows used as the return horizon (``'standard'``) or as the
        rolling window (``'sortino'``).
    mode : {'standard', 'sortino'}
        ``'standard'`` compares ``time_frame``-period percentage changes;
        ``'sortino'`` compares rolling Sortino ratios.

    Returns
    -------
    pandas.DataFrame
        ``benchmark - asset`` per column, with columns renamed to
        ``Benchmark_minus_<asset column>``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values (previously this
        surfaced as an UnboundLocalError further down).
    """
    if mode == 'standard':
        asset_returns = asset_series.pct_change(time_frame)
        benchmark_returns = benchmark_series.pct_change(time_frame)
    elif mode == 'sortino':
        asset_returns = calculate_risk_adjusted_returns(asset_series, time_frame)
        benchmark_returns = calculate_risk_adjusted_returns(benchmark_series, time_frame)
    else:
        raise ValueError(f"Unknown mode {mode!r}; expected 'standard' or 'sortino'")

    # rsub(axis=0) computes ``benchmark_returns - column`` for every column in
    # one vectorised step, aligning on the row index.
    benchmark_minus_asset = asset_returns.rsub(benchmark_returns, axis=0)
    benchmark_minus_asset.columns = ["Benchmark" + "_minus_" + col for col in benchmark_minus_asset.columns]
    return benchmark_minus_asset
'''
def create_spreads(asset_series, benchmark_series, time_frame, mode='standard'):
    asset_series = asset_series.ffill().dropna()
    benchmark_series = benchmark_series.ffill().dropna()
    if mode == 'standard':
        asset_returns = asset_series.pct_change(time_frame)
        benchmark_returns= benchmark_series.pct_change(time_frame)
    elif mode == 'sortino':
        asset_returns = calculate_risk_adjusted_returns(asset_series, time_frame)
        benchmark_returns= calculate_risk_adjusted_returns(benchmark_series, time_frame) 

    benchmark_returnsn

    print(benchmark_minus_asset)
    benchmark_minus_asset.columns = ["Benchmark" + "_minus_" + col for col in benchmark_minus_asset.columns]
    return benchmark_minus_asset
'''
def create_spread_plot(asset_spreads):
    """Build a line chart of one spread series with reference levels.

    The mean and standard deviation are taken over the non-negative spread
    values only. Horizontal lines mark zero, that mean, and the mean +/- 1 and
    2 standard deviations; the area between the mean and the series maximum
    is shaded green. The figure is returned, not shown.
    """
    zero_level = 0
    non_negative = asset_spreads[asset_spreads >= 0]
    center = non_negative.mean()
    sigma = non_negative.std()

    fig = px.line(asset_spreads)
    fig.update_layout(title=asset_spreads.name)
    fig.add_hline(y=zero_level, line_dash="dash", line_color="red", annotation_text=f"y={zero_level}")
    fig.add_hline(y=center, line_color="red", annotation_text="mean")

    # (level, dash style, colour, label) for each band edge, in drawing order.
    band_edges = (
        (center + sigma, "dash", "blue", "mean + 1 std dev"),
        (center - sigma, "dash", "blue", "mean - 1 std dev"),
        (center + 2*sigma, "dot", "green", "mean + 2 std dev"),
        (center - 2*sigma, "dot", "green", "mean - 2 std dev"),
    )
    for level, dash, color, label in band_edges:
        fig.add_hline(y=level, line_dash=dash, line_color=color,
                      annotation_text=label, annotation_position="bottom right")

    # Shade everything from the mean up to the highest observed spread.
    fig.add_shape(type="rect",
                  xref="paper", yref="y",
                  x0=0, y0=center, x1=1, y1=asset_spreads.max(),
                  fillcolor="green", opacity=0.2, line_width=0)
    return fig

def create_side_by_side_subplots(fig1, fig2):
    """Combine the traces of two figures into a single 1x2 subplot figure.

    Each subplot takes its title from the source figure's layout title;
    ``fig1`` goes in the left column and ``fig2`` in the right.
    """
    titles = (fig1.layout.title.text, fig2.layout.title.text)
    combined = make_subplots(rows=1, cols=2, subplot_titles=titles)

    for column, source in enumerate((fig1, fig2), start=1):
        for trace in source.data:
            combined.add_trace(trace, row=1, col=column)

    return combined

def plot_multiple_spreads(assets):
    """Show one spread plot (see ``create_spread_plot``) per column of ``assets``."""
    for column in assets:
        create_spread_plot(assets[column]).show()

def plot_risk_adjusted_returns(series,time_frame):
    """Build a chart of the rolling Sortino ratio of ``series`` with oversold bands.

    The mean and standard deviation are taken over the negative ratio values
    only. Lines mark zero, that mean, and the mean minus 0.75 and 1 standard
    deviations; the area between the last two is shaded green.

    Parameters
    ----------
    series : pandas.Series or pandas.DataFrame
        Price series passed to ``calculate_risk_adjusted_returns``.
    time_frame : int
        Rolling window size in rows.

    Returns
    -------
    The plotly figure (not shown).
    """
    series_adjusted_returns = calculate_risk_adjusted_returns(series,time_frame)
    negative_returns = series_adjusted_returns[series_adjusted_returns<0]
    mean = negative_returns.mean()
    standard_deviation = negative_returns.std()
    # The band was previously computed with 0.5 * std while being named and
    # labelled ".75 Std Dev"; 0.75 also matches filter_assets_below_negative_std.
    standard_deviation_level_three_fourths = mean - .75 * standard_deviation
    standard_deviation_level_single        = mean - standard_deviation

    fig = px.line(series_adjusted_returns)

    fig.add_hline(y=0, line_dash="dash", line_color="black", 
                annotation_text="Zero Line", annotation_position="bottom right")
    fig.add_hline(y=mean, line_dash="dot", line_color="blue", 
                annotation_text=f"Mean of negative returns: {mean:.2f}", annotation_position="top right")
    fig.add_hline(y=standard_deviation_level_three_fourths , line_dash="dashdot", line_color="red", 
                annotation_text=f".75 Std Dev: {standard_deviation_level_three_fourths :.2f}", annotation_position="top right")
    fig.add_hline(y=standard_deviation_level_single, line_dash="dashdot", line_color="red", 
                annotation_text=f"1 Std Dev: {standard_deviation_level_single:.2f}", annotation_position="top right")

    # Shade the zone between the 0.75 and 1 standard deviation levels.
    fig.add_shape(
        type="rect",
        x0=series_adjusted_returns.index.min(),
        x1=series_adjusted_returns.index.max(),
        y0=standard_deviation_level_three_fourths,
        y1=standard_deviation_level_single,
        fillcolor="green",
        opacity=0.2,
        line_width=0,
    )

    return fig
def filter_assets_by_positive_spread_std(asset_spreads):
    """Flag spreads whose latest value is at least one std dev above their positive mean.

    The mean and standard deviation are computed over the non-negative
    values only. The last row of ``asset_spreads`` is compared against
    ``mean + std``; for a DataFrame the result is a boolean Series indexed by
    column name.
    """
    non_negative = asset_spreads[asset_spreads >= 0]
    cutoff = non_negative.mean() + non_negative.std()
    return asset_spreads.iloc[-1] >= cutoff


def filter_assets_below_negative_std(asset_spreads):
    """Mark the points of a spread Series that sit deep in negative territory.

    The cutoff is the mean of the negative values minus 0.75 of their
    standard deviation. Returns a boolean Series aligned with the input, or
    an empty boolean Series when the input has no negative values.

    Raises
    ------
    TypeError
        If ``asset_spreads`` is not a pandas Series.
    """
    if not isinstance(asset_spreads, pd.Series):
        raise TypeError("asset_spreads must be a pandas Series")

    below_zero = asset_spreads[asset_spreads < 0]
    if below_zero.empty:
        return pd.Series(dtype=bool)

    cutoff = below_zero.mean() - 0.75 * below_zero.std()
    return asset_spreads < cutoff

def get_sector_info(ticker):
    """Look up the sector and industry of ``ticker`` through yfinance.

    Returns a dict with keys ``'Ticker'``, ``'Sector'`` and
    ``'Sub-Industry'``. Missing fields, and any exception raised during the
    lookup (which is printed), yield ``'N/A'`` values.
    """
    try:
        stock = yf.Ticker(ticker)
        details = {
            'Ticker': ticker,
            'Sector': stock.info.get('sector', 'N/A'),
            'Sub-Industry': stock.info.get('industry', 'N/A'),
        }
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        details = {'Ticker': ticker, 'Sector': 'N/A', 'Sub-Industry': 'N/A'}
    return details
    

'''
def get_market_caps(table):
    tickers = table['Symbol'].tolist()
    tickers = ['BRK-B'if symbol == 'BRK.B' else symbol for symbol in tickers]
    tickers = ['BF-B'if symbol == 'BF.B' else symbol for symbol in tickers]

    sectors = []
    sub_industry = []
    market_cap = []

    for ticker in tickers:

        info = get_sector_info(ticker)
        sectors.append(info['Sector'])
        sub_industry.append(info['Sub-Industry'])
        market_cap.append(yf.Ticker(ticker).info.get('marketCap'))
    
    table['Sector'] = sectors
    table['Sub-Industry'] = sub_industry
    table['Market Cap'] = market_cap

    return table
'''

def get_market_caps(table):
    """Add sector, industry and market cap columns to ``table`` via yfinance.

    Parameters
    ----------
    table : pandas.DataFrame
        Must contain a ``'Symbol'`` column. The frame is modified IN PLACE:
        ``'Sector'`` and ``'Sub-Industry'`` are overwritten with the values
        reported by ``get_sector_info`` and ``'Market Cap'`` is added. Later
        cells rely on this in-place update.

    Returns
    -------
    pandas.DataFrame
        The same ``table`` object, for convenience.
    """
    print("Starting market cap retrieval process...")

    tickers = table['Symbol'].tolist()
    print(f"Original tickers: {tickers[:10]}...")  # Print first 10 for brevity

    # Same '.' -> '-' normalisation generate_series applies, so every dotted
    # share class is covered rather than only BRK.B and BF.B.
    tickers = [symbol.replace('.', '-') for symbol in tickers]
    print(f"Adjusted tickers: {tickers[:10]}...")  # Print first 10 for brevity

    sectors = []
    sub_industry = []
    market_cap = []

    for ticker in tickers:
        print(f"Processing ticker: {ticker}")

        info = get_sector_info(ticker)
        sectors.append(info['Sector'])
        sub_industry.append(info['Sub-Industry'])

        # Guard the lookup like get_sector_info does, so one failing ticker
        # does not abort the whole retrieval loop.
        try:
            cap = yf.Ticker(ticker).info.get('marketCap')
        except Exception as e:
            print(f"Error fetching market cap for {ticker}: {e}")
            cap = None
        market_cap.append(cap)

        print(f"Retrieved info for {ticker}: Sector - {info['Sector']}, Sub-Industry - {info['Sub-Industry']}, Market Cap - {market_cap[-1]}")

    table['Sector'] = sectors
    table['Sub-Industry'] = sub_industry
    table['Market Cap'] = market_cap

    print("Market cap retrieval process completed.")
    return table
def plot_market_caps(info):
    """Show a bar chart of market caps, sorted ascending, with size-bucket dividers.

    Companies are bucketed on log market cap: up to the 60th percentile is
    'Small-Cap', up to the 90th is 'Mid-Cap', the rest 'Large-Cap'. Rows with
    a missing or non-positive market cap are left out, because they would
    turn the percentiles into NaN and break the bucketing.

    Parameters
    ----------
    info : pandas.DataFrame
        Must contain ``'Symbol'`` and ``'Market Cap'`` columns.
    """
    market_caps = info[['Symbol', 'Market Cap']].copy()
    market_caps['Market Cap'] = pd.to_numeric(market_caps['Market Cap'], errors='coerce')
    market_caps = market_caps[market_caps['Market Cap'] > 0].sort_values(by='Market Cap')
    if market_caps.empty:
        print("No market cap data available to plot.")
        return

    market_caps['Log Market Cap'] = np.log(market_caps['Market Cap'])
    small_cut, mid_cut = np.percentile(market_caps['Log Market Cap'], [60, 90])

    log_caps = market_caps['Log Market Cap']
    market_caps['category'] = np.where(
        log_caps <= small_cut, 'Small-Cap',
        np.where(log_caps <= mid_cut, 'Mid-Cap', 'Large-Cap'))

    # Count by comparison so a bucket with no members gives 0, not a KeyError.
    n_small = int((market_caps['category'] == 'Small-Cap').sum())
    n_large = int((market_caps['category'] == 'Large-Cap').sum())

    # NOTE(review): values look like raw USD while the labels say "Billions USD" - confirm.
    fig = px.bar(market_caps, x='Symbol', y='Market Cap', 
                labels={'Ticker': 'Symbol', 'Market Cap': 'Market Cap (Billions USD)'},
                title='Market Capitalizations Companies')

    # Customize layout if needed
    fig.update_layout(
        xaxis_title="Ticker",
        yaxis_title="Market Cap (Billions USD)"
    )

    # Divider positions along the categorical x axis (bars sit at 0..n-1).
    # NOTE(review): the +2.5 offset is kept from the original; the symmetric
    # boundary would be len - n_large - 0.5. Confirm which is intended.
    mid_to_large = len(market_caps) - n_large + 2.5
    small_to_mid = n_small - .5

    # Add vertical lines to separate the caps
    fig.add_vline(x=small_to_mid, line=dict(color="Red", width=2, dash="dashdot"), annotation_text="Small to Mid", annotation_position="top left")
    fig.add_vline(x=mid_to_large, line=dict(color="Blue", width=2, dash="dashdot"), annotation_text="Mid to Large", annotation_position="top left")

    fig.show()


In [None]:
#Load: retrieve all tickers


# NOTE: the positional table indices ([0], [4], [1]) depend on the current
# layout of each Wikipedia page and may need adjusting if a page changes.
sp500_table = pd.read_html(sp500_url)[0]
qqq_table = pd.read_html(nasdaq_url)[4]
# The Dow page is fetched once; it was previously downloaded twice.
tables = pd.read_html(dow_url)
dia_table = tables[1]

sp500_table = sp500_table[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]
sp500_table = sp500_table.rename(columns={'GICS Sector' : 'Sector', "GICS Sub-Industry": 'Sub-Industry'})

# Nasdaq-100: fill any missing sub-industry from the S&P 500 table.
qqq_table = qqq_table[['Ticker', 'GICS Sector', 'GICS Sub-Industry']]
qqq_table = qqq_table.rename(columns={'Ticker' : 'Symbol','GICS Sector' : 'Sector', "GICS Sub-Industry": 'Sub-Industry'})
qqq_table = pd.merge(qqq_table, sp500_table[['Symbol', 'Sub-Industry']], on='Symbol', how='left')
qqq_table['Sub-Industry'] = qqq_table['Sub-Industry_x'].combine_first(qqq_table['Sub-Industry_y'])
qqq_table = qqq_table.drop(columns=['Sub-Industry_x', 'Sub-Industry_y'])

# Dow: take sector and sub-industry from the S&P 500 table in a single merge.
dia_table = pd.merge(
    dia_table[['Symbol']],
    sp500_table[['Symbol', 'Sector', 'Sub-Industry']],
    on='Symbol',
    how='left',
)

# One S&P 500 slice per GICS sector (the duplicate xlc_table assignment was removed).
xlk_table  = sp500_table[sp500_table['Sector'] == 'Information Technology']
xlf_table  = sp500_table[sp500_table['Sector'] == 'Financials']
xlv_table  = sp500_table[sp500_table['Sector'] == 'Health Care']
xli_table  = sp500_table[sp500_table['Sector'] == 'Industrials']
xly_table  = sp500_table[sp500_table['Sector'] == 'Consumer Discretionary']
xle_table  = sp500_table[sp500_table['Sector'] == 'Energy']
xlb_table  = sp500_table[sp500_table['Sector'] == 'Materials']
xlc_table  = sp500_table[sp500_table['Sector'] == 'Communication Services']
xlre_table = sp500_table[sp500_table['Sector'] == 'Real Estate']
xlp_table  = sp500_table[sp500_table['Sector'] == 'Consumer Staples']
xlu_table  = sp500_table[sp500_table['Sector'] == 'Utilities']

# ETF groups
INDICES          = ['SPY','QQQ','DIA','IWM']
SECTORS          = ['SPY','XLF','XLK','XLV','XLC','XLI','XLU','XLB','VNQ','XLP','XLY','XBI','XLE']
INDUSTRIES       = ['SPY', 'SMH', 'KRE','KIE', 'KBE']

# Constituent lists derived from the tables above
# (the duplicate XLF_HOLDINGS assignment was removed).
SPY_HOLDINGS     = sp500_table['Symbol'].tolist()
QQQ_HOLDINGS     = qqq_table['Symbol'].tolist()
DIA_HOLDINGS     = dia_table['Symbol'].tolist()
XLK_HOLDINGS     = xlk_table['Symbol'].tolist()
XLF_HOLDINGS     = xlf_table['Symbol'].tolist()
XLI_HOLDINGS     = xli_table['Symbol'].tolist()
XLV_HOLDINGS     = xlv_table['Symbol'].tolist()
XLU_HOLDINGS     = xlu_table['Symbol'].tolist()
XLB_HOLDINGS     = xlb_table['Symbol'].tolist()
XLY_HOLDINGS     = xly_table['Symbol'].tolist()
XLRE_HOLDINGS    = xlre_table['Symbol'].tolist()
XLC_HOLDINGS     = xlc_table['Symbol'].tolist()
XLE_HOLDINGS     = xle_table['Symbol'].tolist()
XLP_HOLDINGS     = xlp_table['Symbol'].tolist()

# Thematic ETF groups
BONDS            = ['AGG','IEF','TLT', 'HYG','LQD','TIPS', 'BKLN']
PRECIOUS_METALS  = ['GLD','SLV','GDX','XME']
CRYPTO           = ['GBTC','BLOK']
ENERGY           = ['USO','UNG','OIH','XOP','TAN','ICLN','URA','URNM','GUSH','KOLD']
CAPITALIZATIONS  = ['SPY', 'IJH' , 'IJR']
INNOVATION       = ['ARKG','ARKF','ARKK']
LONG_LEVERAGE    = ['TQQQ','SOXL','SPXL','TNA','BOIL','NUGT','ERX','DPST']
SHORT_LEVERAGE   = ['SQQQ','SPXS','UDOW','SSO','TECL','FAS','NVDA','TQQQ', 'VXX','UVXY','VIXY','UVIX','SVXY','SOXS','TZA','USD','TSLL','LABU','DPST','NUGT','CONL']
# 'EWG' was listed twice; the repeated entry was dropped.
FOREIGN_MARKETS  = ['EWZ','EWJ','EWA','EWG','EWW','EEM','EFA','FEZ','INDA','EWU']




In [None]:
#Load: retrieve market caps



# get_market_caps updates each table in place and returns the same object.
# dow_info was previously built from qqq_table, making it identical to nasdaq_info.
dow_info    = get_market_caps(dia_table)
nasdaq_info = get_market_caps(qqq_table)
sp500_info  = get_market_caps(sp500_table)

# get_market_caps overwrites 'Sector' with the names reported by yfinance,
# so the filters below use those names rather than the GICS ones.
xlk_info    = sp500_info[sp500_info['Sector'] == 'Technology']
xlf_info    = sp500_info[sp500_info['Sector'] == 'Financial Services']
xli_info    = sp500_info[sp500_info['Sector'] == 'Industrials']
xlv_info    = sp500_info[sp500_info['Sector'] == 'Healthcare']
xlu_info    = sp500_info[sp500_info['Sector'] == 'Utilities']
xlb_info    = sp500_info[sp500_info['Sector'] == 'Basic Materials']
xly_info    = sp500_info[sp500_info['Sector'] == 'Consumer Cyclical']
xlc_info    = sp500_info[sp500_info['Sector'] == 'Communication Services']
xle_info    = sp500_info[sp500_info['Sector'] == 'Energy']
xlre_info   = sp500_info[sp500_info['Sector'] == 'Real Estate']
xlp_info    = sp500_info[sp500_info['Sector'] == 'Consumer Defensive']

In [None]:
#Load: retrieve prices
'''
indices_df             = generate_series(INDICES)
sectors_df            = generate_series(SECTORS)
industries_df         = generate_series(INDUSTRIES)
qqq_holdings_df        = generate_series(QQQ_HOLDINGS)
dia_holdings_df        = generate_series(DIA_HOLDINGS)
xlk_holdings_df       = generate_series(XLK_HOLDINGS)
xlf_holdings_df      = generate_series(XLF_HOLDINGS)
xli_holdings_df        = generate_series(XLI_HOLDINGS)
xlv_holdings_df       = generate_series(XLV_HOLDINGS)
xlu_holdings_df        = generate_series(XLU_HOLDINGS)
xlb_holdings_df        = generate_series(XLB_HOLDINGS)
xly_holdings_df        = generate_series(XLY_HOLDINGS)
xlc_holdings_df       = generate_series(XLC_HOLDINGS)
xle_holdings_df        = generate_series(XLE_HOLDINGS)
xlre_holdings_df       = generate_series(XLRE_HOLDINGS)
xlp_holdings_df        = generate_series(XLP_HOLDINGS)


all_series = pd.concat([
    indices_df,
    sectors_df,
    industries_df,
    qqq_holdings_df,
    dia_holdings_df,
    xlk_holdings_df,
    xlf_holdings_df,
    xli_holdings_df,
    xlv_holdings_df,
    xlu_holdings_df,
    xlb_holdings_df,
    xly_holdings_df,
    xlc_holdings_df,
    xle_holdings_df,
    xlre_holdings_df,
    xlp_holdings_df,
], axis=1)

benchmark_series           = all_series[benchmark]
benchmark_series=benchmark_series.loc[:, ~benchmark_series.columns.duplicated()]

all_series = pd.concat([
    all_series,
    benchmark_series,
], axis=1)

all_series=all_series.loc[:, ~all_series.columns.duplicated()]
all_series

'''

# Closing prices for every ticker group
indices_df       = generate_series(INDICES)
sectors_df       = generate_series(SECTORS)
industries_df    = generate_series(INDUSTRIES)
qqq_holdings_df  = generate_series(QQQ_HOLDINGS)
dia_holdings_df  = generate_series(DIA_HOLDINGS)
xlk_holdings_df  = generate_series(XLK_HOLDINGS)
xlf_holdings_df  = generate_series(XLF_HOLDINGS)
xli_holdings_df  = generate_series(XLI_HOLDINGS)
xlv_holdings_df  = generate_series(XLV_HOLDINGS)
xlu_holdings_df  = generate_series(XLU_HOLDINGS)
xlb_holdings_df  = generate_series(XLB_HOLDINGS)
xly_holdings_df  = generate_series(XLY_HOLDINGS)
xlc_holdings_df  = generate_series(XLC_HOLDINGS)
xle_holdings_df  = generate_series(XLE_HOLDINGS)
xlre_holdings_df = generate_series(XLRE_HOLDINGS)
xlp_holdings_df  = generate_series(XLP_HOLDINGS)


all_series = pd.concat([
    indices_df,
    sectors_df,
    industries_df,
    qqq_holdings_df,
    dia_holdings_df,
    xlk_holdings_df,
    xlf_holdings_df,
    xli_holdings_df,
    xlv_holdings_df,
    xlu_holdings_df,
    xlb_holdings_df,
    xly_holdings_df,
    xlc_holdings_df,
    xle_holdings_df,
    xlre_holdings_df,
    xlp_holdings_df,
], axis=1)

# Several groups share tickers (SPY is in INDICES, SECTORS and INDUSTRIES),
# so keep only the first copy of each column.
all_series = all_series.loc[:, ~all_series.columns.duplicated()]

# With unique column names this selection is always a Series, whatever
# `benchmark` is set to. The previous code relied on duplicate 'SPY' columns
# and hard-coded that ticker.
benchmark_series = all_series[benchmark]


In [None]:
#Calculate: spreads
mode='standard'

# Benchmark-minus-asset spreads over the weekly horizon, one frame per ticker group
benchmark_minus_indices_week       = create_spreads(indices_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_sectors_week       = create_spreads(sectors_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_industries_week    = create_spreads(industries_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_dia_holdings_week  = create_spreads(dia_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_qqq_holdings_week  = create_spreads(qqq_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlk_holdings_week  = create_spreads(xlk_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlf_holdings_week  = create_spreads(xlf_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xli_holdings_week  = create_spreads(xli_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlv_holdings_week  = create_spreads(xlv_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlu_holdings_week  = create_spreads(xlu_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlb_holdings_week  = create_spreads(xlb_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xly_holdings_week  = create_spreads(xly_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlc_holdings_week  = create_spreads(xlc_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xle_holdings_week  = create_spreads(xle_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlre_holdings_week = create_spreads(xlre_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)
benchmark_minus_xlp_holdings_week  = create_spreads(xlp_holdings_df, benchmark_series, time_frame=time_frame_week, mode=mode)


benchmark_minus_all_series_week = pd.concat([
    benchmark_minus_indices_week,
    benchmark_minus_sectors_week,
    benchmark_minus_industries_week,
    benchmark_minus_dia_holdings_week,
    benchmark_minus_xlk_holdings_week,
    benchmark_minus_xlf_holdings_week,
    benchmark_minus_xli_holdings_week,
    benchmark_minus_xlv_holdings_week,
    benchmark_minus_xlu_holdings_week,
    benchmark_minus_xlb_holdings_week,
    benchmark_minus_xly_holdings_week,
    benchmark_minus_xlc_holdings_week,
    benchmark_minus_xle_holdings_week,
    benchmark_minus_xlre_holdings_week,
    benchmark_minus_xlp_holdings_week,
    # Previously computed but never included; appended last so the position
    # of the existing columns is unchanged.
    benchmark_minus_qqq_holdings_week,
], axis=1)

# Tickers that belong to several groups appear more than once; keep the first copy.
benchmark_minus_all_series_week = benchmark_minus_all_series_week.loc[:, ~benchmark_minus_all_series_week.columns.duplicated()]


# Benchmark-minus-asset spreads over the short horizon, one frame per ticker group
benchmark_minus_indices_short       = create_spreads(indices_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_sectors_short       = create_spreads(sectors_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_industries_short    = create_spreads(industries_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_dia_holdings_short  = create_spreads(dia_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_qqq_holdings_short  = create_spreads(qqq_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlk_holdings_short  = create_spreads(xlk_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlf_holdings_short  = create_spreads(xlf_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xli_holdings_short  = create_spreads(xli_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlv_holdings_short  = create_spreads(xlv_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlu_holdings_short  = create_spreads(xlu_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlb_holdings_short  = create_spreads(xlb_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xly_holdings_short  = create_spreads(xly_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlc_holdings_short  = create_spreads(xlc_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xle_holdings_short  = create_spreads(xle_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlre_holdings_short = create_spreads(xlre_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)
benchmark_minus_xlp_holdings_short  = create_spreads(xlp_holdings_df, benchmark_series, time_frame=time_frame_short, mode=mode)


benchmark_minus_all_series_short = pd.concat([
    benchmark_minus_indices_short,
    benchmark_minus_sectors_short,
    benchmark_minus_industries_short,
    benchmark_minus_dia_holdings_short,
    benchmark_minus_xlk_holdings_short,
    benchmark_minus_xlf_holdings_short,
    benchmark_minus_xli_holdings_short,
    benchmark_minus_xlv_holdings_short,
    benchmark_minus_xlu_holdings_short,
    benchmark_minus_xlb_holdings_short,
    benchmark_minus_xly_holdings_short,
    benchmark_minus_xlc_holdings_short,
    benchmark_minus_xle_holdings_short,
    benchmark_minus_xlre_holdings_short,
    benchmark_minus_xlp_holdings_short,
    # Previously computed but never included; appended last so the position
    # of the existing columns is unchanged.
    benchmark_minus_qqq_holdings_short,
], axis=1)

# Tickers that belong to several groups appear more than once; keep the first copy.
benchmark_minus_all_series_short = benchmark_minus_all_series_short.loc[:, ~benchmark_minus_all_series_short.columns.duplicated()]

# Benchmark-minus-asset spreads over the mid horizon, one frame per ticker group
benchmark_minus_indices_mid       = create_spreads(indices_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_sectors_mid       = create_spreads(sectors_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_industries_mid    = create_spreads(industries_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_dia_holdings_mid  = create_spreads(dia_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_qqq_holdings_mid  = create_spreads(qqq_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlk_holdings_mid  = create_spreads(xlk_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlf_holdings_mid  = create_spreads(xlf_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xli_holdings_mid  = create_spreads(xli_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlv_holdings_mid  = create_spreads(xlv_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlu_holdings_mid  = create_spreads(xlu_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlb_holdings_mid  = create_spreads(xlb_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xly_holdings_mid  = create_spreads(xly_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlc_holdings_mid  = create_spreads(xlc_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xle_holdings_mid  = create_spreads(xle_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlre_holdings_mid = create_spreads(xlre_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)
benchmark_minus_xlp_holdings_mid  = create_spreads(xlp_holdings_df, benchmark_series, time_frame=time_frame_mid, mode=mode)


benchmark_minus_all_series_mid = pd.concat([
    benchmark_minus_indices_mid,
    benchmark_minus_sectors_mid,
    benchmark_minus_industries_mid,
    benchmark_minus_dia_holdings_mid,
    benchmark_minus_xlk_holdings_mid,
    benchmark_minus_xlf_holdings_mid,
    benchmark_minus_xli_holdings_mid,
    benchmark_minus_xlv_holdings_mid,
    benchmark_minus_xlu_holdings_mid,
    benchmark_minus_xlb_holdings_mid,
    benchmark_minus_xly_holdings_mid,
    benchmark_minus_xlc_holdings_mid,
    benchmark_minus_xle_holdings_mid,
    benchmark_minus_xlre_holdings_mid,
    benchmark_minus_xlp_holdings_mid,
    # Previously computed but never included; appended last so the position
    # of the existing columns is unchanged.
    benchmark_minus_qqq_holdings_mid,
], axis=1)

# Tickers that belong to several groups appear more than once; keep the first copy.
benchmark_minus_all_series_mid = benchmark_minus_all_series_mid.loc[:, ~benchmark_minus_all_series_mid.columns.duplicated()]



In [None]:
#Graph: Market Caps
# One market-cap chart per universe. xly_info (Consumer Cyclical) was computed
# above but missing from this list; it is now plotted last.
market_cap_universes = [
    sp500_info,
    nasdaq_info,
    dow_info,
    xlk_info,
    xlf_info,
    xli_info,
    xlv_info,
    xlu_info,
    xlp_info,
    xlc_info,
    xlb_info,
    xlre_info,
    xle_info,
    xly_info,
]
for market_cap_info in market_cap_universes:
    plot_market_caps(market_cap_info)

In [None]:
#Graph: Find Oversold Investments relative to Benchmark
# Keep only the spreads whose latest value is at least one standard deviation
# above their positive mean.
filtered_assets = filter_assets_by_positive_spread_std(benchmark_minus_all_series_mid)
filtered_assets = filtered_assets[filtered_assets]

# The index holds spread column names (e.g. 'Benchmark_minus_XLK'), not dates,
# so the first column is labelled 'Symbol' like in the table further down.
fig = go.Figure(data=[go.Table(
    header=dict(values=['Symbol', 'Boolean Value']),
    cells=dict(values=[filtered_assets.index, filtered_assets], align='left'))
])

fig.show()



In [None]:
#Find Spreads that are over extended
# Spreads whose latest value is at least one std dev above their positive mean
filtered_assets = filter_assets_by_positive_spread_std(benchmark_minus_all_series_mid)
filtered_assets = filtered_assets[filtered_assets]

# Turn the boolean Series into a two-column frame keyed by spread name
filtered_assets_df = filtered_assets.reset_index()
filtered_assets_df.columns = ['Symbol', 'Boolean Value']
filtered_assets_df['Symbol'] = filtered_assets_df['Symbol'].str.strip()

# Take sector, sub-industry and market cap straight from sp500_info. The old
# version merged market caps on 'Symbol' after the prefix had been added, so
# that merge could never match; it only worked through the '_x'/'_y' suffix
# columns created by get_market_caps mutating sp500_table in place.
sp500_table_copy = sp500_info[['Symbol', 'Sector', 'Sub-Industry', 'Market Cap']].copy()

# Prefix 'Benchmark_minus_' so symbols match the spread column names
sp500_table_copy['Symbol'] = 'Benchmark_minus_' + sp500_table_copy['Symbol'].str.strip()

filtered_symbols = filtered_assets_df['Symbol']
sp500_filtered = sp500_table_copy[sp500_table_copy['Symbol'].isin(filtered_symbols)]

merged_df = (
    sp500_filtered
    .merge(filtered_assets_df, on='Symbol')
    .sort_values(by='Market Cap', ascending=False)
)

# Create a Plotly Table trace
fig = go.Figure(data=[go.Table(
    header=dict(values=['Symbol', 'Sector', 'Sub-Industry', 'Boolean Value', 'Market Cap']),
    cells=dict(values=[merged_df['Symbol'], merged_df['Sector'], merged_df['Sub-Industry'], merged_df['Boolean Value'], merged_df['Market Cap']], align='left'))
])

fig.show()


In [None]:
#Filter extended spreads by sector
# Sector name to inspect; must match the values stored in merged_df['Sector'].
Sector = 'Industrials'
merged_df.loc[merged_df['Sector'] == Sector]


In [None]:
#Graph: spreads & risk adjusted performance
# Asset to inspect and the name of its spread column
TICKER = 'XLK'
SPREAD = 'Benchmark_minus_' + TICKER

# Spread against the benchmark at the week, short and mid horizons
for spreads_by_horizon in (
    benchmark_minus_all_series_week,
    benchmark_minus_all_series_short,
    benchmark_minus_all_series_mid,
):
    create_spread_plot(spreads_by_horizon[SPREAD]).show()

# Rolling Sortino ratio of the asset itself at the short and mid windows
for window in (time_frame_short, time_frame_mid):
    plot_risk_adjusted_returns(all_series[TICKER], window).show()
