In [13]:
from Quantapp.DataManager import DataManager
import numpy as np
import json
import os
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from IPython.display import display
from concurrent.futures import ThreadPoolExecutor
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
from plotly.subplots import make_subplots

# Parameters
sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
dow_url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
nasdaq_url = 'https://en.wikipedia.org/wiki/NASDAQ-100'

time_frame_week = 7
time_frame_short = 21
time_frame_mid   = 50
time_frame_long = 200
period     = '5y'

risk_free_rate = 0.02 / 252  # Annualized risk-free rate divided by trading days
benchmark = 'SPY'



dm = DataManager()
interval = '1d'

def plot_returns(series_returns):
    negative_returns = series_returns[series_returns<0]
    mean = negative_returns.mean()
    standard_deviation = negative_returns.std()
    standard_deviation_level_three_fourths = mean - .5 * standard_deviation
    standard_deviation_level_single        = mean - standard_deviation
    
    fig = px.line(series_returns)

    fig.add_hline(y=0, line_dash="dash", line_color="black", 
                annotation_text="Zero Line", annotation_position="bottom right")
    fig.add_hline(y=mean, line_dash="dot", line_color="blue", 
                annotation_text=f"Mean of negative returns: {mean:.2f}", annotation_position="top right")
    fig.add_hline(y=standard_deviation_level_three_fourths , line_dash="dashdot", line_color="red", 
                annotation_text=f".75 Std Dev: {standard_deviation_level_three_fourths :.2f}", annotation_position="top right")
    fig.add_hline(y=standard_deviation_level_single, line_dash="dashdot", line_color="red", 
                annotation_text=f"1 Std Dev: {standard_deviation_level_single:.2f}", annotation_position="top right")

    fig.add_shape(
        type="rect",
        x0=series_returns.index.min(),
        x1=series_returns.index.max(),
        y0=standard_deviation_level_three_fourths,
        y1=standard_deviation_level_single,
        fillcolor="green",
        opacity=0.2,
        line_width=0,
    )

    return fig

def create_spread_plot(asset_spreads):
    spread_threshold = 0
    spread           = asset_spreads
    mean             = spread[spread>=0].mean()
    std_dev = spread[spread >= 0].std()
    #spread = asset_spreads[:200]


    fig = px.line(spread)
    fig.update_layout(title=asset_spreads.name)
    fig.add_hline(y=spread_threshold, line_dash="dash", line_color="red", annotation_text=f"y={spread_threshold}")
    fig.add_hline(y=mean , line_color="red", annotation_text="mean")
    fig.add_hline(y=mean + std_dev, line_dash="dash", line_color="blue", 
                  annotation_text="mean + 1 std dev", annotation_position="bottom right")
    fig.add_hline(y=mean - std_dev, line_dash="dash", line_color="blue", 
                  annotation_text="mean - 1 std dev", annotation_position="bottom right")
    fig.add_hline(y=mean + 2*std_dev, line_dash="dot", line_color="green", 
                  annotation_text="mean + 2 std dev", annotation_position="bottom right")
    fig.add_hline(y=mean - 2*std_dev, line_dash="dot", line_color="green", 
                  annotation_text="mean - 2 std dev", annotation_position="bottom right")
    fig.add_shape(type="rect",
                  xref="paper", yref="y",
                  x0=0, y0=mean+std_dev, x1=1, y1=spread.max(),
                  fillcolor="green", opacity=0.2, line_width=0)
    fig.update_layout(height=800)
    return fig\




def add_labels(df):
    df['name of day'] = df.index.day_name()
    df['name of month'] = df.index.month_name()
    df['month/day'] = df.index.strftime('%m/%d') 
    return df

def calculate_monthly_returns(data):
    # Ensure DateTime index
    if not isinstance(data.index, pd.DatetimeIndex):
        data.index = pd.to_datetime(data.index)
    
    # Resample to monthly frequency, taking the last observation of each month
    monthly_data = data.resample('M').last()
    
    # Calculate monthly returns
    monthly_returns = monthly_data['Close'].pct_change().dropna()
    
    return monthly_returns

def plot_seasonality(data, title):
    # Ensure the index is a DateTimeIndex
    if not isinstance(data.index, pd.DatetimeIndex):
        data.index = pd.to_datetime(data.index)
    
    # Extract month and calculate average returns for each month
    monthly_avg_returns = data.groupby(data.index.month).mean()
    
    # Define month names
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    
    # Create a bar plot
    fig = px.bar(x=months, y=monthly_avg_returns, title=title, labels={'x': 'Month', 'y': 'Average Monthly Return'})
    
    # Highlight the current month
    current_date = datetime.now()
    current_month_index = current_date.month - 1  # 0-based index for months
    fig.update_traces(marker_color=['red' if i == current_month_index else 'blue' for i in range(12)])

    return fig
# Function to convert figure to traces and extract layout information
def extract_fig_components(fig):
    traces = fig.data
    layout = fig.layout
    return traces, layout

# Function to add traces, layout components, and a vertical line to subplots
def add_fig_to_subplot(fig, traces, layout, row, col):
    for trace in traces:
        fig.add_trace(trace, row=row, col=col)
    
    # Transfer annotations
    for annotation in layout.annotations:
        fig.add_annotation(annotation.update(xref='paper', yref='paper', x=(col-1)*0.5 + 0.25, y=1 - (row-1)*0.5 - 0.25))

    # Transfer shapes
    for shape in layout.shapes:
        fig.add_shape(shape.update(xref='paper', yref='paper'))
    
    # Add vertical line to current month
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    current_date = datetime.now()
    current_month = months[current_date.month - 1]



sp500 = dm.retrieve_ticker_data('SPY', period=period,interval=interval).drop(['Dividends', 'Stock Splits','Capital Gains'],axis=1)
nasdaq = dm.retrieve_ticker_data('QQQ', period=period,interval=interval).drop(['Dividends', 'Stock Splits','Capital Gains'],axis=1)
russell_2000 = dm.retrieve_ticker_data('IWM', period=period,interval=interval).drop(['Dividends', 'Stock Splits','Capital Gains'],axis=1)
dija = dm.retrieve_ticker_data('DIA', period=period,interval=interval).drop(['Dividends', 'Stock Splits','Capital Gains'],axis=1)
uso = dm.retrieve_ticker_data('USO', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
vnq = dm.retrieve_ticker_data('VNQ', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlc = dm.retrieve_ticker_data('XLC', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlb = dm.retrieve_ticker_data('XLB', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xli = dm.retrieve_ticker_data('XLI', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlf = dm.retrieve_ticker_data('XLF', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlp = dm.retrieve_ticker_data('XLP', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlu = dm.retrieve_ticker_data('XLU', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlv = dm.retrieve_ticker_data('XLV', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xly = dm.retrieve_ticker_data('XLY', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlk = dm.retrieve_ticker_data('XLK', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xlre = dm.retrieve_ticker_data('XLRE', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)
xle = dm.retrieve_ticker_data('XLE', period=period, interval=interval).drop(['Dividends', 'Stock Splits', 'Capital Gains'], axis=1)

gold               = dm.retrieve_ticker_data('GLD', period=period,interval=interval).drop(['Dividends', 'Stock Splits','Capital Gains'],axis=1)
treasury_bonds_20y = dm.retrieve_ticker_data('TLT', period=period,interval=interval).drop(['Dividends', 'Stock Splits','Capital Gains'],axis=1)

risk_free_rate   = dm.retrieve_ticker_data('^IRX',period=period,interval=interval).reindex_like(sp500)
vix   = dm.retrieve_ticker_data('^VIX',period=period).drop(['Dividends', 'Stock Splits'],axis=1)

sp500_monthly_returns = calculate_monthly_returns(sp500)
nasdaq_monthly_returns = calculate_monthly_returns(nasdaq)
russell_2000_monthly_returns = calculate_monthly_returns(russell_2000)
dija_monthly_returns = calculate_monthly_returns(dija)
gold_monthly_returns = calculate_monthly_returns(gold)
treasury_bonds_20y_monthly_returns = calculate_monthly_returns(treasury_bonds_20y)
vix_monthly_returns = calculate_monthly_returns(vix)
uso_monthly_returns = calculate_monthly_returns(uso)
vnq_monthly_returns = calculate_monthly_returns(vnq)

# Calculate monthly returns for each SPDR sector ETF
xlc_monthly_returns = calculate_monthly_returns(xlc)
xlb_monthly_returns = calculate_monthly_returns(xlb)
xli_monthly_returns = calculate_monthly_returns(xli)
xlf_monthly_returns = calculate_monthly_returns(xlf)
xlp_monthly_returns = calculate_monthly_returns(xlp)
xlu_monthly_returns = calculate_monthly_returns(xlu)
xlv_monthly_returns = calculate_monthly_returns(xlv)
xly_monthly_returns = calculate_monthly_returns(xly)
xlk_monthly_returns = calculate_monthly_returns(xlk)
xlre_monthly_returns = calculate_monthly_returns(xlre)
xle_monthly_returns = calculate_monthly_returns(xle)




'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [14]:
#MONTHLY SEASONALITY
# Create individual figures
fig_sp500 = plot_seasonality(sp500_monthly_returns, 'Seasonality: S&P 500')
fig_nasdaq = plot_seasonality(nasdaq_monthly_returns, 'Seasonality: Nasdaq 100')
fig_dija = plot_seasonality(dija_monthly_returns, 'Seasonality: DIJA')
fig_vix = plot_seasonality(vix_monthly_returns, 'Seasonality: VIX')
fig_gold = plot_seasonality(gold_monthly_returns, 'Seasonality: Gold')
fig_treasury_bonds_20y = plot_seasonality(treasury_bonds_20y_monthly_returns, 'Seasonality: Treasury Bonds 20Y')
fig_uso = plot_seasonality(uso_monthly_returns, 'Seasonality: USO')
fig_vnq = plot_seasonality(vnq_monthly_returns, 'Seasonality: VNQ')
# Create individual seasonality figures
# Create individual seasonality figures
fig_xlc = plot_seasonality(xlc_monthly_returns, 'Seasonality: Communication Services')
fig_xlb = plot_seasonality(xlb_monthly_returns, 'Seasonality: Materials')
fig_xli = plot_seasonality(xli_monthly_returns, 'Seasonality: Industrials')
fig_xlf = plot_seasonality(xlf_monthly_returns, 'Seasonality: Financials')
fig_xlp = plot_seasonality(xlp_monthly_returns, 'Seasonality: Consumer Staples')
fig_xlu = plot_seasonality(xlu_monthly_returns, 'Seasonality: Utilities')
fig_xlv = plot_seasonality(xlv_monthly_returns, 'Seasonality: Health Care')
fig_xly = plot_seasonality(xly_monthly_returns, 'Seasonality: Consumer Discretionary')
fig_xlk = plot_seasonality(xlk_monthly_returns, 'Seasonality: Technology')
fig_xlre = plot_seasonality(xlre_monthly_returns, 'Seasonality: Real Estate')
fig_xle = plot_seasonality(xle_monthly_returns, 'Seasonality: Energy')

# Create a 2x4 subplot
fig = make_subplots(rows=2, cols=4, 
                    subplot_titles=('Seasonality: S&P 500', 'Seasonality: Nasdaq 100',
                                    'Seasonality: DIJA', 'Seasonality: VIX',
                                    'Seasonality: Gold', 'Seasonality: Treasury Bonds 20Y',
                                    'Seasonality: USO', 'Seasonality: VNQ'))

# Extract components from existing figures
traces_sp500, layout_sp500 = extract_fig_components(fig_sp500)
traces_nasdaq, layout_nasdaq = extract_fig_components(fig_nasdaq)
traces_dija, layout_dija = extract_fig_components(fig_dija)
traces_vix, layout_vix = extract_fig_components(fig_vix)
traces_gold, layout_gold = extract_fig_components(fig_gold)
traces_treasury_bonds_20y, layout_treasury_bonds_20y = extract_fig_components(fig_treasury_bonds_20y)
traces_uso, layout_uso = extract_fig_components(fig_uso)
traces_vnq, layout_vnq = extract_fig_components(fig_vnq)

# Add traces and layout components to subplots
add_fig_to_subplot(fig, traces_sp500, layout_sp500, 1, 1)
add_fig_to_subplot(fig, traces_nasdaq, layout_nasdaq, 1, 2)
add_fig_to_subplot(fig, traces_dija, layout_dija, 1, 3)
add_fig_to_subplot(fig, traces_vix, layout_vix, 1, 4)
add_fig_to_subplot(fig, traces_gold, layout_gold, 2, 1)
add_fig_to_subplot(fig, traces_treasury_bonds_20y, layout_treasury_bonds_20y, 2, 2)
add_fig_to_subplot(fig, traces_uso, layout_uso, 2, 3)
add_fig_to_subplot(fig, traces_vnq, layout_vnq, 2, 4)

# Update layout for the entire figure
fig.update_layout(height=800, width=1600, 
                  title_text="Seasonality of Major Markets and Key Assets",
                  showlegend=False)  # Hide legend if desired

fig.show()

fig = make_subplots(rows=2, cols=6, 
                    subplot_titles=('Seasonality: Communication Services', 'Seasonality: Materials',
                                    'Seasonality: Industrials', 'Seasonality: Financials',
                                    'Seasonality: Consumer Staples', 'Seasonality: Utilities',
                                    'Seasonality: Health Care', 'Seasonality: Consumer Discretionary',
                                    'Seasonality: Technology', 'Seasonality: Real Estate',
                                    'Seasonality: Energy'))

# Extract components from existing figures
traces_xlc, layout_xlc = extract_fig_components(fig_xlc)
traces_xlb, layout_xlb = extract_fig_components(fig_xlb)
traces_xli, layout_xli = extract_fig_components(fig_xli)
traces_xlf, layout_xlf = extract_fig_components(fig_xlf)
traces_xlp, layout_xlp = extract_fig_components(fig_xlp)
traces_xlu, layout_xlu = extract_fig_components(fig_xlu)
traces_xlv, layout_xlv = extract_fig_components(fig_xlv)
traces_xly, layout_xly = extract_fig_components(fig_xly)
traces_xlk, layout_xlk = extract_fig_components(fig_xlk)
traces_xlre, layout_xlre = extract_fig_components(fig_xlre)
traces_xle, layout_xle = extract_fig_components(fig_xle)

# Add traces and layout components to subplots
add_fig_to_subplot(fig, traces_xlc, layout_xlc, 1, 1)
add_fig_to_subplot(fig, traces_xlb, layout_xlb, 1, 2)
add_fig_to_subplot(fig, traces_xli, layout_xli, 1, 3)
add_fig_to_subplot(fig, traces_xlf, layout_xlf, 1, 4)
add_fig_to_subplot(fig, traces_xlp, layout_xlp, 1, 5)
add_fig_to_subplot(fig, traces_xlu, layout_xlu, 1, 6)
add_fig_to_subplot(fig, traces_xlv, layout_xlv, 2, 1)
add_fig_to_subplot(fig, traces_xly, layout_xly, 2, 2)
add_fig_to_subplot(fig, traces_xlk, layout_xlk, 2, 3)
add_fig_to_subplot(fig, traces_xlre, layout_xlre, 2, 4)
add_fig_to_subplot(fig, traces_xle, layout_xle, 2, 5)

# Update layout for the entire figure
fig.update_layout(height=800, width=1600, 
                  title_text="Seasonality of S&P 500 Sector SPDRs",
                  showlegend=False)  # Hide legend if desired

fig.show()


In [15]:
#Day by Data Statistics

sp500_daily= sp500#.resample('M').last()#.reset_index(inplace=True)
sp500_daily_returns = sp500_daily.pct_change() * 100
sp500_daily_returns['name of day'] = sp500_daily_returns.index.day_name()
sp500_daily_returns['name of month'] = sp500_daily_returns.index.month_name()
sp500_daily_returns['month/day'] = sp500_daily_returns.index.strftime('%m/%d') 
month_days = sp500_daily_returns['month/day'].unique()
name_of_days= sp500_daily_returns['name of day'].unique()
def convert_date(date):
    try:
        return pd.to_datetime(date, format='%m/%d')
    except ValueError:
        return None
    
converted_dates = [convert_date(x) for x in month_days]
converted_dates = [date for date in converted_dates if date is not None]
sorted_dates = sorted(converted_dates)
sorted_dates = [date.strftime('%m/%d') for date in sorted_dates]

data = []
for month_day in sorted_dates:
    data.append(sp500_daily_returns[sp500_daily_returns['month/day'] == month_day]['Close'].mean())


fig = px.bar(y=data, x=sorted_dates, title='Average Returns for Each Day of the Year')
fig.show()

data = []
for name_of_day in name_of_days:
    data.append(sp500_daily_returns[sp500_daily_returns['name of day'] == name_of_day]['Close'].mean())
fig = px.bar(y=data, x=name_of_days, title='Average Returns for Each Day')
fig.show()


days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

for day in days:
    # Filter data for the current day
    daily_returns = sp500_daily_returns[sp500_daily_returns['name of day'] == day]['Close']
    
    # Calculate the rolling mean of positive returns
    rolling_mean = (daily_returns > 0).rolling(window=21).sum() / 21
    
    # Calculate statistics
    mean_value = rolling_mean.mean()
    std_dev = rolling_mean.std()
    
    # Calculate standard deviation lines
    one_std_dev_up = mean_value + std_dev
    one_std_dev_down = mean_value - std_dev
    two_std_dev_up = mean_value + 2 * std_dev
    two_std_dev_down = mean_value - 2 * std_dev
    
    # Create the plot
    fig = px.bar(rolling_mean, title=f'Successive {day} Returns with Fiscal Quarter End Lines', color_discrete_sequence=['#1f77b4'])  # Change bar color to a contrasting blue
    
    # Add horizontal lines with distinct styles and colors
    fig.add_hline(y=mean_value, line_width=3, line_dash="solid", line_color="blue", name='Mean')
    fig.add_hline(y=one_std_dev_up, line_width=2, line_dash="dash", line_color="orange", name='Mean + 1 Std Dev')
    fig.add_hline(y=one_std_dev_down, line_width=2, line_dash="dash", line_color="orange", name='Mean - 1 Std Dev')
    fig.add_hline(y=two_std_dev_up, line_width=2, line_dash="dash", line_color="purple", name='Mean + 2 Std Dev')
    fig.add_hline(y=two_std_dev_down, line_width=2, line_dash="dash", line_color="purple", name='Mean - 2 Std Dev')
    
    # Add shaded areas between the 1 and 2 standard deviation lines
    fig.add_shape(
        type="rect",
        x0=rolling_mean.index.min(),
        x1=rolling_mean.index.max(),
        y0=one_std_dev_up,
        y1=two_std_dev_up,
        fillcolor="rgba(255, 182, 193, 0.5)",  # Light pink with 50% opacity
        line=dict(width=0),
        name='1 to 2 Std Dev'
    )
    fig.add_shape(
        type="rect",
        x0=rolling_mean.index.min(),
        x1=rolling_mean.index.max(),
        y0=two_std_dev_down,
        y1=one_std_dev_down,
        fillcolor="rgba(255, 182, 193, 0.5)",  # Light pink with 50% opacity
        line=dict(width=0),
        name='-2 to -1 Std Dev'
    )
    
    # Add vertical lines at the end of each fiscal quarter
    fiscal_quarter_end_dates = rolling_mean.index.to_period("Q").to_timestamp().drop_duplicates()
    for i, date in enumerate(fiscal_quarter_end_dates):
        fig.add_vline(x=date, line_width=2, line_dash="dash", line_color="gray", name='Fiscal Quarter End')
        
        # Shade every other quarter with light grey from bottom to top of the graph
        if i % 2 == 0:
            fig.add_shape(
                type="rect",
                x0=date - pd.DateOffset(days=90),
                x1=date,
                y0=0,  # Extend from the bottom of the graph
                y1=1,  # Extend to the top of the graph
                xref='x',  # Specify x-axis reference
                yref='paper',  # Specify y-axis reference as the paper (full height of the plot)
                fillcolor="rgba(200, 200, 200, 0.3)",  # Light grey with 30% opacity
                line=dict(width=0),
                name='Quarter Background'
            )
        
        # Add annotations for each fiscal quarter at the top of the plot
        quarter_start = date - pd.DateOffset(days=90)  # Approximate start of the quarter area
        quarter_end = date  # End of the quarter area
        quarter_mid = quarter_start + (quarter_end - quarter_start) / 2  # Midpoint of the quarter area
        
        quarter_label = f"{str(date.year)[-2:]}: Q{date.quarter}"
        fig.add_annotation(
            x=quarter_mid,
            y=1.02,  # Position slightly above the top of the plot
            text=quarter_label,
            showarrow=False,
            font=dict(size=12, color="black"),
            align="center",
            xanchor='center',
            yanchor='bottom'
        )
    
    # Update layout to include a more descriptive legend
    fig.update_layout(
        legend_title_text='Statistics',
        legend=dict(x=0, y=-0.2, orientation="h"),
        xaxis_title='Date',
        yaxis_title='Rolling Mean of Positive Returns',
        title_font_size=16,
        height=600,
        legend_title_font_size=14,
        plot_bgcolor='white',  # Ensure the plot background is white
        paper_bgcolor='white',  # Ensure the overall paper background is white
        font_color='black'  # Ensure text is black for contrast
    )
    
    # Show the plot
    fig.show()


Converting to PeriodArray/Index representation will drop timezone information.




Converting to PeriodArray/Index representation will drop timezone information.




Converting to PeriodArray/Index representation will drop timezone information.




Converting to PeriodArray/Index representation will drop timezone information.




Converting to PeriodArray/Index representation will drop timezone information.



In [16]:
#Parameters
#Retrieve Asset Information
sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
dow_url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
nasdaq_url = 'https://en.wikipedia.org/wiki/NASDAQ-100'


time_frame_week = 7
time_frame_short = 21
time_frame_mid   = 50
time_frame_long = 200
period     = '10y'

risk_free_rate = 0.02 / 252  # Annualized risk-free rate divided by trading days
benchmark = 'SPY'


In [17]:
#Methods & Classes
#Define Parameters

import numpy as np
import pandas as pd

import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from IPython.display import display

import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import plotly.graph_objects as go
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor

#plt.rcParams["figure.figsize"] = (20, 7)


def calculate_sortino_ratio(returns):
    excess_returns = returns - risk_free_rate
    downside_deviation = excess_returns[excess_returns < 0].std()
    sortino_ratio = excess_returns.mean() / downside_deviation if downside_deviation != 0 else np.nan
    return sortino_ratio

def calculate_risk_adjusted_returns(df, time_frame):
    daily_returns = df.pct_change()
    rolling_sortino_ratio = daily_returns.rolling(window=time_frame).apply(calculate_sortino_ratio)
    return rolling_sortino_ratio

def generate_series(tickers):
    tickers = [ticker.replace('.', '-') for ticker in tickers]
    try:
        df = yf.download(tickers, period=period)['Close']
    except Exception as e:
        print(f"An error occurred while fetching data: {e}")
        return pd.DataFrame()  
    df.columns = [col.replace('-', '.') for col in df.columns]
    return df


def plot_returns(returns, time_frame):
    threshold = 0
    fig = px.line(returns , x=returns.index, y=returns.columns)
    fig.add_hline(y=threshold, line_dash="dash", line_color="red", annotation_text=f"y={threshold}")
    fig.add_hline(y=threshold, line_dash="dash", line_color="red", annotation_text=f"y={threshold}")
    fig.show()

def create_spreads(asset_series, benchmark_series, time_frame, mode='standard'):
    
    if mode == 'standard':
        asset_returns = asset_series.pct_change(time_frame)
        benchmark_returns= benchmark_series.pct_change(time_frame)
    elif mode == 'sortino':
        asset_returns = calculate_risk_adjusted_returns(asset_series, time_frame)
        benchmark_returns= calculate_risk_adjusted_returns(benchmark_series, time_frame)

    benchmark_minus_asset = asset_returns.apply(lambda x: benchmark_returns - x)
    benchmark_minus_asset.columns = ["Benchmark" + "_minus_" + col for col in benchmark_minus_asset.columns]
    return benchmark_minus_asset    

def create_spread_plot(asset_spreads):
    spread_threshold = 0
    spread           = asset_spreads.last('1Y') 
    mean             = spread[spread>=0].mean()
    std_dev = spread[spread >= 0].std()
    #spread = asset_spreads[:200]


    fig = px.line(spread)
    fig.update_layout(title=asset_spreads.name)
    fig.add_hline(y=spread_threshold, line_dash="dash", line_color="red", annotation_text=f"y={spread_threshold}")
    fig.add_hline(y=mean , line_color="red", annotation_text="mean")
    fig.add_hline(y=mean + std_dev, line_dash="dash", line_color="blue", 
                  annotation_text="mean + 1 std dev", annotation_position="bottom right")
    fig.add_hline(y=mean - std_dev, line_dash="dash", line_color="blue", 
                  annotation_text="mean - 1 std dev", annotation_position="bottom right")
    fig.add_hline(y=mean + 2*std_dev, line_dash="dot", line_color="green", 
                  annotation_text="mean + 2 std dev", annotation_position="bottom right")
    fig.add_hline(y=mean - 2*std_dev, line_dash="dot", line_color="green", 
                  annotation_text="mean - 2 std dev", annotation_position="bottom right")
    fig.add_shape(type="rect",
                  xref="paper", yref="y",
                  x0=0, y0=mean, x1=1, y1=spread.max(),
                  fillcolor="green", opacity=0.2, line_width=0)
    #fig.update_layout(height=800)
    return fig

def create_side_by_side_subplots(fig1, fig2):
    fig = make_subplots(rows=1, cols=2, subplot_titles=(fig1.layout.title.text, fig2.layout.title.text))
    
    for trace in fig1.data:
        fig.add_trace(trace,row=1,col=1)

    for trace in fig2.data:
        fig.add_trace(trace,row=1,col=2)
    
    return fig

def plot_multiple_spreads(assets):
    for column in assets:
       asset_spreads = assets[column]
       create_spread_plot(asset_spreads).show()


def plot_risk_adjusted_returns(series, time_frame):
    series_adjusted_returns = calculate_risk_adjusted_returns(series, time_frame)
    series_adjusted_returns_last_year = series_adjusted_returns.last('1Y')  # Filter data to only include the last year
    
    negative_returns = series_adjusted_returns_last_year[series_adjusted_returns_last_year < 0]
    mean = negative_returns.mean()
    standard_deviation = negative_returns.std()
    standard_deviation_level_three_fourths = mean - 0.5 * standard_deviation
    standard_deviation_level_single = mean - standard_deviation

    fig = px.line(series_adjusted_returns_last_year)

    fig.add_hline(y=0, line_dash="dash", line_color="black", 
                  annotation_text="Zero Line", annotation_position="bottom right")
    fig.add_hline(y=mean, line_dash="dot", line_color="blue", 
                  annotation_text=f"Mean of negative returns: {mean:.2f}", annotation_position="top right")
    fig.add_hline(y=standard_deviation_level_three_fourths, line_dash="dashdot", line_color="red", 
                  annotation_text=f".75 Std Dev: {standard_deviation_level_three_fourths:.2f}", annotation_position="top right")
    fig.add_hline(y=standard_deviation_level_single, line_dash="dashdot", line_color="red", 
                  annotation_text=f"1 Std Dev: {standard_deviation_level_single:.2f}", annotation_position="top right")

    fig.add_shape(
        type="rect",
        x0=series_adjusted_returns_last_year.index.min(),
        x1=series_adjusted_returns_last_year.index.max(),
        y0=standard_deviation_level_three_fourths,
        y1=standard_deviation_level_single,
        fillcolor="green",
        opacity=0.2,
        line_width=0,
    )
    
    return fig

def filter_assets_by_positive_spread_std(asset_spreads):
    spreads = asset_spreads
    positive_spreads = spreads[spreads >= 0] 
    
    mean = positive_spreads.mean()
    std_dev = positive_spreads.std()

    latest_spread = spreads.iloc[-1]
    threshold = mean + std_dev

    return latest_spread>=threshold


def filter_assets_below_negative_std(asset_spreads):
    if not isinstance(asset_spreads, pd.Series):
        raise TypeError("asset_spreads must be a pandas Series")

    negative_spreads = asset_spreads[asset_spreads < 0]
    if negative_spreads.empty:
        return pd.Series(dtype=bool)  
    
    mean_negative = negative_spreads.mean()
    std_dev_negative = negative_spreads.std()

    threshold_negative = mean_negative - 0.75 * std_dev_negative
    return asset_spreads < threshold_negative

def get_sector_info(ticker):
    try:
        stock = yf.Ticker(ticker)
        sector = stock.info.get('sector', 'N/A')
        sub_industry = stock.info.get('industry', 'N/A')
        return {'Ticker': ticker, 'Sector': sector, 'Sub-Industry': sub_industry}
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return {'Ticker': ticker, 'Sector': 'N/A', 'Sub-Industry': 'N/A'}

def fetch_ticker_info(ticker):
    info = get_sector_info(ticker)
    market_cap = yf.Ticker(ticker).info.get('marketCap')
    return info['Sector'], info['Sub-Industry'], market_cap

def get_market_caps(table):
    print("Starting market cap retrieval process...")
    
    tickers = table['Symbol'].tolist()
    print(f"Original tickers: {tickers[:10]}...")  # Print first 10 for brevity

    # Optimize ticker adjustment
    tickers = ['BRK-B' if symbol == 'BRK.B' else 'BF-B' if symbol == 'BF.B' else symbol for symbol in tickers]
    print(f"Adjusted tickers: {tickers[:10]}...")  # Print first 10 for brevity

    with ThreadPoolExecutor() as executor:
        results = list(executor.map(fetch_ticker_info, tickers))

    # Unpack results
    sectors, sub_industries, market_caps = zip(*results)

    table['Sector'] = sectors
    table['Sub-Industry'] = sub_industries
    table['Market Cap'] = market_caps
    
    print("Market cap retrieval process completed.")
    return table

def plot_market_caps(info):
    market_caps = pd.DataFrame(info[['Symbol','Market Cap']])
    #market_caps = market_caps.sort_values(by='Market Cap',ascending=False)
    market_caps = market_caps.sort_values(by='Market Cap')
    market_caps['Log Market Cap'] = np.log(market_caps['Market Cap'])
    percentiles = np.percentile(market_caps['Log Market Cap'], [60, 90])

    def categorize(market_cap):
        if market_cap <= percentiles[0]:
            return 'Small-Cap'
        elif market_cap <= percentiles[1]:
            return 'Mid-Cap'
        else:
            return 'Large-Cap'

    market_caps['category'] = market_caps['Log Market Cap'].apply(categorize)
    value_counts = market_caps['category'].value_counts()
    fig = px.bar(market_caps, x='Symbol', y='Market Cap', 
                labels={'Ticker': 'Symbol', 'Market Cap': 'Market Cap (Billions USD)'},
                title='Market Capitalizations Companies')

    # Customize layout if needed
    fig.update_layout(
        xaxis_title="Ticker",
        yaxis_title="Market Cap (Billions USD)"
    )
    mid_to_large= len(market_caps) - market_caps.tail(value_counts['Large-Cap']).count()['Symbol']+ 2.5
    small_to_mid= market_caps.tail(value_counts['Small-Cap']).count()['Symbol'] - .5
    market_caps[market_caps['category'] == 'Mid-Cap']

    # Add vertical lines to separate the caps
    fig.add_vline(x=small_to_mid, line=dict(color="Red", width=2, dash="dashdot"), annotation_text="Small to Mid", annotation_position="top left")
    fig.add_vline(x=mid_to_large, line=dict(color="Blue", width=2, dash="dashdot"), annotation_text="Mid to Large", annotation_position="top left")

    fig.show()

In [18]:
#Load: retrieve all tickers


sp500_table = pd.read_html(sp500_url)[0]
qqq_table = pd.read_html(nasdaq_url)[4]
dia_table = pd.read_html(dow_url)[1]

sp500_table = sp500_table[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]
sp500_table = sp500_table.rename(columns={'GICS Sector' : 'Sector', "GICS Sub-Industry": 'Sub-Industry'})

qqq_table = qqq_table[['Ticker', 'GICS Sector', 'GICS Sub-Industry']]
qqq_table = qqq_table.rename(columns={'Ticker' : 'Symbol','GICS Sector' : 'Sector', "GICS Sub-Industry": 'Sub-Industry'})
qqq_table = pd.merge(qqq_table, sp500_table[['Symbol', 'Sub-Industry']], on='Symbol', how='left')
qqq_table['Sub-Industry'] = qqq_table['Sub-Industry_x'].combine_first(qqq_table['Sub-Industry_y'])
qqq_table = qqq_table.drop(columns=['Sub-Industry_x', 'Sub-Industry_y'])

tables = pd.read_html(dow_url)
dia_table = tables[1]
dia_table = dia_table[['Symbol', 'Industry']]
dia_table = pd.merge(dia_table, sp500_table[['Symbol', 'Sector']], on='Symbol', how='left')
dia_table = pd.merge(dia_table, sp500_table[['Symbol', 'Sub-Industry']], on='Symbol', how='left')
dia_table = dia_table.drop(columns=['Industry'])

xlk_table = sp500_table[sp500_table['Sector'] == 'Information Technology']
xlf_table = sp500_table[sp500_table['Sector'] == 'Financials']
xlv_table = sp500_table[sp500_table['Sector'] == 'Health Care']
xli_table = sp500_table[sp500_table['Sector'] == 'Industrials']
xly_table = sp500_table[sp500_table['Sector'] == 'Consumer Discretionary']
xle_table = sp500_table[sp500_table['Sector'] == 'Energy']
xlb_table = sp500_table[sp500_table['Sector'] == 'Materials']
xlc_table = sp500_table[sp500_table['Sector'] == 'Communication Services']
xlre_table = sp500_table[sp500_table['Sector'] == 'Real Estate']
xlc_table = sp500_table[sp500_table['Sector'] == 'Communication Services']
xlp_table = sp500_table[sp500_table['Sector'] == 'Consumer Staples']
xlu_table = sp500_table[sp500_table['Sector'] == 'Utilities']

INDICES          = ['SPY','QQQ','DIA','IWM']
SECTORS          = ['SPY','XLF','XLK','XLV','XLC','XLI','XLU','XLB','VNQ','XLP','XLY','XBI','XLE']
INDUSTRIES       = ['SPY', 'SMH', 'KRE','KIE', 'KBE']
SPY_HOLDINGS     = sp500_table['Symbol'].tolist()
QQQ_HOLDINGS     = qqq_table['Symbol'].tolist()
DIA_HOLDINGS     = dia_table['Symbol'].tolist()
XLK_HOLDINGS     = xlk_table['Symbol'].tolist()
XLF_HOLDINGS     = xlf_table['Symbol'].tolist()
XLI_HOLDINGS     = xli_table['Symbol'].tolist()
XLV_HOLDINGS     = xlv_table['Symbol'].tolist()
XLU_HOLDINGS     = xlu_table['Symbol'].tolist()
XLF_HOLDINGS     = xlf_table['Symbol'].tolist()
XLB_HOLDINGS     = xlb_table['Symbol'].tolist()
XLY_HOLDINGS     = xly_table['Symbol'].tolist()
XLRE_HOLDINGS    = xlre_table['Symbol'].tolist()
XLC_HOLDINGS     = xlc_table['Symbol'].tolist()
XLE_HOLDINGS     = xle_table['Symbol'].tolist()
XLP_HOLDINGS     = xlp_table['Symbol'].tolist()
BONDS            = ['AGG','IEF','TLT', 'HYG','LQD','TIPS', 'BKLN']
PRECIOUS_METALS  = ['GLD','SLV','GDX','XME']
CRYPTO           = ['GBTC','BLOK']
ENERGY           = ['USO','UNG','OIH','XOP','TAN','ICLN','URA','URNM','GUSH','KOLD']
CAPITALIZATIONS  = ['SPY', 'IJH' , 'IJR']
INNOVATION       = ['ARKG','ARKF','ARKK']
LONG_LEVERAGE    = ['TQQQ','SOXL','SPXL','TNA','BOIL','NUGT','ERX','DPST']
SHORT_LEVERAGE   = ['SQQQ','SPXS','UDOW','SSO','TECL','FAS','NVDA','TQQQ', 'VXX','UVXY','VIXY','UVIX','SVXY','SOXS','TZA','USD','TSLL','LABU','DPST','NUGT','CONL']
FOREIGN_MARKETS  = ['EWZ','EWJ','EWA','EWG','EWW','EEM','EFA','FEZ','INDA','EWU','EWG']



In [19]:
#Load: retrieve market caps



dow_info    = get_market_caps(qqq_table)
nasdaq_info = get_market_caps(qqq_table)
sp500_info  = get_market_caps(sp500_table)

xlk_info    = sp500_info[ sp500_info['Sector'] == 'Technology']
xlf_info    = sp500_info[sp500_info['Sector'] == 'Financial Services']
xli_info    = sp500_info[ sp500_info['Sector'] == 'Industrials']
xlv_info    = sp500_info[sp500_info['Sector'] == 'Healthcare']
xlu_info    = sp500_info[ sp500_info['Sector'] == 'Utilities']
xlb_info    = sp500_info[sp500_info['Sector'] == 'Basic Materials']
xly_info    = sp500_info[ sp500_info['Sector'] == 'Consumer Cyclical']
xlc_info    = sp500_info[sp500_info['Sector'] == 'Communication Services']
xle_info    = sp500_info[ sp500_info['Sector'] == 'Energy']
xlre_info    = sp500_info[ sp500_info['Sector'] == 'Real Estate']
xlp_info    = sp500_info[ sp500_info['Sector'] == 'Consumer Defensive']

Starting market cap retrieval process...
Original tickers: ['ADBE', 'ABNB', 'GOOGL', 'GOOG', 'AMZN', 'AMD', 'AEP', 'AMGN', 'ADI', 'ANSS']...
Adjusted tickers: ['ADBE', 'ABNB', 'GOOGL', 'GOOG', 'AMZN', 'AMD', 'AEP', 'AMGN', 'ADI', 'ANSS']...
Market cap retrieval process completed.
Starting market cap retrieval process...
Original tickers: ['ADBE', 'ABNB', 'GOOGL', 'GOOG', 'AMZN', 'AMD', 'AEP', 'AMGN', 'ADI', 'ANSS']...
Adjusted tickers: ['ADBE', 'ABNB', 'GOOGL', 'GOOG', 'AMZN', 'AMD', 'AEP', 'AMGN', 'ADI', 'ANSS']...
Market cap retrieval process completed.
Starting market cap retrieval process...
Original tickers: ['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']...
Adjusted tickers: ['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']...
Market cap retrieval process completed.


In [20]:
#Load: retrieve prices


indices_df             = generate_series(INDICES)
sectors_df            = generate_series(SECTORS)
industries_df         = generate_series(INDUSTRIES)
qqq_holdings_df        = generate_series(QQQ_HOLDINGS)
dia_holdings_df        = generate_series(DIA_HOLDINGS)
xlk_holdings_df       = generate_series(XLK_HOLDINGS)
xlf_holdings_df      = generate_series(XLF_HOLDINGS)
xli_holdings_df        = generate_series(XLI_HOLDINGS)
xlv_holdings_df       = generate_series(XLV_HOLDINGS)
xlu_holdings_df        = generate_series(XLU_HOLDINGS)
xlb_holdings_df        = generate_series(XLB_HOLDINGS)
xly_holdings_df        = generate_series(XLY_HOLDINGS)
xlc_holdings_df       = generate_series(XLC_HOLDINGS)
xle_holdings_df        = generate_series(XLE_HOLDINGS)
xlre_holdings_df       = generate_series(XLRE_HOLDINGS)
xlp_holdings_df        = generate_series(XLP_HOLDINGS)


all_series = pd.concat([
    indices_df,
    sectors_df,
    industries_df,
    qqq_holdings_df,
    dia_holdings_df,
    xlk_holdings_df,
    xlf_holdings_df,
    xli_holdings_df,
    xlv_holdings_df,
    xlu_holdings_df,
    xlb_holdings_df,
    xly_holdings_df,
    xlc_holdings_df,
    xle_holdings_df,
    xlre_holdings_df,
    xlp_holdings_df,
], axis=1)

benchmark_series           = all_series[benchmark]
benchmark_series=benchmark_series.loc[:, ~benchmark_series.columns.duplicated()]

all_series = pd.concat([
    all_series,
    benchmark_series
], axis=1)

benchmark_series = pd.Series(benchmark_series['SPY'])
all_series=all_series.loc[:, ~all_series.columns.duplicated()]


[*********************100%%**********************]  4 of 4 completed
[*********************100%%**********************]  13 of 13 completed
[*********************100%%**********************]  5 of 5 completed
[*********************100%%**********************]  101 of 101 completed

7 Failed downloads:
['GFS', 'CEG', 'ABNB', 'DASH', 'DDOG']: YFInvalidPeriodError("%ticker%: Period '10y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', 'ytd', 'max']")
['ARM']: YFInvalidPeriodError("%ticker%: Period '10y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', 'ytd', 'max']")
['GEHC']: YFInvalidPeriodError("%ticker%: Period '10y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', 'ytd', 'max']")
[*********************100%%**********************]  30 of 30 completed
[*********************100%%**********************]  67 of 67 completed
[*********************100%%**********************]  71 of 71 completed
[*********************1

In [21]:
#Calculate: spreads
mode='standard'

benchmark_minus_indices_week          = create_spreads(indices_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_sectors_week          = create_spreads(sectors_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_industries_week       = create_spreads(industries_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_dia_holdings_week       = create_spreads(dia_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_qqq_holdings_week       = create_spreads(qqq_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlk_holdings_week       = create_spreads(xlk_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlf_holdings_week       = create_spreads(xlf_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xli_holdings_week       = create_spreads(xli_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlv_holdings_week       = create_spreads(xlv_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlu_holdings_week       = create_spreads(xlu_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlb_holdings_week       = create_spreads(xlb_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xly_holdings_week       = create_spreads(xly_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlc_holdings_week       = create_spreads(xlc_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xle_holdings_week       = create_spreads(xle_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlre_holdings_week       = create_spreads(xlre_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)
benchmark_minus_xlp_holdings_week       = create_spreads(xlp_holdings_df, benchmark_series, time_frame=time_frame_week,mode=mode)


benchmark_minus_all_series_week= pd.concat([
    benchmark_minus_indices_week,
    benchmark_minus_sectors_week,
    benchmark_minus_industries_week,
    benchmark_minus_dia_holdings_week,
    benchmark_minus_xlk_holdings_week,
    benchmark_minus_xlf_holdings_week,
    benchmark_minus_xli_holdings_week,
    benchmark_minus_xlv_holdings_week,
    benchmark_minus_xlu_holdings_week,
    benchmark_minus_xlb_holdings_week,
    benchmark_minus_xly_holdings_week,
    benchmark_minus_xlc_holdings_week,
    benchmark_minus_xle_holdings_week,
    benchmark_minus_xlre_holdings_week,
    benchmark_minus_xlp_holdings_week
], axis=1)

benchmark_minus_all_series_week=benchmark_minus_all_series_week.loc[:, ~benchmark_minus_all_series_week.columns.duplicated()]


benchmark_minus_indices_short          = create_spreads(indices_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_sectors_short          = create_spreads(sectors_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_industries_short       = create_spreads(industries_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_dia_holdings_short       = create_spreads(dia_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_qqq_holdings_short       = create_spreads(qqq_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlk_holdings_short       = create_spreads(xlk_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlf_holdings_short       = create_spreads(xlf_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xli_holdings_short       = create_spreads(xli_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlv_holdings_short       = create_spreads(xlv_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlu_holdings_short       = create_spreads(xlu_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlb_holdings_short       = create_spreads(xlb_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xly_holdings_short       = create_spreads(xly_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlc_holdings_short       = create_spreads(xlc_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xle_holdings_short       = create_spreads(xle_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlre_holdings_short       = create_spreads(xlre_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)
benchmark_minus_xlp_holdings_short       = create_spreads(xlp_holdings_df, benchmark_series, time_frame=time_frame_short,mode=mode)




benchmark_minus_all_series_short= pd.concat([
    benchmark_minus_indices_short,
    benchmark_minus_sectors_short,
    benchmark_minus_industries_short,
    benchmark_minus_dia_holdings_short,
    benchmark_minus_xlk_holdings_short,
    benchmark_minus_xlf_holdings_short,
    benchmark_minus_xli_holdings_short,
    benchmark_minus_xlv_holdings_short,
    benchmark_minus_xlu_holdings_short,
    benchmark_minus_xlb_holdings_short,
    benchmark_minus_xly_holdings_short,
    benchmark_minus_xlc_holdings_short,
    benchmark_minus_xle_holdings_short,
    benchmark_minus_xlre_holdings_short,
    benchmark_minus_xlp_holdings_short
], axis=1)

benchmark_minus_all_series_short=benchmark_minus_all_series_short.loc[:, ~benchmark_minus_all_series_short.columns.duplicated()]

benchmark_minus_indices_mid          = create_spreads(indices_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_sectors_mid          = create_spreads(sectors_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_industries_mid       = create_spreads(industries_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_dia_holdings_mid       = create_spreads(dia_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_qqq_holdings_mid       = create_spreads(qqq_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlk_holdings_mid       = create_spreads(xlk_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlf_holdings_mid       = create_spreads(xlf_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xli_holdings_mid       = create_spreads(xli_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlv_holdings_mid       = create_spreads(xlv_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlu_holdings_mid       = create_spreads(xlu_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlb_holdings_mid       = create_spreads(xlb_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xly_holdings_mid       = create_spreads(xly_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlc_holdings_mid       = create_spreads(xlc_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xle_holdings_mid       = create_spreads(xle_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlre_holdings_mid       = create_spreads(xlre_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)
benchmark_minus_xlp_holdings_mid      = create_spreads(xlp_holdings_df, benchmark_series, time_frame=time_frame_mid,mode=mode)


benchmark_minus_all_series_mid= pd.concat([
    benchmark_minus_indices_mid,
    benchmark_minus_sectors_mid,
    benchmark_minus_industries_mid,
    benchmark_minus_dia_holdings_mid,
    benchmark_minus_xlk_holdings_mid,
    benchmark_minus_xlf_holdings_mid,
    benchmark_minus_xli_holdings_mid,
    benchmark_minus_xlv_holdings_mid,
    benchmark_minus_xlu_holdings_mid,
    benchmark_minus_xlb_holdings_mid,
    benchmark_minus_xly_holdings_mid,
    benchmark_minus_xlc_holdings_mid,
    benchmark_minus_xle_holdings_mid,
    benchmark_minus_xlre_holdings_mid,
    benchmark_minus_xlp_holdings_mid
], axis=1)

benchmark_minus_all_series_mid =benchmark_minus_all_series_mid.loc[:, ~benchmark_minus_all_series_mid.columns.duplicated()]




The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.


The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.


The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



In [22]:
#Graph: Market Caps
plot_market_caps(sp500_info)
plot_market_caps(nasdaq_info)
plot_market_caps(dow_info)
plot_market_caps(xlk_info)
plot_market_caps(xlf_info)
plot_market_caps(xli_info)
plot_market_caps(xlv_info)
plot_market_caps(xlu_info)
plot_market_caps(xlp_info)
plot_market_caps(xlc_info)
plot_market_caps(xlb_info)
plot_market_caps(xlre_info)
plot_market_caps(xle_info)

In [23]:
#find oversold assets relative to benchmark
import pandas as pd
import plotly.graph_objects as go

# Assuming necessary data and functions are already defined: filter_assets_by_positive_spread_std, benchmark_minus_all_series_mid, sp500_table, sp500_info

# Filter assets by positive spread standard deviation
filtered_assets = filter_assets_by_positive_spread_std(benchmark_minus_all_series_mid)
filtered_assets = filtered_assets[filtered_assets]

# Convert filtered_assets to a DataFrame and reset the index
filtered_assets_df = filtered_assets.reset_index()
filtered_assets_df.columns = ['Symbol', 'Boolean Value']

# Remove any leading or trailing whitespace from symbols
filtered_assets_df['Symbol'] = filtered_assets_df['Symbol'].str.strip()

# Make a copy of sp500_table
sp500_table_copy = sp500_table.copy()

# Prefix 'Benchmark_minus_' to symbols in the copy to match the format
sp500_table_copy['Symbol'] = 'Benchmark_minus_' + sp500_table_copy['Symbol'].str.strip()

# Extract the symbols from filtered_assets_df (no prefix needed)
filtered_symbols = filtered_assets_df['Symbol']

# Filter the copy of sp500_table based on the updated filtered_assets symbols
sp500_filtered = sp500_table_copy[sp500_table_copy['Symbol'].isin(filtered_symbols)]

# Merge sp500_filtered with filtered_assets_df (no need to adjust filtered_assets_df)
merged_df = sp500_filtered.merge(filtered_assets_df, left_on='Symbol', right_on='Symbol', how='right')

# Add market caps
merged_df = pd.merge(merged_df, sp500_info[['Symbol', 'Market Cap']], on='Symbol', how='left')

# Handle missing sector and sub-industry information
merged_df['Sector'] = merged_df['Sector'].fillna('Unknown')
merged_df['Sub-Industry'] = merged_df['Sub-Industry'].fillna('Unknown')

# Rename columns if needed
if 'Market Cap_x' in merged_df.columns or 'Market Cap_y' in merged_df.columns:
    merged_df['Market Cap'] = merged_df['Market Cap_x'].combine_first(merged_df['Market Cap_y'])
    merged_df.drop(columns=['Market Cap_x', 'Market Cap_y'], inplace=True)

# Sort the DataFrame by market cap in descending order
merged_df = merged_df.sort_values(by='Market Cap', ascending=False)

# Create a Plotly Table trace
fig = go.Figure(data=[go.Table(
    header=dict(values=['Symbol', 'Sector', 'Sub-Industry', 'Boolean Value', 'Market Cap']),
    cells=dict(values=[merged_df['Symbol'], merged_df['Sector'], merged_df['Sub-Industry'], merged_df['Boolean Value'], merged_df['Market Cap']], align='left'))
])

fig.show()


In [24]:
#Graph: spreads & risk adjusted performance
TICKER = 'XLP'
SPREAD = 'Benchmark_minus_' + TICKER
create_spread_plot(benchmark_minus_all_series_week[SPREAD]).show()
create_spread_plot(benchmark_minus_all_series_short[SPREAD]).show()
create_spread_plot(benchmark_minus_all_series_mid[SPREAD]).show()
plot_risk_adjusted_returns(all_series[TICKER],time_frame_short).show()
plot_risk_adjusted_returns(all_series[TICKER],time_frame_mid).show()



last is deprecated and will be removed in a future version. Please create a mask and filter using `.loc` instead


'Y' is deprecated and will be removed in a future version, please use 'YE' instead.




last is deprecated and will be removed in a future version. Please create a mask and filter using `.loc` instead


'Y' is deprecated and will be removed in a future version, please use 'YE' instead.




last is deprecated and will be removed in a future version. Please create a mask and filter using `.loc` instead


'Y' is deprecated and will be removed in a future version, please use 'YE' instead.




last is deprecated and will be removed in a future version. Please create a mask and filter using `.loc` instead


'Y' is deprecated and will be removed in a future version, please use 'YE' instead.




last is deprecated and will be removed in a future version. Please create a mask and filter using `.loc` instead


'Y' is deprecated and will be removed in a future version, please use 'YE' instead.

