In [1]:
import pandas as pd
import numpy as np 
import plotly.graph_objects as go
from datetime import date

In [2]:
sp500_monthly_ratios = pd.read_csv('data/SP500_Monthly.csv')
sp600_monthly_ratios = pd.read_csv('data/SP600_Monthly.csv')

In [3]:
sp500_daily_prices = pd.read_csv('data/SP500_daily.csv') 
sp600_daily_prices = pd.read_csv('data/SP600_daily.csv') 

  sp500_daily_prices = pd.read_csv('data/SP500_daily.csv')
  sp600_daily_prices = pd.read_csv('data/SP600_daily.csv')


In [4]:
sp500_table = pd.read_csv('data/sp500_table.csv')
sp600_table = pd.read_csv('data/sp600_table.csv')

In [5]:
from collections import defaultdict
sp500_table = sp500_table.drop_duplicates(subset=['GICS Sector','Symbol'])[['GICS Sector','Symbol']]
sp500_sectors = defaultdict(list)

sp600_table = sp600_table.drop_duplicates(subset=['GICS Sector','Symbol'])[['GICS Sector','Symbol']]
sp600_sectors = defaultdict(list)

In [7]:
sp600_prices = (sp600_daily_prices[['date','TICKER','PRC']]
 .groupby(["date", "TICKER"])["PRC"]
 .last()
 .reset_index()
 .pivot(index="date", columns="TICKER", values="PRC")
 .dropna(axis = 1)
)

sp600_prices = (sp600_daily_prices[['date','TICKER','PRC']]
 .groupby(["date", "TICKER"])["PRC"]
 .last()
 .reset_index()
 .pivot(index="date", columns="TICKER", values="PRC")
 .dropna(axis = 1)
)

sp600_returns = sp600_prices.pct_change().dropna()
sp600_returns.index = pd.to_datetime(sp600_returns.index).date

sp500_prices = (sp500_daily_prices[['date','TICKER','PRC']]
 .groupby(["date", "TICKER"])["PRC"]
 .last()
 .reset_index()
 .pivot(index="date", columns="TICKER", values="PRC")
 .dropna(axis = 1)
)

sp500_returns = sp500_prices.pct_change().dropna()
sp500_returns.index = pd.to_datetime(sp500_returns.index).date

In [8]:
for sector in sp500_table['GICS Sector'].unique():
    for symbol in sp500_table[sp500_table['GICS Sector'] == sector]['Symbol'].unique():
        if symbol in sp500_returns.columns:
            sp500_sectors[sector].append(symbol)
            
for sector in sp600_table['GICS Sector'].unique():
    for symbol in sp600_table[sp600_table['GICS Sector'] == sector]['Symbol'].unique():
        if symbol in sp600_returns.columns:
            sp600_sectors[sector].append(symbol)

In [9]:
sp500_monthly_ratios = (sp500_monthly_ratios
 .set_index(['public_date'])[['TICKER','roe','roa','ptb']]
 )
sp500_monthly_ratios.index = pd.to_datetime(sp500_monthly_ratios.index).date

sp600_monthly_ratios = (sp600_monthly_ratios
 .set_index(['public_date'])[['TICKER','roe','roa','ptb']]
 )
sp600_monthly_ratios.index = pd.to_datetime(sp600_monthly_ratios.index).date

In [10]:
sp500_ratios = {ticker: df for ticker, df in sp500_monthly_ratios.groupby('TICKER')}
sp600_ratios = {ticker: df for ticker, df in sp600_monthly_ratios.groupby('TICKER')}

# Backtest

In [11]:
dates = pd.to_datetime(sp500_monthly_ratios.index.unique()).date
dates = sorted(dates)

In [64]:
small_cap_tickers = set(sp600_monthly_ratios['TICKER']) & set(sp600_prices.columns)
large_cap_tickers = set(sp500_monthly_ratios['TICKER']) & set(sp500_prices.columns)
portfolio_returns = {}
small_returns = {}
large_returns = {}

sp500_prices = sp500_prices[list(large_cap_tickers)]
sp600_prices = sp600_prices[list(small_cap_tickers)]

In [65]:
small_cap_portfolio = {}
large_cap_portfolio = {}
assets_per_sector = 5

for i in range(len(sp500_returns.index)):
    date = sp500_returns.index[i] # Enter Date
    date_ = dt_1 = sp500_returns.index[i+10] if i + 10 < len(sp500_returns.index) else date
    
    # Portfolio Return for Yesterday
    if len(small_cap_portfolio) != 0:
        small_cap_returns = sum([small_cap_portfolio[ticker] * sp600_returns.loc[date_, ticker] for ticker in small_cap_portfolio])
        large_cap_returns = sum([large_cap_portfolio[ticker] * sp500_returns.loc[date_, ticker] for ticker in large_cap_portfolio])
        portfolio_returns[date] = (small_cap_returns + large_cap_returns) / notional
        small_returns[date] = small_cap_returns / 1
        large_returns[date] = large_cap_returns / 1
        
    # Rebalance Portfolio
    if date in dates:
        small_cap_portfolio = {}
        large_cap_portfolio = {}
        
        for sector in sp600_sectors:
            filtered_small = sp600_monthly_ratios[sp600_monthly_ratios['TICKER'].isin(set(sp600_sectors[sector]) & set(sp600_returns.columns))]
            filtered_small = filtered_small.loc[filtered_small.index == date]
            long_tickers = filtered_small.sort_values(by=['roe', 'roa', 'ptb'], 
                                                      ascending=[False, True, False])['TICKER'].iloc[:assets_per_sector].values
            
            long_tickers = [ticker for ticker in long_tickers if ticker in small_cap_tickers]
            
            for ticker in long_tickers:
                small_cap_portfolio[ticker] = 1 / len(long_tickers)
            
            
            filtered_large = sp500_monthly_ratios[sp500_monthly_ratios['TICKER'].isin(set(sp500_sectors[sector]) & set(sp500_returns.columns))]
            filtered_large = filtered_large.loc[filtered_large.index == date]
            short_tickers = filtered_large.sort_values(by=['roe', 'roa', 'ptb'], 
                                                       ascending=[True, False, True])['TICKER'].iloc[:assets_per_sector].values

            large_cap_tickers = [ticker for ticker in large_cap_tickers if ticker in small_cap_tickers]
            
            for ticker in short_tickers:
                large_cap_portfolio[ticker] = -1 / len(short_tickers)
            
        small_cap_portfolio = {ticker : small_cap_portfolio[ticker] / sum(small_cap_portfolio[ticker] for ticker in small_cap_portfolio)
                               for ticker in small_cap_portfolio}
        
        large_cap_portfolio = {ticker : large_cap_portfolio[ticker] / np.abs(sum(large_cap_portfolio[ticker] for ticker in large_cap_portfolio))
                               for ticker in large_cap_portfolio}
        
        assert sum(small_cap_portfolio[ticker] for ticker in small_cap_portfolio) - 1 < 1e-6
        assert sum(large_cap_portfolio[ticker] for ticker in large_cap_portfolio) + 1 < 1e-6
        
        long_notional = sum(small_cap_portfolio[ticker] for ticker in small_cap_portfolio)
        short_notional = np.abs(sum(large_cap_portfolio[ticker] for ticker in large_cap_portfolio))
        notional = long_notional + short_notional

In [67]:
returns = pd.DataFrame({
    'Portfolio': pd.Series(portfolio_returns),
    'Small Cap': pd.Series(small_returns),
    'Large Cap': pd.Series(large_returns)
})

returns.index = pd.to_datetime(returns.index)

In [73]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x = returns.index,
        y = returns['Small Cap'].cumsum()
    )
)

fig.update_yaxes(title = 'Simple Cumulative Return')
fig.show()

In [69]:
returns['Small Cap'].describe()

count    3713.000000
mean        0.000133
std         0.014297
min        -0.127509
25%        -0.006928
50%         0.000544
75%         0.007693
max         0.091729
Name: Small Cap, dtype: float64

In [70]:
returns['Large Cap'].describe()

count    3713.000000
mean        0.011904
std         0.021267
min        -0.118961
25%        -0.003498
50%         0.005178
75%         0.030615
max         0.133246
Name: Large Cap, dtype: float64

In [62]:
daily_returns = returns['Return']

# Risk-free rate (set to 0% by default, adjust if needed)
risk_free_rate = 0.00  

# Basic statistics
mean_return = daily_returns.mean()
volatility = daily_returns.std()
sharpe_ratio = (mean_return / volatility) * np.sqrt(252) if volatility != 0 else np.nan

# Cumulative return
cumulative_return = (1 + daily_returns).cumprod().iloc[-1] - 1

# Max Drawdown
cumulative_returns = (1 + daily_returns).cumprod()
running_max = cumulative_returns.cummax()
drawdown = (cumulative_returns - running_max) / running_max
max_drawdown = drawdown.min()

# Skewness & Kurtosis
skewness = daily_returns.skew()
kurtosis = daily_returns.kurtosis()

# Print results
print(f"Mean Daily Return: {mean_return:.6f}")
print(f"Annualized Return: {mean_return * 252:.6f}")
print(f"Annualized Volatility: {volatility * np.sqrt(252):.6f}")
print(f"Sharpe Ratio: {sharpe_ratio:.4f}")
print(f"Max Drawdown: {max_drawdown:.4%}")
print(f"Skewness: {skewness:.4f}")
print(f"Kurtosis: {kurtosis:.4f}")
print(f"Cumulative Return: {cumulative_return:.4%}")

Mean Daily Return: 0.007873
Annualized Return: 1.983991
Annualized Volatility: 0.417529
Sharpe Ratio: 4.7517
Max Drawdown: -28.0866%
Skewness: 2.7443
Kurtosis: 5.9488
Cumulative Return: 132260732060619.7969%


In [63]:
fig = go.Figure()

fig.add_trace(go.Histogram(
    x=returns['Return'],  # Portfolio return data
    nbinsx=80,  # Adjust bin count
    marker=dict(color='blue', opacity=0.7),  
    name="Daily Returns"
))

# Add layout details
fig.update_layout(
    title="Distribution of Portfolio Returns",
    xaxis_title="Daily Return",
    yaxis_title="Frequency",
    bargap=0.05,
    template="plotly_white"
)

# Show the plot
fig.show()