In [None]:
# %pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.7.2-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.7.2-cp313-cp313-win_amd64.whl (8.7 MB)
Using cached joblib-1.5.2-py3-none-any.whl (308 kB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn

   ------------- -------------------------- 1/3 [joblib]
   -------------------------- ------------- 2/3 [scikit-learn]
   -------------------------- ------------- 2/3 [scikit-learn]
   -------------------------- ------------- 2/3 [scikit-learn]
   -------------------------- ------------- 2/3 [scikit-learn]
   -------------------------- ------------- 2/3 [scikit-learn]
   -------------------------- ------------- 2/3 [scik

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from fredapi import Fred

import os
from dotenv import load_dotenv


In [None]:
def get_most_recent_series_of_date(series_key, end_date, fred):
    '''
    Fetch a FRED indicator as it was known on a given date, one value per observation date.

    The object returned by ``fred.get_series_as_of_date`` may contain several
    rows for the same observation date; only the last row for each date is kept.

    Parameters:
    ______________________________
    series_key: string
        The key of the economic indicator in FRED's database: https://fred.stlouisfed.org/release?rid=205.

    end_date: string
        The date in YYYY-MM-DD format.

    fred: fredapi
        The fredapi object to pull FRED data from.

    Returns:
    ______________________________
    pandas.Series
        The float values of the indicator with missing entries removed,
        indexed by observation date (datetime index).
    '''
    releases = fred.get_series_as_of_date(series_key, end_date)

    # Keep only the last row recorded for every observation date.
    latest = releases.drop_duplicates(subset = ["date"], keep = "last")

    series = pd.Series(latest["value"].to_list(), index = latest["date"].to_list())
    series.index = pd.to_datetime(series.index)

    # Drop missing observations before forcing a float dtype.
    return series.dropna().astype("float")

In [None]:
# Load environment variables from a local .env file so the FRED API key
# (read below from API_KEY) never has to be written into the notebook.
load_dotenv()

# Vintage date: only data released on or before this date is requested.
given_date = "2007-12-01"

fred = Fred(api_key = os.getenv("API_KEY"))

# DTB3 is FRED's 3-month Treasury bill secondary-market rate.
# The result gets its own descriptive name: the stock-price cell below rebinds
# `df`, which previously discarded this series right after it was downloaded.
tbill_3m = get_most_recent_series_of_date("DTB3", given_date, fred)
tbill_3m.tail()

In [None]:
# Read the long-format price file, reshape it to one column of closing prices
# per ticker (rows = dates), and keep only tickers with no missing values.
df = (
    pd.read_csv('all_stocks_5yr.csv', index_col=0, parse_dates=True)
    .pivot_table(index='date', columns='Name', values='close')
    .dropna(axis=1)
)
df.head()

In [None]:
# 1. Convert close prices to daily simple returns.
# Simple (arithmetic) returns are required here: the back-tests below grow
# positions with (1 + r), take weighted sums across assets and compound with
# cumprod, none of which is valid for log returns.
returns = df.pct_change(fill_method=None).dropna()

# 2. Split into estimation (first 4 years) and test (remaining data)
split_date = returns.index[0] + pd.DateOffset(years=4)
in_estimation = returns.index < split_date
returns_est = returns.loc[in_estimation]
returns_test = returns.loc[~in_estimation]

# 3. Estimate mean and covariance from the estimation period only,
# so the test period stays out-of-sample.
mean_returns = returns_est.mean()
cov_matrix = returns_est.cov()

# 4. Find minimum variance portfolio weights
from scipy.optimize import minimize

def portfolio_variance(weights, cov_matrix):
    '''
    Computes the variance of a portfolio as the quadratic form w' * Sigma * w.

    Parameters:
    ______________________________
    weights: array-like
        The portfolio weights, one per asset.

    cov_matrix: array-like
        The covariance matrix of asset returns, ordered like ``weights``.

    Returns:
    ______________________________
    The value of ``weights.T @ cov_matrix @ weights``.
    '''
    weighted_cov = weights.T @ cov_matrix
    return weighted_cov @ weights

# Long-only, fully invested minimum-variance problem:
#   minimise w' * Sigma * w   subject to   sum(w) = 1,  0 <= w_i <= 1
num_assets = len(mean_returns)
constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
bounds = tuple((0, 1) for _ in range(num_assets))
init_guess = np.full(num_assets, 1 / num_assets)

# Daily variances are of the order 1e-4 or smaller, i.e. comparable to or below
# SLSQP's default ftol of 1e-6, so with default settings the solver can stop
# almost at the equal-weight starting point. Tighten the tolerance, allow more
# iterations, and supply the exact gradient (2 * Sigma * w) so each iteration
# is cheap. The covariance is passed as a plain ndarray to avoid pandas
# overhead on every objective evaluation.
result = minimize(portfolio_variance, init_guess, args=(np.asarray(cov_matrix),),
                  method='SLSQP', jac=lambda w, cov: 2 * cov @ w,
                  bounds=bounds, constraints=constraints,
                  options={'ftol': 1e-12, 'maxiter': 1000})

# Never back-test on weights from an optimisation that did not converge.
if not result.success:
    raise RuntimeError(f"Minimum-variance optimisation failed: {result.message}")
min_var_weights = result.x

# 5. Scenario 1: No rebalancing (buy and hold)
# Start from the minimum-variance weights and let them drift with the market:
# each day's portfolio return uses the weights held at the start of that day,
# after which the weights are updated by each asset's growth and renormalised.
initial_weights = min_var_weights
weight_tracker = [initial_weights.copy()]
realized_returns = []

for day_returns in returns_test.to_numpy():
    held_weights = weight_tracker[-1]
    realized_returns.append(day_returns @ held_weights)

    # Assets grow at different rates, so the weights drift
    drifted = held_weights * (1 + day_returns)
    weight_tracker.append(drifted / drifted.sum())

return_tracker = pd.Series(realized_returns, index=returns_test.index)

# Growth of one unit invested at the start of the test period
cum_returns = (1 + return_tracker).cumprod()

# Annualised Sharpe ratio (252 trading days, no risk-free rate subtracted)
sharpe_ratio = np.sqrt(252) * (np.mean(return_tracker) / np.std(return_tracker))

print("No Rebalancing Sharpe Ratio:", sharpe_ratio)

# 6. Scenario 2: Daily rebalancing with transaction fee (10 bps per unit traded)
# Turnover below is the sum of the absolute value of every trade, so the sell
# leg and the buy leg of a rebalance are each counted once. The rate is
# therefore the per-side 10 bps and must not be doubled, which would charge
# every rebalance twice.
fee_rate = 0.001  # 10 basis points on each unit bought or sold
target_weights = min_var_weights.copy()
current_weights = target_weights.copy()
portfolio_values = [1.0]
daily_returns = []

for _, row in returns_test.iterrows():
    # Assets grow at their individual rates - weights drift naturally.
    # Values are expressed as fractions of the start-of-day portfolio value.
    weights_after_growth = current_weights * (1 + row)

    # Calculate turnover: sum of absolute trades needed to get back to the
    # target weights at the end-of-day portfolio value
    turnover = np.sum(np.abs(target_weights*weights_after_growth.sum() - weights_after_growth))

    # Portfolio return before fees
    port_ret = np.dot(current_weights, row)

    # Deduct transaction costs (also a fraction of start-of-day value)
    port_ret_after_fees = port_ret - (turnover * fee_rate)

    # Update portfolio value
    new_value = portfolio_values[-1] * (1 + port_ret_after_fees)
    portfolio_values.append(new_value)
    daily_returns.append(port_ret_after_fees)

    # Rebalance back to target weights for next period
    current_weights = target_weights.copy()

# Calculate the annualised Sharpe ratio from daily returns, not cumulative values
daily_returns = np.array(daily_returns)
rebal_sharpe = (daily_returns.mean() / daily_returns.std()) * np.sqrt(252)

print("Daily Rebalancing Sharpe Ratio (with fees):", rebal_sharpe)

# 7. Plot results
# Both curves show the growth of one unit invested at the start of the test
# period; portfolio_values[0] is the initial 1.0 and is skipped so the curve
# lines up with the test dates.
fig, ax = plt.subplots(figsize=(10,6))
ax.plot(cum_returns.index, cum_returns, label='No Rebalancing')
ax.plot(returns_test.index, portfolio_values[1:], label='Daily Rebalancing (with fees)')
ax.legend()
ax.set_title('Minimum Variance Portfolio Performance')
ax.set_ylabel('Cumulative Return')
plt.show()