In [2]:
# This notebook runs from the projects root directory.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'
while os.path.basename(os.getcwd()) != PROJECT_ROOT_DIRECTORY:
    os.chdir(os.pardir)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


## Setup

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import yfinance as yf

from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import risk_matrix
from pypfopt.efficient_frontier import EfficientFrontier

from environments.discrete_env_v3 import PortfolioManagementEnv
from experiments.utils import evaluate_results

In [6]:
RETURN_COLS = ['LARGE_CAP', 'SMALL_CAP', 'BONDS', 'CASH', 'REAL_ESTATE', 'COMMODITIES', 'GOLD']
FEATURE_COLS = RETURN_COLS

In [4]:
df = pd.read_csv('data/asset_classes_stationary_features.csv', index_col=0, parse_dates=True)
df.head(3)

Unnamed: 0,LARGE_CAP,SMALL_CAP,BONDS,CASH,REAL_ESTATE,COMMODITIES,GOLD,LARGE_CAP_RETURN_12,LARGE_CAP_RETURN_26,LARGE_CAP_RETURN_60,...,COMMODITIES_RETURN_12,COMMODITIES_RETURN_26,COMMODITIES_RETURN_60,COMMODITIES_STD_20,COMMODITIES_STD_10,GOLD_RETURN_12,GOLD_RETURN_26,GOLD_RETURN_60,GOLD_STD_20,GOLD_STD_10
2011-03-30,0.006801,0.011092,0.001899,0.0,0.014518,-0.001585,0.003328,0.005496,0.000439,0.000802,...,0.004771,0.000262,0.001156,0.013666,0.012126,0.001769,0.000666,8.1e-05,0.007749,0.006018
2011-03-31,-0.001318,0.004923,0.001902,0.0,0.008103,0.018912,0.008582,0.004118,0.000606,0.000794,...,0.003363,0.000665,0.001711,0.014273,0.008757,0.00209,0.000652,0.000621,0.007392,0.00631
2011-04-01,0.004619,0.006018,0.0,0.0,0.001539,0.000992,-0.004719,0.004152,0.000822,0.000782,...,0.002366,0.001251,0.001649,0.014268,0.008355,0.000598,0.000759,0.000589,0.007204,0.005933


## Baseline: Max Sharpe Ratio Strategy

In [9]:
WINDOW_SIZE = 126 # half a trading year

np.random.seed(5)
value_error_count = 0
env = PortfolioManagementEnv(
    df, 
    RETURN_COLS, 
    feature_cols=FEATURE_COLS, 
    starting_balance=1, 
    episode_length=-1, 
    window_size=WINDOW_SIZE,
    allocations_in_obs=False,
)

obs, done = env.reset(), False
while not done:
    
    observation_df = pd.DataFrame(obs.reshape(-1, env.NUM_ASSETS), columns=FEATURE_COLS)
    annualized_mean_return = mean_historical_return(observation_df, returns_data=True)
    annualized_covariance = risk_matrix(observation_df, returns_data=True, method='sample_cov')
    ef = EfficientFrontier(annualized_mean_return, annualized_covariance)
    
    try:
        weights = ef.max_sharpe(risk_free_rate=(1+env.RETURNS[env.current_index,0])**252-1)
        cleaned_weights = ef.clean_weights()
        env.current_allocations = np.insert(np.array([w for w in cleaned_weights.values()]), 0, 0)
    
    except ValueError: # for when none of the assets must have an expected return exceeding the risk-free rate
        env.current_allocations = np.insert(np.zeros(len(FEATURE_COLS)), 0, 1) # invest everything into the risk free rate
        value_error_count += 1
    
    obs, reward, done, info = env.step(env.NUM_ASSETS) # do nothing

print(f'Value error count: {value_error_count}')
env.render(title='Max Sharpe Portfolio Allocations (6 month)')
env.close()

ValueError: Shape of passed values is (147, 6), indices imply (147, 7)

## Baseline: Dow Jones Industrial Average (DJIA)

In [None]:
dija_returns = yf.download('^DJI', start = df.index[0], end = df.index[-1], interval = '1d')['Adj Close'].pct_change(1)

## Creating and saving `results_df`

In [None]:
results_df = pd.DataFrame(index=df.index)
results_df['Baseline DJIA'] = dija_returns
results_df['Baseline Max Sharpe'] = env.get_portfolio_returns()
results_df.head(3)

In [None]:
fig, axes = plt.subplots(1,2, figsize=(12,4))
results_df.plot(ax = axes[0], lw=0.3, alpha=0.5, title='Returns');
(results_df+1).cumprod().plot(ax = axes[1], lw=1, alpha=1, title='Cumilative Returns');

<span style="color:red">**(RUN ONCE)**</span>

In [None]:
# results_df.to_csv('experiments/results_df.csv')

In [None]:
results_df.to_csv('experiments/baselines_df.csv')

## Evaluate Results Function

In [None]:
def evaluate_results(results_df, plot_metrics=[]):
    """
    Evaluate `results_df` on a common set of metrics.
    """
    metrics = {}
    metrics['Annualized Mean Return'] = (1+results_df).prod()**(252/len(df))-1
    metrics['Annualized Risk'] = np.sqrt(results_df.var()*252)
    metrics['Sharpe Ratio'] = metrics['Annualized Mean Return'] / metrics['Annualized Risk']
    
    if len(plot_metrics)>0:
        fig, axes = plt.subplots(1, len(plot_metrics), figsize=(len(plot_metrics)*4, 4))
        for i,metric in enumerate(plot_metrics):
            metrics[metric].plot.bar(ax=axes[i], title=metric, rot=0, color='#F4C430')
    
    return metrics

In [None]:
# example
metrics = evaluate_results(results_df, plot_metrics=['Annualized Mean Return', 'Annualized Risk', 'Sharpe Ratio'])