In [81]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
# Switch the working directory to the project's code folder before the
# project-local imports in the next cell (presumably so those modules resolve
# from the current directory -- TODO confirm).
# NOTE(review): hard-coded absolute, user-specific path; the notebook will not
# run on another machine without editing it.
os.chdir('/Users/vincent/Desktop/Capstone Project/Code')

In [4]:
from visualize import generate_metrics_table
from data import InRAMDataset, RigidDataLoader
from benchmarks import InverseVolatility, MaximumReturn, MinimumVariance, OneOverN, Random 
from losses import SharpeRatio, MaximumDrawdown, MeanReturns
from metrics import maximum_drawdown
from transaction import TransactionCost

In [5]:
# Switch the working directory to the data folder; the parquet files read in
# the next cell are addressed by bare file name relative to it.
# NOTE(review): hard-coded absolute, user-specific path.
os.chdir('/Users/vincent/Desktop/Capstone Project/Data')

In [6]:
# Inputs are read from the current working directory.
# `mkt_ret` keeps only the 'mkt_ret' column; the other three tables are kept whole.
mkt_ret = pd.read_parquet('mkt_ret.prq')['mkt_ret']
universe_df, online_weights, offline_weights = (
    pd.read_parquet(file_name)
    for file_name in ('universe.prq', 'online_weights.prq', 'offline_weights.prq')
)

In [7]:
# Relabel the columns of both weight tables with the integers 0..19. The return
# frames built further down use default integer column labels, and weights and
# returns are multiplied label-by-label.
# NOTE(review): 20 is hard-coded here and in the stock selection; the files
# must contain exactly 20 columns or this assignment raises.
online_weights.columns = np.arange(20)
offline_weights.columns = np.arange(20)

In [8]:
# Keep the 20 PERMNOs with the most non-missing RET observations
# (per the original note, these are the stocks with data from 1926-2020).
stocks = (
    universe_df
    .groupby('PERMNO')['RET']
    .count()
    .nlargest(n=20)
    .index
)

In [9]:
# Restrict the universe to the selected stocks via a label lookup on the index.
# NOTE(review): assumes PERMNO is the first index level of universe_df -- confirm.
df = universe_df.loc[stocks]

In [10]:
# Remove unnecessary columns (modifies df in place).
df.drop(columns=['TICKER', 'CUSIP', 'PRC', 'SHROUT'], inplace=True)

In [11]:
# Remove missing values: every row containing at least one NaN is dropped, in place.
df.dropna(inplace=True)

In [12]:
# Attach the market return to every stock row by joining on the index
# (default inner join, so rows without a matching market return are dropped).
df = df.merge(mkt_ret, left_index=True, right_index=True)

In [13]:
# Active return = stock return minus market return.
df['active_ret'] = df['RET'].sub(df['mkt_ret'])

In [14]:
# Pivot: keep only the active return and move index level 0 into the columns,
# giving one column per value of that level (presumably one per PERMNO -- confirm).
# NOTE(review): this rebinds `df` to a different shape, so the cell cannot be
# re-run on its own.
df = df['active_ret'].unstack(level=0)

In [15]:
# Panel size: rows are time steps, columns are assets.
n_timesteps = df.shape[0]
n_assets = df.shape[1]

# Rolling-window layout: `lookback` rows of features, then `gap` skipped rows,
# then `horizon` rows of targets.
lookback = 36
gap = 0
horizon = 6

# Number of complete (features, targets) windows that fit in the panel.
n_samples = n_timesteps - (lookback + horizon + gap) + 1

In [16]:
# First 80% of the samples are used for training. The test samples start
# `lookback + horizon` positions after the split point rather than right at it.
split_ix = int(0.8 * n_samples)
indices_train = [*range(split_ix)]
indices_test = [*range(split_ix + lookback + horizon, n_samples)]

print('Train range: {}:{}\nTest range: {}:{}'.format(
    indices_train[0], indices_train[-1],
    indices_test[0], indices_test[-1],
))

Train range: 0:878
Test range: 921:1098


In [17]:
# Build the feature tensor X and the target tensor y with a rolling window.
# For each anchor row i, the features are the `lookback` rows before i and the
# targets are the `horizon` rows starting at i + gap.
X_list = []
y_list = []

for i in range(lookback, n_timesteps - horizon - gap + 1):
    X_list.append(df.iloc[i - lookback:i])
    y_list.append(df.iloc[i + gap:i + gap + horizon])

# Stack along a new leading sample axis, then insert a singleton axis at position 1.
X = np.stack(X_list)[:, np.newaxis]
y = np.stack(y_list)[:, np.newaxis]

print('X: {}, y: {}'.format(X.shape, y.shape))

X: (1099, 1, 36, 20), y: (1099, 1, 6, 20)


In [18]:
# Wrap the feature/target tensors in the project's in-memory dataset, labelling
# the assets with the integers 0..n_assets-1.
dataset = InRAMDataset(X, y, asset_names=np.arange(n_assets))

In [19]:
# Loader restricted to the test sample indices, batches of 32.
# NOTE(review): presumably iterates the indices in the given order -- confirm
# against RigidDataLoader.
dataloader_test = RigidDataLoader(dataset,
                                  indices=indices_test,
                                  batch_size=32)

In [20]:
# Benchmark strategies to evaluate, keyed by the short label used in the plots.
# NOTE(review): `Random` is presumably stochastic and no seed is set in this
# notebook -- confirm reproducibility.
benchmark = dict(
    InvVol=InverseVolatility(),
    MaxRet=MaximumReturn(),
    MinVar=MinimumVariance(),
    EW=OneOverN(),
    Random=Random(),
)

In [21]:
# Loss objects handed to generate_metrics_table, keyed by metric label.
metrics = dict(
    MaxDD=MaximumDrawdown(),
    Sharpe=SharpeRatio(),
    MeanReturn=MeanReturns(),
)

In [22]:
# Row labels for the portfolio-weight tables: every test index plus one extra
# trailing entry (last test index + 1).
index = np.asarray(indices_test + [indices_test[-1] + 1])

In [23]:
# Initialize every benchmark's portfolio-weight table (PVM) to an equal-weight
# portfolio: one row per entry of `index`, one column per asset.
PVM_dict = {
    b_name: pd.DataFrame(
        np.full((len(index), n_assets), 1 / n_assets),
        index=index,
        columns=np.arange(n_assets),
    )
    for b_name in benchmark
}

In [24]:
# Evaluate every benchmark on the test loader with every metric via
# `generate_metrics_table`, producing one table of results.
# NOTE(review): later cells read PVM_dict as the benchmarks' weights, so this
# call presumably fills PVM_dict in place -- confirm in visualize.py.
metrics_table = generate_metrics_table(benchmark,
                                       dataloader_test,
                                       metrics,
                                       PVM_dict)

In [25]:
# Targets of the test samples only (fancy-indexing along the sample axis).
y_test = y[indices_test]

In [26]:
# Keep channel 0 and only the first step of each target window, leaving a
# (sample, asset) matrix. Derived directly from `y` rather than from the
# previous `y_test`, so re-running this cell gives the same result instead of
# failing on an already-reduced 2-D array.
y_test = y[indices_test][:, 0, 0, :]

In [27]:
# Dates of the first target row of every test sample: sample s takes its
# targets starting at row s + lookback + gap of df, and gap is 0 here.
# NOTE(review): `gap` is not included in the offset, so this only lines up
# while gap == 0.
dates = df.index[indices_test[0] + lookback: indices_test[-1]+lookback+1]

In [28]:
# One-step-ahead active returns of the test period: rows are dates, columns
# are the default integer labels 0..n_assets-1.
active_return = pd.DataFrame(y_test, index=dates)

In [29]:
# Rebuild absolute returns by adding the market return back onto the active
# returns: flatten to a long Series, join the market return on the index,
# add the two columns, then pivot index level 0 back into columns.
absolute_return = active_return.unstack().rename('active_return')
absolute_return = pd.merge(absolute_return, mkt_ret, left_index=True, right_index=True)
absolute_return = absolute_return['active_return'].add(absolute_return['mkt_ret'])
absolute_return = absolute_return.unstack(level=0)

In [35]:
# Relabel the rows of both weight tables with the test dates so they align
# with the return frames. This is a positional relabel, not a join: it raises
# if the row counts differ.
# NOTE(review): assumes the saved weights are ordered like `dates` -- confirm.
online_weights.index = dates
offline_weights.index = dates

In [36]:
# Per-date portfolio active return: weight times asset active return, summed over assets.
online_active_ret = online_weights.mul(active_return).sum(axis=1)
offline_active_ret = offline_weights.mul(active_return).sum(axis=1)

In [37]:
# Per-date portfolio absolute return: weight times asset absolute return, summed over assets.
online_abs_ret = online_weights.mul(absolute_return).sum(axis=1)
offline_abs_ret = offline_weights.mul(absolute_return).sum(axis=1)

In [78]:
# Line chart of the per-date active return for every benchmark and for the
# online / offline strategies.
fig = go.Figure()
for b_name in PVM_dict:
    # The PVM tables carry one more row than `dates`; drop the first row and
    # relabel the rest with the test dates.
    weights = PVM_dict[b_name].iloc[1:]
    weights.index = dates
    ret = weights.mul(active_return).sum(axis=1)
    fig.add_trace(go.Scatter(name=b_name, x=dates, y=ret))
fig.add_trace(go.Scatter(name='online', x=dates, y=online_active_ret))
fig.add_trace(go.Scatter(name='offline', x=dates, y=offline_active_ret))
fig.update_layout(xaxis_title='dates', yaxis_title='active return')

fig.show()

In [39]:
# Line chart of the per-date absolute return for every benchmark and for the
# online / offline strategies.
fig = go.Figure()
for b_name in PVM_dict:
    # The PVM tables carry one more row than `dates`; drop the first row and
    # relabel the rest with the test dates.
    weights = PVM_dict[b_name].iloc[1:]
    weights.index = dates
    ret = weights.mul(absolute_return).sum(axis=1)
    fig.add_trace(go.Scatter(name=b_name, x=dates, y=ret))
fig.add_trace(go.Scatter(name='online', x=dates, y=online_abs_ret))
fig.add_trace(go.Scatter(name='offline', x=dates, y=offline_abs_ret))
fig.update_layout(xaxis_title='dates', yaxis_title='absolute return')

fig.show()

In [40]:
# Line chart of the compounded value of one unit invested in each strategy:
# the cumulative product of (1 + absolute return).
fig = go.Figure()
for b_name in PVM_dict:
    # The PVM tables carry one more row than `dates`; drop the first row and
    # relabel the rest with the test dates.
    weights = PVM_dict[b_name].iloc[1:]
    weights.index = dates
    ret = weights.mul(absolute_return).sum(axis=1)
    fig.add_trace(go.Scatter(name=b_name, x=dates, y=np.cumprod(1 + ret)))
fig.add_trace(go.Scatter(name='online', x=dates, y=np.cumprod(1 + online_abs_ret)))
fig.add_trace(go.Scatter(name='offline', x=dates, y=np.cumprod(1 + offline_abs_ret)))
fig.update_layout(xaxis_title='dates', yaxis_title='Price')

fig.show()

In [41]:
# Sharpe ratio of the active returns for every strategy: mean divided by the
# standard deviation of the per-date series (not annualized, no risk-free rate).
Sharpe_Ratio = {}
for b_name in PVM_dict:
    # Drop the extra leading PVM row and relabel with the test dates.
    weights = PVM_dict[b_name].iloc[1:]
    weights.index = dates
    ret = weights.mul(active_return).sum(axis=1)
    Sharpe_Ratio[b_name] = ret.mean() / ret.std()
Sharpe_Ratio.update(
    online=online_active_ret.mean() / online_active_ret.std(),
    offline=offline_active_ret.mean() / offline_active_ret.std(),
)

In [42]:
# Convert the {strategy: ratio} mapping to a Series (index = strategy label) for plotting.
Sharpe_Ratio = pd.Series(Sharpe_Ratio)

In [43]:
# Bar chart: one bar per strategy, height = active-return Sharpe ratio.
fig = go.Figure(data=[go.Bar(x=Sharpe_Ratio.index, y=Sharpe_Ratio.values)])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Active Return Sharpe Ratio')

fig.show()

In [44]:
def _downside_deviation(returns):
    """Root-mean-square of the negative part of `returns` (target return 0).

    `np.minimum(returns, 0)` is element-wise, so non-negative returns
    contribute 0 and only losses enter the average.
    """
    return np.sqrt(np.mean(np.minimum(returns, 0) ** 2))


# Sortino ratio of the active returns for every strategy: mean return divided
# by the downside deviation of that same strategy's return series.
Sortino_Ratio = {}
for b_name in PVM_dict.keys():
    # Drop the extra leading PVM row and relabel with the test dates.
    weights = PVM_dict[b_name].iloc[1:, :]
    weights.index = dates
    ret = (weights * active_return).sum(axis=1)
    Sortino_Ratio[b_name] = ret.mean() / _downside_deviation(ret)
# Each learned strategy is scaled by its own downside risk, not by that of the
# last benchmark processed in the loop above.
Sortino_Ratio['online'] = online_active_ret.mean() / _downside_deviation(online_active_ret)
Sortino_Ratio['offline'] = offline_active_ret.mean() / _downside_deviation(offline_active_ret)

In [45]:
# Convert the {strategy: ratio} mapping to a Series (index = strategy label) for plotting.
Sortino_Ratio = pd.Series(Sortino_Ratio)

In [46]:
# Bar chart: one bar per strategy, height = active-return Sortino ratio.
fig = go.Figure()
fig.add_trace(go.Bar(x=Sortino_Ratio.index,
                     y=Sortino_Ratio.values))
# Axis label spelling corrected ("Raturn" -> "Return").
fig.update_layout(xaxis_title='Strategy',
                  yaxis_title='Active Return Sortino Ratio')

fig.show()

In [48]:
# Maximum drawdown of every strategy, computed by the project's
# `maximum_drawdown` on the per-date absolute return series.
# NOTE(review): confirm `maximum_drawdown` expects returns rather than a price path.
MDD = {}
for b_name in PVM_dict:
    # Drop the extra leading PVM row and relabel with the test dates.
    weights = PVM_dict[b_name].iloc[1:]
    weights.index = dates
    ret = weights.mul(absolute_return).sum(axis=1)
    MDD[b_name] = maximum_drawdown(ret)
MDD.update(
    online=maximum_drawdown(online_abs_ret),
    offline=maximum_drawdown(offline_abs_ret),
)

In [50]:
# Convert the {strategy: drawdown} mapping to a Series (index = strategy label) for plotting.
MDD = pd.Series(MDD)

In [54]:
# Bar chart: one bar per strategy, height = maximum drawdown.
fig = go.Figure(data=[go.Bar(x=MDD.index, y=MDD.values)])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Maximum Drawdown')

fig.show()

In [63]:
def compute_transaction_cost(weights, abs_ret, cost_model=None):
    """Average per-rebalance transaction cost of a weight schedule.

    For every pair of consecutive rows in `weights`, the previous weights are
    first drifted by that date's asset returns and renormalised to sum to 1,
    then passed together with the next target weights to the cost model. The
    cost of the step is recorded as ``1 - mu`` where ``mu`` is the model's
    return value (presumably the fraction of portfolio value left after
    trading -- confirm against TransactionCost).

    Parameters
    ----------
    weights : pandas.DataFrame
        Target weights, one row per date and one column per asset.
    abs_ret : pandas.DataFrame
        Absolute asset returns, looked up by the same date labels and aligned
        to `weights` by column label.
    cost_model : callable, optional
        Called as ``cost_model(previous_weights, new_weights)`` with two 1-D
        arrays. Defaults to the notebook-level ``TC`` object, which must exist
        by the time this function is called without an explicit model.

    Returns
    -------
    float
        Mean of ``1 - mu`` over all rebalancing steps, or NaN when `weights`
        has fewer than two rows (no rebalancing step exists).
    """
    if cost_model is None:
        # Fall back to the global cost model, as the original implementation did.
        cost_model = TC

    idx = weights.index
    step_costs = []
    for prev_date, next_date in zip(idx[:-1], idx[1:]):
        # Weights after the assets have moved over the previous period.
        drifted = weights.loc[prev_date] * (1 + abs_ret.loc[prev_date])
        drifted = drifted / np.sum(drifted)
        mu = cost_model(drifted.values, weights.loc[next_date].values)
        step_costs.append(1 - mu)

    if not step_costs:
        # Nothing to average; avoid numpy's empty-slice warning.
        return np.nan
    return np.mean(step_costs)

In [64]:
# Transaction cost: average per-rebalance cost of every strategy under a
# 25e-4 selling and purchasing cost.
TC = TransactionCost(selling_cost=25e-4, purchasing_cost=25e-4)
transaction_cost = {}
for b_name in PVM_dict:
    # Drop the extra leading PVM row and relabel with the test dates.
    weights = PVM_dict[b_name].iloc[1:]
    weights.index = dates
    transaction_cost[b_name] = compute_transaction_cost(weights, absolute_return)
transaction_cost.update(
    online=compute_transaction_cost(online_weights, absolute_return),
    offline=compute_transaction_cost(offline_weights, absolute_return),
)


In [66]:
# Convert the {strategy: cost} mapping to a Series (index = strategy label) for plotting.
transaction_cost = pd.Series(transaction_cost)

In [67]:
# Bar chart: one bar per strategy, height = average transaction cost.
fig = go.Figure(data=[go.Bar(x=transaction_cost.index, y=transaction_cost.values)])
fig.update_layout(xaxis_title='Strategy', yaxis_title='Transaction Cost')

fig.show()

In [82]:
# Stacked bar chart of the offline strategy's weights over time: one trace per
# asset column, bars stacked per date with no gap between dates.
weights = offline_weights.copy()
fig = go.Figure()
for col in weights.columns:
    fig.add_trace(go.Bar(name=col, x=dates, y=weights[col]))
fig.update_layout(
    barmode='stack',
    bargap=0,
    xaxis_title='timestamps',
    yaxis_title='weights',
)
fig.show()