In [1248]:
# number of assets
dim = 48
# length of rolling in-sample window in days
tau = 250
# length of out-of-sample window in days
out = 21
# VaR threshold
var_thresh = 95
# number of cross-validation folds
k = 10

In [1249]:
# Import relevant libraries
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [1250]:
# Import + format dataset
path = '/Users/julienraffaud/Desktop/Machine Learning with Applications in Finance/48_Industry_Portfolios_daily.CSV'
data = pd.read_csv(path)
data[data.columns[0]] = pd.to_datetime(data[data.columns[0]].astype(str), errors='coerce')
data = data.rename(columns={ data.columns[0]: "Date" })
data = data.set_index('Date')
data = data.apply(pd.to_numeric, errors='coerce')
data = data.iloc[-1401:, :]

In [1251]:
# Minimum variance portfolio backtest
mvp_returns = []
for i in range(0, int((len(data) - tau)/out)):
    # current window
    window = np.array( data.iloc[i*out:i*out + tau, :] )
    # Estimated covariance matrix
    est_cov = np.cov(window.T)
    # Inverse of estimated covariance matrix
    cov_inv = np.linalg.inv( est_cov )
    # dim*1 vector of ones
    ones = np.ones((dim, 1))
    # First half of mvp weights formula
    a = np.linalg.inv( np.linalg.multi_dot(( ones.T, cov_inv, ones)) )
    # Second half of mvp weights formula
    b = np.dot( cov_inv, ones)
    # Minimum Variance Portfolio weights
    mvp = a*b
    # In-sample variance of the MVP
    var_in = np.linalg.multi_dot((mvp.T, est_cov, mvp))
    # out-of-sample data
    out_sample = np.array( data.iloc[i*out+tau:i*out+tau+out, :].T )
    # out-of-sample returns
    out_returns = np.dot(mvp.T, out_sample)
    mvp_returns += out_returns.T.tolist()

In [1252]:
# total number of out-of-sample returns measured
nob = int((len(data) - tau)/out)*out

In [1253]:
# compute MVP variance
mvp_variance = np.var(mvp_returns)
# compute MVP VaR at (1-var_thresh)% level
mvp_var = np.percentile(mvp_returns, 100-var_thresh)
# compute MVP Sharpe ratio
mvp_sharpe = np.mean( mvp_returns )/np.sqrt(mvp_variance)
print(np.sqrt(mvp_variance))
print(mvp_var)
print(mvp_sharpe)

0.5745083094795582
-0.8546721538677073
0.15201022882228912


In [1254]:
## RIDGE-regularized portfolio backtest
# lambdas
lmbd = np.linspace(0, 1, 100)
ridge_returns = []
for i in range(0, int((len(data) - tau)/out)):
    # current window
    window = np.array( data.iloc[i*out:i*out + tau, :] )
    # average out-of-sample variance associated with each lambda
    lmbd_variances = np.zeros((len(lmbd), 1))
    for fold in range(0, k):
        variances = []
        # sample values from in-sample data
        sample = np.random.choice(tau, out, replace=False)
        # remaining in-sample data
        mod_window = np.delete(window, sample, axis=0)
        # out-of-sample data
        outer = window[sample, :]
        # dim*1 vector of ones
        ones = np.ones((dim, 1))
        # Estimated covariance matrix
        est_cov = np.cov(mod_window.T)
        ## CROSS-VALIDATION STEP
        for l in lmbd:
            cov_inv = np.linalg.inv( est_cov+l*np.eye(dim) )
            # First half of mvp weights formula
            a = np.linalg.inv( np.linalg.multi_dot(( ones.T, cov_inv, ones)) )
            # Second half of mvp weights formula
            b = np.dot( cov_inv, ones)
            # Portfolio weights
            mvp = a*b
            # In-sample variance of the MVP
            var_in = np.linalg.multi_dot((mvp.T, est_cov, mvp))
            # out-of-sample variance associated to each lambda
            var_out = np.var(np.dot(mvp.T, outer.T).T )
            # append variance
            variances.append( var_out )
        variances = np.array(variances)
        variances.shape = (len(lmbd), 1)
        # update each lambda's corresponding variance
        lmbd_variances += variances/k
    # index of lambda*
    star = lmbd_variances.tolist().index(min(lmbd_variances)) 
    # lambda*
    lambda_star = lmbd[lmbd_variances.tolist().index(min(lmbd_variances))]
    ## END OF CROSS VALIDATION STEP
    # estimated covariance matrix
    est_cov = np.cov(window.T)
    # inverted covariance matrix
    cov_inv = np.linalg.inv( est_cov + lambda_star*np.eye(dim))
    a = np.linalg.inv( np.linalg.multi_dot(( ones.T, cov_inv, ones)) )
    # Second half of mvp weights formula
    b = np.dot( cov_inv, ones)
    # Portfolio weights
    mvp = a*b
    # out-of-sample data
    out_sample = np.array( data.iloc[i*out+tau:i*out+tau+out, :].T )
    # out-of-sample returns
    out_returns = np.dot(mvp.T, out_sample)
    ridge_returns += out_returns.T.tolist()

In [1255]:
# compute ridge portfolio variance
ridge_variance = np.var(ridge_returns)
# compute RP VaR at (1-var_thresh)% level
ridge_var = np.percentile(ridge_returns, 100-var_thresh)
# compute RP Sharpe ratio
ridge_sharpe = np.mean( ridge_returns )/np.sqrt(ridge_variance)
print(np.sqrt(ridge_variance))
print(ridge_var)
print(ridge_sharpe)

0.5621236882440815
-0.8550044096349942
0.15322037096935828
