## Imports

In [1]:
#List of imports

import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from hmmlearn import hmm

import urllib.request
import zipfile
import cvxpy as cp
import matplotlib.ticker as mtick
import matplotlib.pyplot as plt
import itertools
import random

random.seed(50)

## Additions below
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler

In [2]:
from tqdm.notebook import tqdm
import warnings; warnings.simplefilter('ignore')

# Main Final

In [None]:
##Main


#Set up Data
price_data = pd.read_csv("../Data/small_universe.csv", index_col='Date').fillna(method="bfill")
rf = (1+price_data["10 YR"])**(1/12)-1
universe=['ETN', 'AME', 'AAL', 'CHRW', 'ABT', 'AMGN', 'VRTX', 'ALGN', 'AAPL', 'ADBE', 'AMD', 'ADS', 'T', 'CMCSA', 'ATVI', 'CHTR', 'AMZN', 'F', 'DG', 'CMG', 'DUK', 'LNT', 'AES', 'FE', 'C', 'BAC', 'CBOE', 'RE', 'APD', 'FMC', 'BLL', 'CF', 'DRE', 'BXP', 'EQIX', 'CCI', 'CL', 'KMB', 'KR', 'STZ', 'CVX', 'XOM', 'COG', 'APA']
data = Data(price_data, rf,universe,"FF")

#Set up Portfolio
port=Portfolio(data)

#Set up Variables
num_stocks=data.get_num_stocks()
start_date = "2007-12-31"
end_date = "2020-11-26"
#1 year lookback
lookback = 12 
lookahead = 1

high_risk_ret=0.18
medium_risk_ret=0.12
low_risk_ret= 0.08

lam = (1+medium_risk_ret)**(1/12)-1
trans_coeff = 0.0001
holding_coeff = 0.0001
conf_level = 0


#Set up constraints
constr_list = ["asset_limit"]
constr_model = Constraints(constr_list)

#Set up cost models
cost_model = Costs(trans_coeff, holding_coeff)
cost_model.replicate_cost_coeff(num_stocks, lookahead)

opt_model = Model(lookahead,lam)
risk_model = Risks("MVO", "ellip", conf_level)

regress_weighting = [0,0,0.25,0.75]
factor_model = FactorModel(lookahead, lookback, regress_weighting)

back_test = Backtest(start_date, end_date)
back_test.run(data, port, factor_model, opt_model, constr_model, cost_model, risk_model)


Parameter OutputFlag unchanged
   Value: 1  Min: 0  Max: 1  Default: 1
Changed value of parameter QCPDual to 1
   Prev: 0  Min: 0  Max: 1  Default: 0
Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 403 rows, 178 columns and 878 nonzeros
Model fingerprint: 0xafd54e02
Model has 861 quadratic objective terms
Coefficient statistics:
  Matrix range     [1e-04, 1e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e-09, 1e-01]
  Bounds range     [0e+00, 0e+00]
  RHS range        [9e-03, 1e+00]
Presolve removed 269 rows and 45 columns
Presolve time: 0.01s
Presolved: 134 rows, 133 columns, 439 nonzeros
Presolved model has 861 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 Free vars  : 40
 AA' NZ     : 3.837e+03
 Factor NZ  : 4.257e+03
 Factor Ops : 1.415e+05 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual


  18   1.03292210e-05  3.20666131e-06  1.87e-10 4.60e-13  4.49e-09     0s
  19   9.43148747e-06  3.99722426e-06  6.51e-11 4.41e-13  3.42e-09     0s
  20   8.70853982e-06  4.58543792e-06  7.05e-11 4.14e-13  2.60e-09     0s
  21   8.08085129e-06  5.02145916e-06  7.70e-11 3.51e-13  1.93e-09     0s
  22   7.52500015e-06  5.48509255e-06  6.56e-11 3.20e-13  1.29e-09     0s
  23   7.22423843e-06  5.72676211e-06  4.93e-11 3.88e-13  9.44e-10     0s
  24   6.93085588e-06  5.96724115e-06  3.51e-11 3.06e-13  6.07e-10     0s
  25   6.71340939e-06  6.15111542e-06  7.17e-12 1.85e-13  3.54e-10     0s
  26   6.65215957e-06  6.20673968e-06  1.95e-12 1.35e-13  2.81e-10     0s
  27   6.57400090e-06  6.23459044e-06  1.62e-12 1.58e-13  2.14e-10     0s
  28   6.56830475e-06  6.23907581e-06  9.66e-13 1.50e-13  2.07e-10     0s
  29   6.45529816e-06  6.35669941e-06  8.07e-12 9.24e-14  6.21e-11     0s
  30   6.40341840e-06  6.39564138e-06  1.87e-12 6.65e-14  4.90e-12     0s

Barrier solved model in 30 iterations

  23   1.06863477e-05  2.16209037e-06  1.59e-10 4.21e-13  2.28e-09     0s
  24   9.85609317e-06  3.37344709e-06  1.10e-10 3.47e-13  1.73e-09     0s
  25   9.17103413e-06  4.37429173e-06  2.13e-10 3.12e-13  1.28e-09     0s
  26   8.81619551e-06  4.77236280e-06  1.95e-10 3.33e-13  1.08e-09     0s
  27   8.41522102e-06  5.18779428e-06  1.54e-10 3.22e-13  8.62e-10     0s
  28   8.06033374e-06  5.45157001e-06  1.05e-10 3.03e-13  6.97e-10     0s
  29   7.85693995e-06  5.65158678e-06  8.85e-11 2.73e-13  5.89e-10     0s
  30   7.76112225e-06  5.76396706e-06  7.57e-11 2.47e-13  5.33e-10     0s
  31   7.47994745e-06  5.98233661e-06  9.09e-11 2.04e-13  4.00e-10     0s
  32   7.41246872e-06  6.03584592e-06  8.67e-11 1.87e-13  3.68e-10     0s
  33   7.16932489e-06  6.23898023e-06  8.87e-11 1.68e-13  2.48e-10     0s
  34   7.08567536e-06  6.34599179e-06  5.51e-11 1.59e-13  1.98e-10     0s
  35   6.95088694e-06  6.46406658e-06  2.75e-11 1.99e-13  1.30e-10     0s
  36   6.87805013e-06  6.47976163e-06 

   9   1.95475947e-02 -1.09860955e+02  2.11e-07 4.24e-09  1.62e-02     0s
  10   1.95070075e-02 -1.91762445e+00  9.23e-09 7.48e-11  2.86e-04     0s
  11   1.69782334e-02 -1.23092994e+00  2.79e-09 2.26e-11  1.84e-04     0s
  12   1.39393960e-02 -3.37200765e-01  1.05e-09 2.67e-12  5.18e-05     0s
  13   9.46316661e-03 -1.85173154e-01  4.75e-10 4.51e-17  2.87e-05     0s
  14   6.30725355e-03 -8.88031683e-02  1.93e-10 2.01e-17  1.40e-05     0s
  15   3.96372176e-03 -6.98931239e-02  2.19e-10 3.47e-18  1.09e-05     0s
  16   1.53908083e-03 -1.59177821e-02  6.10e-10 6.94e-18  2.57e-06     0s
  17   4.06095923e-04 -3.07091980e-03  6.40e-10 3.47e-18  5.13e-07     0s
  18   1.06511142e-04 -3.47799361e-04  9.25e-10 1.14e-18  6.70e-08     0s
  19   5.31143629e-05 -1.21919938e-04  2.68e-10 4.16e-14  2.58e-08     0s
  20   3.85945301e-05 -7.45415251e-05  1.31e-10 4.98e-14  1.67e-08     0s
  21   3.06548484e-05 -4.93610528e-05  2.38e-11 2.01e-13  1.18e-08     0s
  22   2.47710732e-05 -3.21227480e-05 

Parameter OutputFlag unchanged
   Value: 1  Min: 0  Max: 1  Default: 1
Changed value of parameter QCPDual to 1
   Prev: 0  Min: 0  Max: 1  Default: 0
Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 9611 rows, 4418 columns and 21454 nonzeros
Model fingerprint: 0x0af6f638
Model has 861 quadratic objective terms
Coefficient statistics:
  Matrix range     [4e-05, 1e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e-08, 5e-01]
  Bounds range     [0e+00, 0e+00]
  RHS range        [3e-07, 1e+00]
Presolve removed 3653 rows and 53 columns
Presolve time: 0.02s
Presolved: 5958 rows, 4365 columns, 16623 nonzeros
Presolved model has 861 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 Free vars  : 40
 AA' NZ     : 7.322e+04
 Factor NZ  : 1.069e+05 (roughly 5 MBytes of memory)
 Factor Ops : 2.308e+06 (less than 1 second per iteration)
 Threads    : 1

              

  17   2.69728337e-03 -5.33905193e-02  2.99e-10 6.94e-18  4.32e-06     0s
  18   1.51381512e-03 -2.60544229e-02  6.06e-10 3.47e-18  2.12e-06     0s
  19   8.99296415e-04 -1.31288115e-02  2.69e-10 6.94e-18  1.08e-06     0s
  20   2.95396321e-04 -1.93126541e-03  1.09e-09 8.67e-19  1.71e-07     0s
  21   1.98704048e-04 -9.81695711e-04  5.04e-10 8.67e-19  9.09e-08     0s
  22   1.35091568e-04 -4.85251496e-04  2.23e-10 8.69e-15  4.78e-08     0s
  23   1.01174373e-04 -2.74287483e-04  1.16e-10 2.10e-14  2.89e-08     0s
  24   8.03260545e-05 -1.75168923e-04  1.39e-10 2.99e-14  1.97e-08     0s
  25   6.63590149e-05 -1.07107778e-04  2.23e-10 1.91e-14  1.34e-08     0s
  26   5.97764009e-05 -8.25887014e-05  2.37e-10 3.04e-14  1.10e-08     0s
  27   5.66785964e-05 -7.01110225e-05  2.11e-10 2.71e-14  9.76e-09     0s
  28   5.39551688e-05 -6.05914909e-05  1.90e-10 2.44e-14  8.82e-09     0s
  29   5.23169703e-05 -5.52844818e-05  1.78e-10 2.29e-14  8.28e-09     0s
  30   4.68674343e-05 -4.00612769e-05 

  46   2.11721217e-05  9.66968201e-06  2.72e-10 8.73e-14  7.42e-10     1s
  47   2.10123237e-05  9.86432057e-06  3.42e-10 8.47e-14  7.19e-10     1s
  48   2.01818799e-05  1.08882276e-05  5.92e-10 9.09e-14  6.00e-10     1s
  49   1.97692544e-05  1.13238906e-05  7.64e-10 9.66e-14  5.45e-10     1s
  50   1.90234116e-05  1.21952834e-05  7.91e-11 1.05e-13  4.41e-10     1s
  51   1.83807964e-05  1.29646585e-05  5.19e-10 1.08e-13  3.49e-10     1s
  52   1.77442625e-05  1.35840083e-05  1.17e-10 9.48e-14  2.68e-10     1s
  53   1.75110966e-05  1.39102406e-05  1.11e-10 9.05e-14  2.32e-10     1s
  54   1.73134440e-05  1.40240976e-05  2.31e-10 8.48e-14  2.12e-10     1s
  55   1.71335877e-05  1.41859246e-05  2.21e-10 7.90e-14  1.90e-10     1s
  56   1.68659583e-05  1.44246749e-05  9.01e-11 4.60e-14  1.58e-10     1s
  57   1.63226390e-05  1.48635051e-05  4.14e-10 5.22e-14  9.41e-11     1s
  58   1.61892663e-05  1.50050119e-05  1.61e-10 3.00e-14  7.64e-11     1s
  59   1.58423545e-05  1.51861073e-05 


Barrier solved model in 70 iterations and 0.86 seconds
Optimal objective 1.82431257e-05

Goal returns: 0.009488792934583046
port return raw: 0.010259357038477239
robustness cost: 0.0
risk value: 1.824312573186331e-05
holding cost: 0.0007479771691971121
trans cost: 2.253168622085599e-05
Parameter OutputFlag unchanged
   Value: 1  Min: 0  Max: 1  Default: 1
Changed value of parameter QCPDual to 1
   Prev: 0  Min: 0  Max: 1  Default: 0
Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 18439 rows, 8650 columns and 41246 nonzeros
Model fingerprint: 0x8446fede
Model has 861 quadratic objective terms
Coefficient statistics:
  Matrix range     [2e-07, 1e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [6e-08, 8e-01]
  Bounds range     [0e+00, 0e+00]
  RHS range        [3e-07, 1e+00]
Presolve removed 6401 rows and 57 columns
Presolve time: 0.03s
Presolved: 12038 rows, 8593 columns, 3

   4   8.32319047e+03 -6.77240707e+08  3.38e+03 6.98e+01  1.09e+05     0s
   5   2.52867175e+03 -3.66863107e+08  1.81e+03 3.75e+01  6.45e+04     0s
   6   7.05625342e+02 -2.18417392e+08  9.37e+02 1.94e+01  3.83e+04     0s
   7   1.72098819e+02 -1.36823536e+08  4.48e+02 9.28e+00  2.21e+04     0s
   8   1.64305710e+00 -1.83344724e+07  3.46e+01 7.15e-01  2.03e+03     0s
   9   7.20796688e-02 -4.19804965e+06  8.36e-04 1.73e-05  1.72e+02     0s
  10   7.20665222e-02 -4.21107312e+03  2.05e-07 8.65e-11  1.73e-01     0s
  11   7.20529172e-02 -1.71700897e+01  1.03e-09 2.90e-13  7.08e-04     0s
  12   6.52321791e-02 -7.61339126e+00  1.08e-10 3.08e-14  3.15e-04     0s
  13   4.84644975e-02 -6.19033658e+00  4.88e-15 1.67e-16  2.56e-04     0s
  14   4.18389809e-02 -1.43263091e+00  3.27e-10 4.93e-17  6.06e-05     0s
  15   2.64683323e-02 -9.33063263e-01  6.66e-15 1.46e-17  3.94e-05     0s
  16   1.57020136e-02 -4.13934056e-01  3.29e-09 1.19e-17  1.76e-05     0s
  17   8.33302260e-03 -2.05428556e-01 

  25   2.11416843e-04 -8.11024612e-04  2.46e-10 5.96e-15  3.69e-08     1s
  26   1.82306276e-04 -6.17238792e-04  1.77e-10 4.28e-15  2.88e-08     1s
  27   1.56073945e-04 -4.31154768e-04  4.70e-11 1.68e-14  2.12e-08     1s
  28   1.32291086e-04 -2.98125171e-04  3.65e-11 1.49e-14  1.55e-08     1s
  29   1.11918735e-04 -2.07254443e-04  2.64e-11 1.56e-14  1.15e-08     1s
  30   9.88842847e-05 -1.55438356e-04  8.16e-11 1.98e-14  9.17e-09     1s
  31   9.04514297e-05 -1.23602705e-04  2.87e-10 2.29e-14  7.72e-09     1s
  32   8.33467999e-05 -1.00803850e-04  2.56e-10 1.84e-14  6.64e-09     1s
  33   7.67216139e-05 -8.08620713e-05  1.88e-10 1.42e-14  5.68e-09     1s
  34   7.02449827e-05 -6.42664221e-05  2.51e-10 1.50e-14  4.85e-09     1s
  35   6.37279173e-05 -4.72487033e-05  6.67e-11 1.56e-14  4.00e-09     1s
  36   5.94897283e-05 -3.55331936e-05  1.03e-10 1.25e-14  3.43e-09     1s
  37   5.55447262e-05 -2.75153914e-05  3.67e-11 9.74e-15  2.99e-09     1s
  38   4.85331314e-05 -1.12367440e-05 

  50   3.27077220e-05  7.69497886e-06  6.59e-11 2.77e-14  7.98e-10     1s
  51   3.08867603e-05  1.01902860e-05  2.54e-11 1.46e-14  6.60e-10     1s
  52   2.98292649e-05  1.11204812e-05  3.07e-11 2.77e-14  5.97e-10     1s
  53   2.83325446e-05  1.27779905e-05  1.72e-10 5.53e-14  4.96e-10     1s
  54   2.71419165e-05  1.40384772e-05  3.40e-10 6.84e-14  4.18e-10     1s
  55   2.63041294e-05  1.49564036e-05  1.48e-10 8.31e-14  3.62e-10     1s
  56   2.53302484e-05  1.59921908e-05  5.90e-10 8.99e-14  2.98e-10     1s
  57   2.49813996e-05  1.64285130e-05  6.49e-10 8.62e-14  2.73e-10     2s
  58   2.41596950e-05  1.71589594e-05  2.65e-10 7.97e-14  2.23e-10     2s
  59   2.38006390e-05  1.74391116e-05  5.31e-10 8.09e-14  2.03e-10     2s
  60   2.31177841e-05  1.79343222e-05  3.47e-10 6.94e-14  1.65e-10     2s
  61   2.28987184e-05  1.82601765e-05  8.95e-11 5.04e-14  1.48e-10     2s
  62   2.18418204e-05  1.89680258e-05  7.41e-11 4.05e-14  9.17e-11     2s
  63   2.17306519e-05  1.91027325e-05 

  65   2.96703221e-05  2.22744408e-05  8.19e-10 4.45e-14  2.10e-10     2s
  66   2.89086313e-05  2.28940646e-05  1.28e-09 4.35e-14  1.71e-10     2s
  67   2.86533502e-05  2.32611079e-05  1.15e-09 4.24e-14  1.53e-10     2s
  68   2.85227818e-05  2.33734152e-05  9.97e-10 3.53e-14  1.46e-10     2s
  69   2.79135819e-05  2.37484083e-05  9.58e-10 3.59e-14  1.18e-10     2s
  70   2.67864483e-05  2.43465896e-05  8.63e-10 3.04e-14  6.94e-11     2s
  71   2.65172176e-05  2.46041912e-05  3.00e-10 1.43e-14  5.44e-11     2s
  72   2.55764119e-05  2.50573669e-05  1.88e-09 1.71e-14  1.48e-11     2s
  73   2.53514187e-05  2.51532294e-05  1.18e-11 9.58e-15  5.63e-12     2s
  74   2.53438026e-05  2.51579527e-05  1.27e-10 8.88e-15  5.28e-12     2s
  75   2.52819821e-05  2.51958095e-05  8.73e-10 4.20e-15  2.45e-12     2s
  76   2.52705740e-05  2.52068487e-05  3.33e-11 5.07e-16  1.81e-12     2s
  77   2.52633794e-05  2.52121120e-05  4.48e-11 4.46e-16  1.46e-12     2s
  78   2.52570723e-05  2.52179986e-05 

  75   2.52683865e-05  2.22646879e-05  3.74e-10 1.20e-14  7.66e-11     2s
  76   2.51340151e-05  2.22960019e-05  5.91e-10 1.35e-14  7.24e-11     2s
  77   2.46635141e-05  2.25063214e-05  6.75e-10 1.48e-14  5.50e-11     2s
  78   2.45966638e-05  2.25519555e-05  5.88e-10 1.45e-14  5.21e-11     2s
  79   2.36656073e-05  2.30225897e-05  3.24e-10 1.86e-14  1.64e-11     2s
  80   2.33900655e-05  2.31488079e-05  9.33e-10 9.56e-15  6.15e-12     2s
  81   2.33819637e-05  2.31540571e-05  8.12e-10 8.50e-15  5.81e-12     2s
  82   2.33550388e-05  2.31719688e-05  3.01e-10 6.43e-15  4.67e-12     2s
  83   2.33032215e-05  2.32097344e-05  2.64e-10 2.77e-15  2.38e-12     2s
  84   2.32830809e-05  2.32207046e-05  7.40e-11 1.56e-15  1.59e-12     2s
  85   2.32796827e-05  2.32230456e-05  4.19e-11 1.38e-15  1.44e-12     2s
  86   2.32766455e-05  2.32280084e-05  6.57e-11 1.13e-15  1.24e-12     2s
  87   2.32604077e-05  2.32410277e-05  1.40e-10 7.88e-16  4.94e-13     2s
  88   2.32578436e-05  2.32435123e-05 

  76   3.39963049e-05  3.00673902e-05  3.08e-09 1.59e-14  9.03e-11     2s
  77   3.39406519e-05  3.01114568e-05  3.18e-09 1.57e-14  8.80e-11     2s
  78   3.38461607e-05  3.01923341e-05  2.90e-09 1.46e-14  8.40e-11     2s
  79   3.35920700e-05  3.03703888e-05  2.31e-09 1.19e-14  7.41e-11     2s
  80   3.22805022e-05  3.11391525e-05  2.97e-09 1.66e-14  2.62e-11     2s
  81   3.16988974e-05  3.13786732e-05  3.10e-09 9.98e-15  7.36e-12     2s
  82   3.16939767e-05  3.13830302e-05  2.77e-09 9.63e-15  7.15e-12     3s
  83   3.16725363e-05  3.13964306e-05  1.75e-09 8.44e-15  6.35e-12     3s
  84   3.15737435e-05  3.14496671e-05  1.11e-09 4.22e-15  2.85e-12     3s
  85   3.15707901e-05  3.14513609e-05  1.03e-09 4.10e-15  2.75e-12     3s
  86   3.15611430e-05  3.14619662e-05  1.33e-10 2.96e-16  2.28e-12     3s
  87   3.15443891e-05  3.14724830e-05  3.48e-10 3.46e-16  1.65e-12     3s
  88   3.15301783e-05  3.14866813e-05  3.07e-10 4.71e-16  1.00e-12     3s
  89   3.15174040e-05  3.14964822e-05 

  76   4.00079286e-05  2.80349217e-05  5.63e-11 9.43e-15  2.50e-10     3s
  77   3.94247999e-05  2.87233967e-05  2.65e-10 8.29e-15  2.23e-10     3s
  78   3.90988745e-05  2.88428905e-05  4.86e-10 1.42e-14  2.14e-10     3s
  79   3.79222261e-05  2.95541287e-05  1.80e-09 1.18e-14  1.74e-10     3s
  80   3.71748269e-05  3.03550265e-05  6.35e-10 1.10e-14  1.42e-10     3s
  81   3.65709683e-05  3.08657224e-05  1.07e-09 1.09e-14  1.19e-10     3s
  82   3.62844346e-05  3.10501149e-05  1.37e-09 1.30e-14  1.09e-10     3s
  83   3.57580057e-05  3.14716072e-05  1.28e-09 1.57e-14  8.93e-11     3s
  84   3.55610400e-05  3.14175899e-05  1.03e-09 1.62e-14  8.64e-11     3s
  85   3.51397813e-05  3.17558170e-05  1.75e-09 1.29e-14  7.05e-11     3s
  86   3.40798943e-05  3.23770283e-05  2.36e-09 1.54e-14  3.55e-11     3s
  87   3.32101413e-05  3.28086462e-05  2.69e-09 1.00e-14  8.37e-12     3s
  88   3.32050370e-05  3.28113579e-05  2.71e-09 9.89e-15  8.20e-12     3s
  89   3.31933657e-05  3.28214005e-05 

  70   9.26510386e-05  2.60407434e-05  1.37e-10 2.86e-14  1.26e-09     3s
  71   8.95466116e-05  2.95459827e-05  1.23e-10 2.57e-14  1.14e-09     3s
  72   8.76318512e-05  3.27294807e-05  1.06e-10 2.21e-14  1.04e-09     3s
  73   8.49644720e-05  3.57473537e-05  1.02e-10 1.75e-14  9.34e-10     3s
  74   8.30021336e-05  3.75350695e-05  8.95e-11 1.53e-14  8.63e-10     3s
  75   8.15990645e-05  3.88275855e-05  4.24e-11 1.38e-14  8.12e-10     3s
  76   7.98758666e-05  4.05923211e-05  3.69e-11 1.20e-14  7.46e-10     3s
  77   7.85013284e-05  4.18280274e-05  2.01e-10 8.80e-15  6.96e-10     3s
  78   7.42087551e-05  4.58872136e-05  7.16e-11 8.96e-15  5.38e-10     3s
  79   7.20044715e-05  4.87039436e-05  1.01e-10 9.64e-15  4.42e-10     3s
  80   7.10793045e-05  4.91962321e-05  5.33e-11 8.61e-15  4.15e-10     3s
  81   6.94611771e-05  5.09992501e-05  3.73e-11 7.19e-15  3.50e-10     3s
  82   6.81479389e-05  5.16046178e-05  9.15e-12 5.84e-15  3.14e-10     3s
  83   6.74951848e-05  5.22683289e-05 

<bound method NDFrame.head of Date
2006-11-28    0.003683
2006-11-29    0.003691
2006-11-30    0.003643
2006-12-03    0.003619
2006-12-04    0.003619
                ...   
2020-11-19    0.000714
2020-11-22    0.000714
2020-11-23    0.000714
2020-11-24    0.000730
2020-11-26         NaN
Name: 10 YR, Length: 3524, dtype: float64>

## Class Definitions Data/Portfolio


In [14]:
class Data:
    #Anything Data Related
    def __init__(self, stock_prices, risk_free, universe=None,factor_type='PCA', period='M'):
        #TO-DO: Add initialization of market cap
        
        if not universe:
            universe = stock_prices.columns
            
        if type(universe[0]) == int:
            self.stock_prices = stock_prices.iloc[:,universe]

        else:
            self.stock_prices = stock_prices[universe]
        
        self.risk_free = risk_free
        self.risk_free.index = pd.to_datetime(self.risk_free.index)
        self.risk_free = self.risk_free.resample(period).last()
        self.stock_prices.index= pd.to_datetime(self.stock_prices.index)
        self.stock_returns=self.get_stock_returns(period)
        self.factor_returns= self.get_factor_returns(factor_type)
 
        return
    
    def get_stock_returns(self, period='M'):
        price = self.stock_prices.resample(period).last()

        # Calculate the percent change
        ret_data = price.pct_change()[1:]

        # Convert from series to dataframe
        ret_data = pd.DataFrame(ret_data)

        return ret_data

    def get_factor_returns(self, factor_type='PCA', period='M'):
        if factor_type == 'CAPM':
         
            return self.get_CAPM_returns(period)
        
        elif factor_type == 'FF':
       
            return self.get_FF_returns(period)
            
        elif factor_type == 'Carhart':
           
            return self.get_Carhart_returns(period)
            
        elif factor_type == 'PCA':
           
            return self.get_PCA_returns(period)
        
        else:
            print("Invalid input: Please select one of the following factor types: CAPM, FF, Carhart or PCA.")
        
        return   
    
    def get_FF_returns(self, period='M'):
        ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"    
        # Download the file and save it  
        urllib.request.urlretrieve(ff_url,'fama_french.zip')
        zip_file = zipfile.ZipFile('fama_french.zip', 'r')    
        # Extact the file data
        zip_file.extractall()
        zip_file.close()    
        ff_factors = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows = 3, index_col = 0)   
        # Skip null rows
        ff_row = ff_factors.isnull().any(1).to_numpy().nonzero()[0][0]

        # Read the csv file again with skipped rows
        ff_factors = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows = 3, nrows = ff_row, index_col = 0)

        # Format the date index
        ff_factors.index = pd.to_datetime(ff_factors.index, format= '%Y%m')

        # Format dates to end of month
        ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()

        # Resample the data to correct frequency
        ff_factors = ff_factors.resample(period).last()

        # Convert from percent to decimal
        ff_factors = ff_factors.apply(lambda x: x/ 100)

        return ff_factors
    
    def get_CAPM_returns(self, period='M'):
        ff_factors = self.get_FF_returns(period)
        
        # Remove the unnecessary factors
        capm_factors = ff_factors.iloc[:, 0]
        
        return capm_factors
    
    def get_Carhart_returns(self, period='M'):
        ff_factors = self.get_FF_returns(period)

        # Get the momentum factor
        momentum_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_CSV.zip"

        # Download the file and save it  
        urllib.request.urlretrieve(momentum_url,'momentum.zip')
        zip_file = zipfile.ZipFile('momentum.zip', 'r')

        # Extact the file data
        zip_file.extractall()
        zip_file.close()

        momentum_factor = pd.read_csv('F-F_Momentum_Factor.csv', skiprows = 13, index_col = 0)

        # Skip null rows
        row = momentum_factor.isnull().any(1).to_numpy().nonzero()[0][0]

        # Read the csv file again with skipped rows
        momentum_factor = pd.read_csv('F-F_Momentum_Factor.csv', skiprows = 13, nrows = row, index_col = 0)

        # Format the date index
        momentum_factor.index = pd.to_datetime(momentum_factor.index, format= '%Y%m')

        # Format dates to end of month
        momentum_factor.index = momentum_factor.index + pd.offsets.MonthEnd()

         # Resample the data to correct frequency
        momentum_factor = momentum_factor.resample(period).last()

        # Convert from percent to decimal
        momentum_factor = momentum_factor.apply(lambda x: x/ 100)

        # Combine to create the carhart_factors
        carhart_factors = pd.concat([ff_factors, momentum_factor], axis=1).dropna()

        return carhart_factors
    
    def get_PCA_returns(self, period='M'):
        exRets = self.get_stock_returns(period="D")
        num_stocks = len(exRets.columns)
        returns_mat = exRets.to_numpy()
        n_dates = returns_mat.shape[0]
        n_assets = returns_mat.shape[1]
        
        demeaned = (returns_mat - returns_mat.mean(axis=0)).transpose()
        sigma = 1/(n_dates - 1)*np.matmul(demeaned,demeaned.transpose())
        eigval, eigvec = np.linalg.eig(sigma)
        
        principal_components = np.matmul(eigvec.transpose(),demeaned).transpose()
        pca_factors = np.real(principal_components[:,0:100])
        
        pca_df = pd.DataFrame(pca_factors, index = exRets.index, columns = [str(i) for i in range(num_stocks)])
        pca_df = pca_df.resample(period).last()
        
        return pca_df
    
    def get_index_from_date(self, date_index_df, date):
        return date_index_df.index.get_loc(date)
    
    def get_lookback_data(self, date_index_df, date, lookback):
        end_idx= self.get_index_from_date(date_index_df, date)
        return date_index_df.iloc[end_idx-lookback:end_idx]      
    
    def get_num_stocks(self):
        return len(self.stock_returns.columns)

    
class Portfolio:
    #Anything Portfolio related: weights, returns, date-stamped
    def __init__(self, data):       
        num_stocks=data.get_num_stocks()
        self.weights= np.array([[0]*num_stocks + [1]]) # 0 weight on stock
        self.returns= np.array([])
        self.dates= []
        return
        
    def update_weights(self, new_weights):
        
        new_weights = np.expand_dims(new_weights, axis=0)
        self.weights = np.append(self.weights, new_weights, axis=0)
        return
     
    def update_returns(self, new_returns):
        self.returns=np.append(self.returns, new_returns)
        return

    def update_dates(self, new_dates):
       
        self.dates.append(new_dates)
        return
        
    def get_Sharpe(self, data):
        risk_free = data.risk_free
        recent_date = self.dates[-1]
        sigma = np.std(self.returns - np.array(risk_free.loc[self.dates]))
        sharpe_ratio = ((np.prod(1+self.returns)-1) - np.array(risk_free.loc[recent_date]))/sigma
        return sharpe_ratio
        
    def plot(self):
        port_cumu_returns = np.array([x+1 for x in self.returns]).cumprod()
        plt.figure(figsize=(12,6))
        plt.plot(self.dates, port_cumu_returns)
        plt.xticks(rotation=45)
        plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        plt.xlabel("Date")
        plt.ylabel("Cumulative Return")
        plt.show()
    


## Class Definitions: Cost/Constraints

In [15]:
class Costs:
    def __init__(self, trans_coeff, holding_coeff):
        self.holding_cost = 0
        self.trans_cost = 0
        self.trans_coeff = trans_coeff
        self.holding_coeff = holding_coeff
        return
        
    def replicate_cost_coeff(self, num_stocks, lookahead):
        trans_cost_repl = np.ones((num_stocks,lookahead))
        holding_cost_repl = np.ones((num_stocks, lookahead))
        self.trans_coeff = trans_cost_repl*self.trans_coeff
        self.holding_coeff = holding_cost_repl*self.holding_coeff
        return
    
    def set_holding_cost(self, weights_new):
        self.holding_cost += cp.sum(cp.multiply(self.holding_coeff, cp.neg(weights_new)))
        return
        
    def calc_trans_cost(self, weights_new, weights_old, trans_coeff):
        abs_trade= cp.abs(weights_new-weights_old)
        return cp.sum(cp.multiply(trans_coeff, abs_trade))       
    
    
    def set_trans_cost(self, weights_new, weights_old):
        weights_curr= weights_new[:,0]
        if weights_new.shape[1]>1:         
            weights_future = weights_new[:,1:]
            weights_future_shift = weights_new[:,:-1]
            self.trans_cost = self.calc_trans_cost(weights_future, weights_future_shift, self.trans_coeff[:,1:])

        self.trans_cost += self.calc_trans_cost(weights_curr, weights_old,self.trans_coeff[:,0])
        return


class Constraints:
        #List of all constraints
        def __init__(self, constr_list=['asset_limit_cardinality'], 
                     upper_limit=0.20, lower_limit=-0.20, stock_limit=15):
            self.upper_limit = upper_limit
            self.lower_limit = lower_limit
            self.stock_limit = stock_limit
            self.constr_list = constr_list
            self.value=[]

        
        def set_constraints(self, all_weights, y, cvar=False, gamma=None, z=None, r=None):
            
            # weights is without risk free
            weights=all_weights[:-1,:]
           
            #unity condition
            self.value += [cp.sum(all_weights,axis=0)==1]
            
            #can never be short cash
            self.value += [all_weights[-1,:]>=0]
            
            num_stocks=weights.shape[0]
            
            if cvar:
                self.value += [z >= 0]
                self.value += [z >= -r.T@all_weights-gamma]
            
            if "no_short" in self.constr_list:
                self.value+=  [weights>=0]
                
            if "asset_limit_cardinality" in self.constr_list:
                upper_limit= cp.multiply(self.upper_limit, y)
                lower_limit = cp.multiply(self.lower_limit,y)
                
                #ensure that at least 1 but no more than 2 in each sector
                for i in range(0,num_stocks,4):
                    self.value += [y[i:i+3]>=1]
                   # self.value += [y[i:i+3]<=2]
                    
                self.value += [weights>=lower_limit, weights<=upper_limit]

            elif "asset_limit" in self.constr_list:
                self.value += [weights>=self.lower_limit, weights<=self.upper_limit]
        
            return



    


## Class Definitions Risk Type/ Optimization Models

In [28]:
class Risks:
    def __init__(self, risk_type="MVO", robust_type="ellip", conf_lvl=0):
        #risk value, return adjustment, risk type and confidence level
        self.value=0
        self.return_adj=0
        self.risk_type=risk_type
        self.robust_type=robust_type
        self.conf_lvl=conf_lvl
        return
        
    def set_risk(self, weights, Q, lookahead=1, S=5000, gamma=None, z=None, alpha=None):
        
        portfolio_risk=0
        robustness_cost=0
        num_stocks = weights.shape[1]
        
        if self.risk_type== "MVO":
        
            for i in range(lookahead):
                portfolio_risk += cp.quad_form(weights[:,i], Q[i])        
            self.value = portfolio_risk
            
        elif self.risk_type == "CVAR":
            if not S or not gamma or not z or not alpha:
                print("Missing one of these required inputs for CVaR optimization: S, gamma, z, alpha")
                return
            self.value = gamma + (1/((1-alpha)*S))*cp.sum(z)
        
        if self.robust_type == "rect":
            
            for i in range(lookahead):
                delta = stats.norm.ppf(self.conf_lvl)*np.sqrt(np.diag(Q[i]/num_stocks))
                robustness_cost += delta@cp.abs(weights[:,i])
    
            self.return_adj= robustness_cost
        
        elif self.robust_type == "ellip":
            
            for i in range(lookahead):
         
                penalty = cp.norm(np.sqrt(np.diag(Q[i]/num_stocks))@weights[:,i],2)
                robustness_cost += stats.chi2.ppf(self.conf_lvl, num_stocks)*penalty
                

            self.return_adj = robustness_cost
            
    
        return

    def get_RP_objective(self, weights, args):
        Q = args[0]
        assets_risk_budget = args[1]
        lookahead = args[2]
        cost_model = args[3]
        
        num_stocks = len(assets_risk_budget)

        self.value=0
        # We convert the weights to a matrix
        weights = np.matrix(weights)
        for i in range(lookahead):
            # We calculate the risk of the weights distribution

            portfolio_risk = np.sqrt((weights[0,num_stocks*i:num_stocks*(i+1)] * Q[i] 
                                      * weights[0,num_stocks*i:num_stocks*(i+1)].T))[0, 0]

            # We calculate the contribution of each asset to the risk of the weights
            # distribution
            assets_risk_contribution = np.multiply(weights[0,num_stocks*i:num_stocks*(i+1)].T, Q[i] 
                                                   * weights[0,num_stocks*i:num_stocks*(i+1)].T)/ portfolio_risk

            # We calculate the desired contribution of each asset to the risk of the
            # weights distribution
            assets_risk_target = np.asmatrix(np.multiply(portfolio_risk, assets_risk_budget))

            # Error between the desired contribution and the calculated contribution of
            # each asset
            self.value += np.sum(np.square(assets_risk_contribution - assets_risk_target.T))
            
            # Get the holding costs
            self.value += np.sum(cost_model.holding_coeff[0,0]*weights[0,num_stocks*i:num_stocks*(i+1)])
            
            # Get the transaction costs
            if i < lookahead-1:
                abs_trade = np.abs(weights[0, num_stocks*i:num_stocks*(i+1)]-
                                   weights[0, num_stocks*(i+1):num_stocks*(i+2)])
                self.value += np.sum(cost_model.trans_coeff[0,0]*abs_trade)
            
        # It returns the calculated error
        return self.value 
    
    
    
class Model:
    def __init__(self, look_ahead, goal_ret):
        self.opt_weights = 0
        self.status = None
        self.look_ahead=look_ahead
        self.goal_ret=goal_ret
        return
        
    def Solver(self, port, mu , Q, rf, constr_model, cost_model, risk_model, scen_model=None):
        
        mu_np = np.array(mu)
   
        Q_np = np.array(Q)
 
        num_stocks = port.weights.shape[1]-1
        num_simulations=5000
        
        if risk_model.risk_type =="CVAR":
            self.look_ahead=1
            mu_np = np.array(mu)[0,:]
            mu_np = np.expand_dims(mu_np, axis=0)
        
        
        #Construct optimization problem
        all_weights = cp.Variable((num_stocks+1,self.look_ahead))   
        y = cp.Variable((num_stocks,self.look_ahead), boolean=True)
        z = cp.Variable((num_simulations,1)) 
        g = cp.Variable(1) 
        
        weights_prev= port.weights[-1,:-1]   
        weights=all_weights[:-1,:]

        # Set model parameters
        cost_model.set_trans_cost(weights, weights_prev)
        cost_model.set_holding_cost(weights)    
        constr_model.set_constraints(all_weights, y)
        #scen_model.set_scenarios()
        
        if risk_model.risk_type=="CVAR":
            constr_model.set_constraints(all_weights, y, cvar=True, gamma=g, z=z, r=scen_model.value)
            risk_model.set_risk(weights, Q, S=5000, gamma=g, z=z, alpha=alpha)
        elif risk_model.risk_type=="MVO":

            constr_model.set_constraints(all_weights, y)
            risk_model.set_risk(weights, Q, self.look_ahead)

        # Get portfolio return
        portfolio_return_per_period = mu_np@weights
        rf_return = cp.sum(rf*all_weights[-1,:])
        portfolio_return = cp.trace(portfolio_return_per_period)+rf_return 
        
        #Max return objective
        #objective= cp.Maximize(portfolio_return-risk_model.return_adj)
        
        #Minimize risk objective
        objective= cp.Minimize(risk_model.value)
        constr_model.value+= [portfolio_return - risk_model.return_adj-cost_model.trans_cost -cost_model.holding_cost >=self.goal_ret]
        
        #Construct Problem and Solve
        prob= cp.Problem(objective, constr_model.value)
        result=prob.solve(solver="GUROBI", verbose=True)
        self.status= prob.status

        temp_goal_ret = self.goal_ret
        while self.status!="optimal":
            print("Unsolvable, Reducing Return Target")
            temp_goal_ret= 0.8*temp_goal_ret
            print("Temporary Goal Return is:", temp_goal_ret)
            new_constr= [portfolio_return - risk_model.return_adj-cost_model.trans_cost -cost_model.holding_cost >=temp_goal_ret]
            constr_model.value= constr_model.value[:-1]+ new_constr
            prob= cp.Problem(objective, constr_model.value)
            result=prob.solve(solver="GUROBI")
            self.status= prob.status
            
        
        self.opt_weights=np.array(all_weights.value)[:,0]
        print("Goal returns:", self.goal_ret)
        print("port return raw:",portfolio_return.value)
        print("robustness cost:", risk_model.return_adj.value)
        print("risk value:",risk_model.value.value)
        print("holding cost:",cost_model.holding_cost.value)
        print("trans cost:", cost_model.trans_cost.value)
     
        return self.opt_weights
    


    def risk_parity(self, port, Q, lookahead, risk_model, cost_model):
        TOLERANCE = 1e-7
        Q_np =np.array(Q)
        num_stocks=port.weights.shape[1]-1

        #Construct optimization problem
        init_weights = np.tile(port.weights[-1,:-1],lookahead).astype(float)
        init_rf = port.weights[-1,-1]
        weight_total = 1-init_rf
        
        if np.count_nonzero(init_weights)==0:
            init_weights = np.array([1/num_stocks]*num_stocks*lookahead)
            weight_total=1
            init_rf = 0
        
        # The desired contribution of each asset to the portfolio risk: we want all
        # assets to contribute equally
        assets_risk_budget = [1/num_stocks] * num_stocks

        # Optimisation process of weights
        # Restrictions to consider in the optimisation: only long positions whose
        # sum equals 100%
        constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - weight_total},
                       {'type': 'ineq', 'fun': lambda x: x})

        # Optimisation process in scipy
        optimize_result = minimize(fun=risk_model.get_RP_objective,
                                   x0=init_weights,
                                   args=[Q, assets_risk_budget, lookahead, cost_model],
                                   method='SLSQP',
                                   constraints=constraints,
                                   tol=TOLERANCE,
                                   options={'disp': False, 'maxiter':5000}
                                  )

        # Recover the weights from the optimised object
        weights = np.array(optimize_result.x) 
        
        self.opt_weights = np.concatenate((weights[0:num_stocks], np.array([init_rf])))
        return self.opt_weights        

 

## Class Definitions Backtest/Gridsearch

In [17]:
    
class Livetest:
    def __init__(self, start_date, end_date, period='M'):
        self.rebal_freq = period
        self.start_date = start_date
        self.end_date = end_date        

        return
        
    
    def run(self, data, portfolio):      
        look_back= factor_model.lookback
        look_ahead= factor_model.lookahead
        stock_return= data.stock_returns      
        reb_dates= np.array(data.stock_returns.loc[self.start_date:self.end_date].index)    
        
        
        for t in reb_dates:
            #need some dynamic adjustment here... how to set risk_model confidence level, cost_coefficieints,
            #constraint asset limits, goal_return ,, factor_model, opt_model, constr_model, cost_model, risk_model
            
            goal_return
            
            
            mu, Q = factor_model.get_param_estimate(t, data)
            new_rf_rate=float(data.risk_free.loc[t])            
            
            weights = opt_model.Solver(portfolio, mu , Q, new_rf_rate, self.lookahead,
                                        constr_model, cost_model, risk_model)     
            

#             elif risk_model.risk_type == 'risk-parity':
#                 weights = optimizer.risk_parity(portfolio, Q, self.lookahead, risk_model, cost_model)
                

                
            portfolio.update_dates(t)
            portfolio.update_weights(weights)
            portfolio.update_returns(np.dot(weights[:-1],stock_return.loc[t])+weights[-1]*new_rf_rate)

        return portfolio.get_Sharpe(data)




In [18]:
    
class Backtest:
    def __init__(self, start_date, end_date, period='M'):
        self.rebal_freq = period
        self.start_date = start_date
        self.end_date = end_date        

        return
        
    
    def run(self, data, portfolio, factor_model, opt_model, constr_model, cost_model, risk_model):      
        look_back= factor_model.lookback
        look_ahead= factor_model.lookahead
        stock_return= data.stock_returns      
        reb_dates= np.array(data.stock_returns.loc[self.start_date:self.end_date].index)    

        
        for t in reb_dates:
            
            
            mu, Q = factor_model.get_param_estimate(t, data)
            new_rf_rate=float(data.risk_free.loc[t]) 

            
            weights = opt_model.Solver(portfolio, mu , Q, new_rf_rate, constr_model, cost_model, risk_model)     
            

#             elif risk_model.risk_type == 'risk-parity':
#                 weights = optimizer.risk_parity(portfolio, Q, self.lookahead, risk_model, cost_model)
                

                
            portfolio.update_dates(t)
            portfolio.update_weights(weights)
            portfolio.update_returns(np.dot(weights[:-1],stock_return.loc[t])+weights[-1]*new_rf_rate)

        return portfolio.get_Sharpe(data)


    def grid_search(self, data, portfolio, model, trans_coeff=0.2, hold_coeff=0.2, lam=0.9, conf_level=0.95):

#         # Overall - currently test values are used
#         pot_lookaheads = [1, 3, 6, 12, 60]
#         pot_lookbacks = [2, 3, 6, 12, 60]

#         # Factor Models
#         factor_models = ['CAPM', 'FF', 'Carhart', 'PCA'] # Data
#         regressions = ['linear', 'lasso', 'ridge', 'SVR'] # FactorModel

#         # Constraints
#         cardinalities = ['', 'cardinality']
#         asset_limits = ['asset_limit_cardinality', 'asset_limit']
#         no_shorts = ['', 'no_short']
#         constraints_list = [cardinalities, asset_limits, no_shorts]

#         stock_limits = list(range(5, 501, 5))

#         # Optimization
#         MVO_robustness = ['', 'rectangular', 'elliptical']

        # Overall
        pot_lookaheads = [5]
        pot_lookbacks = [20]

        # Factor Models
        factor_models = ['FF', 'PCA']  # Data
        weights = [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1],[0.25,0.25,0.25,0.25],
                       [0,0.5,0.5,0],[0,0.25,0.25,0.5],[0,0,0.5,0.5]]

        # Constraints
        cardinalities = ['cardinality']
        asset_limits = ['asset_limit_cardinality', 'asset_limit']
        no_shorts = ['no_short']
        constraints_list = [cardinalities, asset_limits, no_shorts]

        stock_limits = list(range(5, 21, 5))
        upper_asset_limits = [1]
        lower_asset_limits = [-1]

        # Optimization
        MVO_robustness = ['ellip']

        # list of sharpe ratios per parameter combination
        sharpe_ratios = []

        # list of parameter combinations corresponding to sharpe ratio
        parameter_combos = []

        for combo in tqdm(list(itertools.product(factor_models, weights, \
                                                 list(itertools.product(*constraints_list)), stock_limits, \
                                                 upper_asset_limits, lower_asset_limits, MVO_robustness))):

            # Store the combination
            curr_combo = {'rebalance_freq': 'M', 'factor_model': combo[0], 'weights': combo[1],
                          'constraints_list': list(combo[2]), 'stock_limit': combo[3], 'upper_asset_limit': combo[4],
                          'lower_asset_limit': combo[5], 'robustness': combo[6]}

            # Initial Setup
            data.set_factor_returns(curr_combo['factor_model'], curr_combo['rebalance_freq'])

            num_stocks = data.get_num_stocks()
            cost_model = Costs(trans_coeff, hold_coeff)

            # Get lookaheads that are multiples of the rebalancing frequency and <= 60 months
            if curr_combo['rebalance_freq'] == 'M':
                first = 1
            else:
                first = int(curr_combo['rebalance_freq'][0])

            lookaheads = list(itertools.compress(pot_lookaheads, [look >= first for look in pot_lookaheads]))
            lookbacks = list(itertools.compress(pot_lookbacks, [look >= first for look in pot_lookbacks]))

            for lookahead in lookaheads:
                curr_combo['lookahead'] = lookahead
                for lookback in lookbacks:
                    curr_combo['lookback'] = lookback

                    # Continue Setup
                    cost_model.replicate_cost_coeff(num_stocks, lookahead)
                    constr_model = Constraints(curr_combo['constraints_list'])

                    risk_model = Risks(curr_combo['robustness'], conf_level)

                    # Run backtest
                    factor = FactorModel(curr_combo['lookahead'], curr_combo['lookback'],
                                         curr_combo['weights'])
                    sharpe = self.run(data, portfolio, factor, model, constr_model, cost_model, risk_model)

                    # Update results
                    sharpe_ratios.append(sharpe)
                    parameter_combos.append(curr_combo)

        return sharpe_ratios, parameter_combos


## Factor Models

In [19]:
class FactorModel:
    def __init__(self, lookahead, lookback, regress_weighting):
        
        """
        lookahead: number of periods in the future to estimate
        lookback: number of periods in the past to use for estimations
        regress_weighting: array of size 4 with weight corresponding to each regression type; adds up to 1; 
        order is linear, lasso, ridge, SVR; in the case where there is one 1 and the rest 0's, there is no ensembling;
        can artifically call LSTM by setting all weights to 0
        """
        self.lookahead = lookahead
        self.lookback = lookback
        self.regress_weighting = regress_weighting
        return
               
    def get_param_estimate(self, rebal_date, data):
               
        if sum(self.regress_weighting) == 0:
            return self.get_mu_LSTM(rebal_date, data)

        elif sum(self.regress_weighting) == 1:
            return self.get_mu_Q_regression(rebal_date, data)
        
        else:
            return "ERROR: This regression weighting is not valid. Please make sure the weights sum to 1. You can also give all zeros for LSTM."
        
    def get_mu_Q_regression(self, rebal_date, data): 
        returns_data = data.stock_returns
        factor_data = data.factor_returns
        lookahead = self.lookahead
        lookback = self.lookback
        regress_weighting = self.regress_weighting
                
        # For keeping track of mu's and Q's from each period
        mu_arr = []
        Q_arr = []

        n_factors = len(factor_data.columns)
        


        returns_data = data.get_lookback_data(returns_data, rebal_date, lookback)
        factor_data = data.get_lookback_data(factor_data, rebal_date, lookback)

        
        
        for i in range(0, lookahead):

            # Calculate the factor covariance matrix

            F = factor_data.loc[:, factor_data.columns != 'Ones'].cov()

            # Calculate the factor expected excess return from historical data using the geometric mean
            factor_data['Ones'] = [1 for i in range(len(factor_data))]
            gmean = stats.gmean(factor_data + 1,axis=0) - 1

            # Set up X and Y to determine alpha and beta
            X = factor_data
            Y = returns_data
            X = X.to_numpy()
            Y = Y.to_numpy()
 
            ### LINEAR REGRESSION
        
            model = LinearRegression().fit(X,Y)
            alpha = model.intercept_
            beta = model.coef_[:,0:n_factors]

            # Calculate the residuals 
            alpha = np.reshape(alpha,(alpha.size,1))
            epsilon = returns_data.to_numpy() - np.matmul(X, np.transpose(np.hstack((beta, alpha))))

            # Calculate the residual variance with "N - p - 1" degrees of freedom
            sigmaEp = np.sum(epsilon**2, axis=0) / (len(returns_data) - n_factors - 1)

            #  Calculate the asset expected excess returns
            mu_linear = model.predict([gmean])[0]

            # Calculate the diagonal matrix of residuals and the asset covariance matrix
            D = np.diag(sigmaEp)

            # Calculate the covariance matrix
            Q_linear = np.matmul(np.matmul(beta,F.to_numpy()),beta.T)+D


            ### LASSO REGRESSION

            model = Lasso().fit(X,Y)
            alpha = model.intercept_
            beta = model.coef_[:,0:n_factors]

            # Calculate the residuals 
            alpha = np.reshape(alpha,(alpha.size,1))
            epsilon = returns_data.to_numpy() - np.matmul(X, np.transpose(np.hstack((beta, alpha))))

            # Calculate the residual variance with "N - p - 1" degrees of freedom
            sigmaEp = np.sum(epsilon**2, axis=0) / (len(returns_data) - n_factors - 1)

            #  Calculate the asset expected excess returns
            mu_lasso = model.predict([gmean])[0]

            # Calculate the diagonal matrix of residuals and the asset covariance matrix
            D = np.diag(sigmaEp)

            # Calculate the covariance matrix
            Q_lasso = np.matmul(np.matmul(beta,F.to_numpy()),beta.T)+D


            ### RIDGE REGRESSION

            model = Ridge().fit(X,Y)
            alpha = model.intercept_
            beta = model.coef_[:,0:n_factors]

            # Calculate the residuals 
            alpha = np.reshape(alpha,(alpha.size,1))
            epsilon = returns_data.to_numpy() - np.matmul(X, np.transpose(np.hstack((beta, alpha))))

            # Calculate the residual variance with "N - p - 1" degrees of freedom
            sigmaEp = np.sum(epsilon**2, axis=0) / (len(returns_data) - n_factors - 1)

            #  Calculate the asset expected excess returns
            mu_ridge = model.predict([gmean])[0]

            # Calculate the diagonal matrix of residuals and the asset covariance matrix
            D = np.diag(sigmaEp)

            # Calculate the covariance matrix
            Q_ridge = np.matmul(np.matmul(beta,F.to_numpy()),beta.T)+D


            ### SUPPORT VECTOR REGRESSION

            model = make_pipeline(StandardScaler(), MultiOutputRegressor(LinearSVR(C=1, dual=False, loss="squared_epsilon_insensitive"))).fit(X, Y)
            beta = np.array([[model.named_steps['multioutputregressor'].estimators_[i].coef_[0:n_factors] for i in range(len(model.named_steps['multioutputregressor'].estimators_))]])[0]
            alpha = np.array([model.named_steps['multioutputregressor'].estimators_[i].intercept_[0] for i in range(len(model.named_steps['multioutputregressor'].estimators_))])

            # Calculate the residuals 
            alpha = np.reshape(alpha,(alpha.size,1))
            epsilon = returns_data.to_numpy() - np.matmul(X, np.transpose(np.hstack((beta, alpha))))

            # Calculate the residual variance with "N - p - 1" degrees of freedom
            sigmaEp = np.sum(epsilon**2, axis=0) / (len(returns_data) - n_factors - 1)

            #  Calculate the asset expected excess returns
            mu_SVR = model.predict([gmean])[0]

            # Calculate the diagonal matrix of residuals and the asset covariance matrix
            D = np.diag(sigmaEp)

            # Calculate the covariance matrix
            Q_SVR = np.matmul(np.matmul(beta,F.to_numpy()),beta.T)+D

        
            # Ensemble the methods
            mu = regress_weighting[0]*mu_linear + regress_weighting[1]*mu_lasso + regress_weighting[2]*mu_ridge + regress_weighting[3]*mu_SVR
            Q = regress_weighting[0]*Q_linear + regress_weighting[1]*Q_lasso + regress_weighting[2]*Q_ridge + regress_weighting[3]*Q_SVR

            # Add mu and Q to array
            mu_arr.append(mu)
            Q_arr.append(Q)

            # Update for next time step
            factor_data = factor_data[1:]
            factor_append = pd.Series(gmean, index = factor_data.columns)
            factor_data = factor_data.append(factor_append, ignore_index=True)

            returns_data = returns_data[1:]
            mu_append = pd.Series(mu, index=returns_data.columns)
            returns_data = returns_data.append(mu_append, ignore_index=True)   

        return mu_arr, Q_arr
        
    def get_mu_LSTM(self, rebal_date, data): 
        returns_data = data.stock_returns
        factor_data = data.factor_returns
        
        lookahead = self.lookahead
        lookback = self.lookback
        regress_weighting = self.regress_weighting

        returns_data = data.get_lookback_data(returns_data, rebal_date, lookback)
        factor_data = data.get_lookback_data(factor_data, rebal_date, lookback)
        
        tempx, tempy = self.generate_X_y(factor_data.values, returns_data.values, lookback, lookahead)
        train_x, test_x, train_y, test_y = self.traintest_split(tempx, tempy)

        # scale inputs
        scaled_train_x = (train_x - train_x.min())/(train_x.max() - train_x.min())
        scaled_test_x = (test_x - test_x.min())/(test_x.max() - test_x.min())
        scaled_train_y = (train_y - train_y.min())/(train_y.max() - train_y.min())
        scaled_test_y = (test_y - test_y.min())/(test_y.max() - test_y.min())

        mu = self.get_prediction(train_x, train_y, factor_data, lookback)
        return mu
    
    def generate_X_y(self, factor_data, returns_data, n_lookback, n_lookforward):
        X, y = list(), list()
        in_start = 0
        for i in range(len(factor_data)):
            in_end = in_start + n_lookback
            out_end = in_end + n_lookforward
            # ensure we have enough data for this instance
            if out_end <= len(factor_data):
                X.append(factor_data[in_start:in_end,:])
                y.append(returns_data[in_end:out_end,:])
            in_start += 1
        return np.array(X), np.array(y)
    
    def traintest_split(self, X, y):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        return X_train, X_test, y_train, y_test   
    
    def build_model(self, train_x, train_y):
        # define parameters
        verbose, epochs, batch_size = 0, 50, 16
        n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]

        # define model
        model = Sequential()
        model.add(LSTM(200, activation='relu', input_shape=(n_timesteps, n_features)))
        model.add(RepeatVector(n_outputs))
        model.add(LSTM(200, activation='relu', return_sequences=True))
        model.add(TimeDistributed(Dense(100, activation='relu')))
        model.add(TimeDistributed(Dense(train_y.shape[2])))
        model.compile(loss='mse', optimizer='adam')
        # fit network
        model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
        return model
    
    def forecast(self, model, history, n_lookback):
        # flatten data
        data = np.array(history)
        data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
        # retrieve last observations for lookback data
        input_x = data[-n_lookback:, :]
        # reshape into [1, n_lookback, n]
        input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))
        # forecast the next set
        yhat = model.predict(input_x, verbose=0)
        # we only want the vector forecast
        yhat = yhat[0]
        return yhat

    def evaluate_forecasts(self, actual, predicted):
        # calculate overall RMSE
        s = 0
        for row in range(actual.shape[0]):
            for col in range(actual.shape[1]):
                for k in range(actual.shape[2]):
                    s += (actual[row, col, k] - predicted[row, col, k])**2
        score = sqrt(s / (actual.shape[0] * actual.shape[1] * actual.shape[2]))
        return score

    def evaluate_model(self, train_x, train_y, test_x, test_y, n_lookback):
        # fit model
        model = self.build_model(train_x, train_y)
        history = [x for x in train_x]
        # walk-forward validation 
        predictions = list()
        for i in range(len(test_x)):
            yhat_sequence = self.forecast(model, history, n_lookback)
            # store the predictions
            predictions.append(yhat_sequence)
            # get real observation and add to history for predicting the next set
            history.append(test_x[i, :])
        # evaluate predictions 
        predictions = np.array(predictions)
        score = self.evaluate_forecasts(test_y, predictions)
        plt.plot(model.history.history['loss'])
        #plt.plot(model.history.history['val_loss'])
        return score
    
    def get_prediction(self, train_x, train_y, factor_data, lookback):
        model = self.build_model(train_x, train_y)
        return self.forecast(model, factor_data.tail(lookback), lookback)


### HMM Investigation

In [10]:
    
class Regime:
    def __init__(self, data ,t):
        train_prices=None
        train_returns=None
        train_dates=None
        self.get_train_data(data,t)

    def get_train_data(self, data, t):
        mkt_data = data.factor_returns["Mkt-RF"]+data_set.factor_returns["RF"]
        first_date="2010-01-01"
        mkt_returns=mkt_data[first_date:t]
        self.train_dates=mkt_returns.index

        mkt_returns=np.array(mkt_returns.values)
        mkt_prices = 100*(np.array([x+1 for x in mkt_returns]).cumprod())
        mkt_prices=np.expand_dims(mkt_prices,axis=1)
        mkt_returns=np.expand_dims(mkt_returns,axis=1)
        self.train_prices=mkt_prices
        self.train_returns=mkt_returns


    def HMM (self, num_hs):
        model=hmm.GaussianHMM(n_components=num_hs)
        model.fit(self.train_returns)
        return model

In [12]:
t="2019-12-31"
reg=Regime(data_set,t)
reg_model =reg.HMM(2)

NameError: name 'data_set' is not defined

In [13]:
out=reg_model.predict(reg.train_returns)
test=np.array(list(map(bool,out)))

NameError: name 'reg_model' is not defined

In [None]:

        plt.figure(figsize=(12,6))
        plt.scatter(reg.train_dates[test],reg.train_prices[test])
        plt.scatter(reg.train_dates[~test],reg.train_prices[~test])

        plt.xticks(rotation=45)
        #plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        plt.xlabel("Date")
        plt.ylabel("Cumulative Return")