In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import ElasticNet

from ts_utils import OOSR2, modOOSR2

Train/Test split

In [2]:
# Read the dataset with "Date" as the index, convert that index to datetimes,
# and keep four decimal places on the values.
df = pd.read_csv("../.data/timeseries.csv", index_col="Date")
df.index = pd.to_datetime(df.index)
df = df.round(4)

# Positional hold-out split: the first 350 rows are used for fitting,
# everything after them for evaluation.
train, test = df.iloc[:350], df.iloc[350:]

# Every column except the target is a feature; both sides become NumPy arrays.
target_col = "EXCESS_RETURN_T+1"
X_train = train.drop(columns=[target_col]).values
y_train = train[target_col].values
X_test = test.drop(columns=[target_col]).values
y_test = test[target_col].values

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((350, 49), (350,), (165, 49), (165,))

Mean model

In [3]:
# Benchmark model: ignores the features and always predicts the mean of y_train.
naive_mean = DummyRegressor(strategy="mean").fit(X_train, y_train)

# Test-set score of the benchmark under modOOSR2 (imported from ts_utils).
modOOSR2(y_test, naive_mean.predict(X_test))

0.023305483729676668

Elastic Net

In [4]:
# Elastic Net tuned with a grid search over walk-forward cross-validation folds.
# (ElasticNet is imported with the other dependencies in the notebook's import cell.)

# The scaler step starts as a no-op; which scaler to use (if any) is tuned
# together with the regressor's hyper-parameters.
pipeline = Pipeline([
    ("scaler", "passthrough"),
    ("regressor", ElasticNet()),
])

# 4 scalers x 10 alphas x 100 l1_ratios = 4,000 candidates; with 3 CV splits
# that is 12,000 fits plus the final refit.
# NOTE(review): in the saved run the selected alpha (0.01) is the smallest value
# in this grid -- consider extending the grid downward before trusting the optimum.
param_grid = {
    "scaler": ["passthrough", MinMaxScaler(), StandardScaler(), MaxAbsScaler()],
    "regressor__alpha": [0.01, 0.1, 1, 10, 25, 50, 100, 250, 500, 1000],
    # 100 evenly spaced mixing weights in [0.01, 1].
    "regressor__l1_ratio": list(np.linspace(0.01, 1, 100)),
}

# Time-ordered splits: each validation fold comes after the rows it was trained on.
tscv = TimeSeriesSplit(n_splits=3)

# Candidates are ranked by modOOSR2 (from ts_utils, wrapped as a
# greater-is-better scorer) and the best one is refit on all of X_train.
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring={"modOOSR2": make_scorer(modOOSR2)},
    refit="modOOSR2",
    cv=tscv,
    n_jobs=-1,  # use every available core
)

grid_search.fit(X_train, y_train)

In [5]:
# Keep the full cross-validation record of the grid search as a DataFrame
# for later inspection (not displayed here).
results = pd.DataFrame(data=grid_search.cv_results_)

In [6]:
# Parameter combination that scored best on the refit metric ("modOOSR2") during the search.
grid_search.best_params_

{'regressor__alpha': 0.01,
 'regressor__l1_ratio': 0.12,
 'scaler': MaxAbsScaler()}

In [7]:
# Held-out test score of the refit best pipeline under modOOSR2
# (imported from ts_utils; its exact definition is not shown in this notebook).
modOOSR2(y_test, grid_search.predict(X_test))

0.043942995775844484

In [8]:
# Compare the tuned model's test predictions with the naive-mean model's predictions via OOSR2
# (imported from ts_utils; presumably an out-of-sample R² relative to that benchmark -- confirm).
OOSR2(y_test, grid_search.predict(X_test), naive_mean.predict(X_test))

0.021129955889355956

In [9]:
# Pull the fitted Elastic Net out of the refit best pipeline and show its
# coefficient vector together with its intercept.
best = grid_search.best_estimator_
fitted_regressor = best.named_steps["regressor"]
fitted_regressor.coef_, fitted_regressor.intercept_

(array([ 0.        , -0.        , -0.        ,  0.        , -0.        ,
        -0.01115464, -0.        ,  0.        , -0.        , -0.        ,
         0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        -0.        , -0.        ,  0.        , -0.        ,  0.        ,
        -0.        ,  0.        ,  0.        , -0.        , -0.        ,
         0.        , -0.        , -0.        , -0.        , -0.        ,
        -0.        ,  0.        , -0.        ,  0.        ,  0.        ,
         0.        , -0.        ,  0.        ,  0.        , -0.        ,
        -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        , -0.        ]),
 0.008840672344829548)