In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.dummy import DummyRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler

from ts_utils import OOSR2, modOOSR2

Train/Test split

In [15]:
# Name of the target column; every other column is used as a feature.
# (Presumably next-period excess return, going by the name — confirm with the data source.)
TARGET_COL = "EXCESS_RETURN_T+1"
# Number of leading rows used for training; all remaining rows form the test set.
N_TRAIN = 350

df = pd.read_csv("../.data/timeseries.csv", index_col="Date")
df.index = pd.to_datetime(df.index)
df = df.round(4)

# The split below is positional, so it is only a chronological split when the
# rows are already ordered by date. Fail loudly rather than silently letting
# later observations end up in the training window.
assert df.index.is_monotonic_increasing, "rows must be sorted by Date before splitting"

train = df.iloc[:N_TRAIN]
test = df.iloc[N_TRAIN:]

# Plain NumPy arrays: features are all columns except the target.
X_train, y_train = train.drop(columns=[TARGET_COL]).values, train[TARGET_COL].values
X_test, y_test = test.drop(columns=[TARGET_COL]).values, test[TARGET_COL].values

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((350, 49), (350,), (165, 49), (165,))

Mean model

In [16]:
# Benchmark model: always predicts the mean of the training targets and
# ignores the features entirely.
naive_mean = DummyRegressor(strategy="mean").fit(X_train, y_train)

# Score the benchmark on the held-out test rows.
# (modOOSR2 is defined in ts_utils — see there for the exact formula.)
naive_test_pred = naive_mean.predict(X_test)
modOOSR2(y_test, naive_test_pred)

0.023305483729676668

Ridge

In [17]:
# Ridge regression behind an optional feature scaler. The "scaler" step starts
# as a "passthrough" placeholder so that the grid search can swap in each
# candidate scaler (or none at all) like any other hyperparameter.
pipeline = Pipeline([
    ("scaler", "passthrough"),
    ("regressor", Ridge()),
])

# Every scaler choice is crossed with every regularisation strength.
param_grid = {
    "scaler": ["passthrough", MinMaxScaler(), StandardScaler(), MaxAbsScaler()],
    "regressor__alpha": [0.01, 0.1, 1, 10, 25, 50, 100, 250, 500, 1000],
}

# Time-ordered CV: each fold validates on rows that come after its training rows.
tscv = TimeSeriesSplit(n_splits=3)

# make_scorer is used with its defaults, i.e. a larger modOOSR2 is treated as
# better; the best combination is then refit on all of X_train.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring={"modOOSR2": make_scorer(modOOSR2)},
    refit="modOOSR2",
    cv=tscv,
    n_jobs=-1,
)

grid_search.fit(X_train, y_train)

In [18]:
# Tabulate the cross-validation results of the grid search for later
# inspection; the table is kept in `results` but not displayed here.
cv_results = grid_search.cv_results_
results = pd.DataFrame(cv_results)

In [19]:
# Hyperparameter combination selected by the grid search (the one it refit).
best_params = grid_search.best_params_
best_params

{'regressor__alpha': 500, 'scaler': MaxAbsScaler()}

In [20]:
# Score the refit best pipeline on the held-out test rows.
# (modOOSR2 is defined in ts_utils — see there for the exact formula.)
ridge_test_pred = grid_search.predict(X_test)
modOOSR2(y_test, ridge_test_pred)

0.02852032405562377

In [21]:
# Ridge predictions versus the naive-mean model's predictions on the test rows.
# NOTE(review): the third argument looks like the benchmark forecast — confirm in ts_utils.
ridge_pred = grid_search.predict(X_test)
benchmark_pred = naive_mean.predict(X_test)
OOSR2(y_test, ridge_pred, benchmark_pred)

0.005339274705729813

In [22]:
# Pull the fitted Ridge step out of the winning pipeline and show its
# coefficient vector together with the intercept.
best = grid_search.best_estimator_
fitted_ridge = best.named_steps["regressor"]
fitted_ridge.coef_, fitted_ridge.intercept_

(array([ 3.95780005e-04, -2.43046238e-04, -3.76588471e-04,  2.05804233e-04,
        -2.28921216e-04, -1.53645650e-03, -3.58547713e-04,  2.66839081e-04,
        -1.33073262e-04, -8.46794897e-04, -7.62822662e-06,  9.64069781e-05,
         6.70854337e-05, -5.24893003e-04, -7.75354059e-05, -8.02863731e-04,
        -6.56670281e-04,  1.52760126e-04, -2.62496348e-04,  2.72926461e-04,
        -7.72714468e-04,  3.60471443e-04,  2.40074935e-04, -6.04204670e-05,
        -4.50873147e-04,  4.11251191e-04, -1.71753420e-04, -3.06606933e-04,
        -2.17436412e-04, -3.56954807e-04, -3.66229849e-04,  1.87489940e-04,
        -3.87058815e-04,  4.94277777e-04,  3.65239797e-04,  3.33854226e-04,
        -7.05530440e-05,  1.92558003e-05,  2.88205228e-04, -1.99257036e-04,
        -2.30144518e-04, -2.47563607e-04,  4.95917976e-05,  1.44484446e-04,
         9.11076695e-05,  8.64038979e-05,  2.64448413e-04,  1.64719357e-04,
        -6.94749047e-05]),
 0.0045175612435021896)