In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.dummy import DummyRegressor
from sklearn.utils.fixes import loguniform

import scipy.stats as stats

from ts_utils import OOSR2, modOOSR2

In [2]:
# Read the raw table indexed by "Date", round every value to 4 decimals,
# then convert the index to datetimes.
df = pd.read_csv("../.data/timeseries.csv", index_col="Date").round(4)
df.index = pd.to_datetime(df.index)

# Positional hold-out split: the first 350 rows train, the remainder tests.
train, test = df.iloc[:350], df.iloc[350:]

# Separate the predictors from the target column as plain NumPy arrays.
X_train = train.drop(columns=["EXCESS_RETURN_T+1"]).values
y_train = train["EXCESS_RETURN_T+1"].values
X_test = test.drop(columns=["EXCESS_RETURN_T+1"]).values
y_test = test["EXCESS_RETURN_T+1"].values

# Show the four shapes as a quick sanity check of the split.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((350, 49), (350,), (165, 49), (165,))

In [3]:
# Baseline: a constant predictor that always outputs the training-set mean of y.
naive_mean = DummyRegressor(strategy="mean").fit(X_train, y_train)

# Score the baseline on the hold-out rows with modOOSR2 (imported from ts_utils).
modOOSR2(
    y_test,
    naive_mean.predict(X_test),
)

0.023305483729676668

In [17]:
from sklearn.svm import LinearSVR

# Two-step pipeline. The scaler slot starts as a no-op and is swapped out by the
# search below. random_state is fixed because LinearSVR's solver shuffles the data
# internally; without a seed the selected model can change from run to run.
pipeline = Pipeline([
    ('scaler', "passthrough"),
    ('regressor', LinearSVR(max_iter=50000, random_state=0)),
])

# Search space: 4 scaling options x 500 log-spaced C values between 1e-8 and 1e1.
param_grid = {
    'scaler': ["passthrough", MinMaxScaler(), StandardScaler(), MaxAbsScaler()],
    "regressor__C": list(np.logspace(-8, 1, num=500)),
}

# Time-ordered folds: each validation fold comes after the rows used to train on.
tscv = TimeSeriesSplit(n_splits=3)

# make_scorer defaults to greater_is_better=True, so the search maximizes modOOSR2.
# refit names the metric used to pick and refit the best pipeline on all of
# (X_train, y_train).
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring={"modOOSR2": make_scorer(modOOSR2)},
    refit="modOOSR2",
    cv=tscv,
    n_jobs=-1,
)

grid_search.fit(X_train, y_train)

In [18]:
# Tabulate the per-candidate cross-validation results for later inspection.
results = pd.DataFrame(data=grid_search.cv_results_)

In [19]:
# Show the parameter combination with the best mean cross-validated modOOSR2
# (the metric named in `refit` when the search was built).
grid_search.best_params_

{'regressor__C': 5.855110155867244e-07, 'scaler': StandardScaler()}

In [20]:
# Hold-out modOOSR2 of the refit best pipeline; `grid_search.predict` delegates to
# `best_estimator_`. Compare this against the naive-mean baseline score.
modOOSR2(y_test, grid_search.predict(X_test))

0.00015735558279428297

In [21]:
# Pull the refit winning pipeline and its fitted regressor step.
best = grid_search.best_estimator_
best_regressor = best.named_steps["regressor"]

# Display the learned linear weights alongside the intercept.
(best_regressor.coef_, best_regressor.intercept_)

(array([ 3.26749011e-06, -6.34872102e-06, -3.83501151e-06, -2.42384270e-06,
        -1.23084893e-05, -1.34682070e-05, -6.55185922e-06, -3.32734195e-06,
         9.12283055e-08, -1.20014560e-05,  3.03524824e-06,  6.43429374e-06,
         1.17315638e-06, -2.48952581e-05,  3.24170540e-06, -1.52889885e-05,
        -1.06255100e-05,  1.71177185e-06, -2.25581882e-06, -5.67648903e-06,
        -1.58027403e-05,  1.67131830e-06,  6.21472067e-06,  1.78432076e-06,
        -5.43206389e-06,  3.00015273e-06, -1.88074563e-06, -2.38836624e-06,
         2.80675190e-06, -9.22932230e-06, -7.10128290e-06,  1.32845204e-06,
         1.08482484e-06,  8.26930999e-06, -6.74204293e-08,  1.00698040e-06,
        -7.95571575e-07,  8.16743481e-06,  1.29211083e-05, -7.61264055e-06,
        -8.66402071e-06, -1.72674674e-05, -1.53856148e-05,  3.77295513e-06,
         3.43239232e-06,  1.18604812e-06, -5.59757827e-06,  3.10642168e-06,
        -1.60457692e-05]),
 array([3.16175948e-05]))