Initial imports

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer

Test/Train split

In [16]:
# Load the raw time series and round every numeric column to 4 decimals,
# then turn the "Date" index into a proper DatetimeIndex.
df = pd.read_csv("data/timeseries.csv", index_col="Date").round(4)
df.index = pd.to_datetime(df.index)

In [17]:
# Positional hold-out split: the first 350 rows are used for fitting,
# everything after that is kept for out-of-sample evaluation.
train, test = df.iloc[:350], df.iloc[350:]

In [18]:
# Features are every column except the target; the target is "EXCESS_RETURN_T+1".
X_train = train.drop(columns=["EXCESS_RETURN_T+1"]).values
y_train = train["EXCESS_RETURN_T+1"].values

X_test = test.drop(columns=["EXCESS_RETURN_T+1"]).values
y_test = test["EXCESS_RETURN_T+1"].values

In [19]:
# Sanity check: shapes of the train/test feature matrices and target vectors.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((350, 49), (350,), (165, 49), (165,))

Modified and standard OOS R2

In [20]:
def modOOSR2(y_true, y_pred):
    """Modified out-of-sample R^2, benchmarked against an all-zero forecast.

    Computes ``1 - sum((y_true - y_pred)**2) / sum(y_true**2)``: the usual
    R^2 formula with the benchmark model replaced by a constant prediction
    of zero.

    Parameters
    ----------
    y_true : array-like
        Realised target values.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The score, reduced over axis 0. 1.0 is a perfect fit, 0.0 is as good
        as always predicting zero, negative values are worse than that.

    Notes
    -----
    If ``y_true`` is identically zero the denominator is zero and the result
    is not finite.
    """
    # Coerce to ndarrays so plain sequences work too: ``list - list`` raises
    # TypeError, and ``.sum(axis=..., dtype=...)`` below is the ndarray API.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Accumulate in float64 regardless of the input dtype.
    numerator = ((y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
    denominator = (y_true ** 2).sum(axis=0, dtype=np.float64)
    return 1 - (numerator / denominator)

def OOSR2(y_true, y_pred, mean_model_pred):
    """Out-of-sample R^2 relative to a benchmark (mean-model) forecast.

    Computes
    ``1 - sum((y_true - y_pred)**2) / sum((y_true - mean_model_pred)**2)``.

    Parameters
    ----------
    y_true : array-like
        Realised target values.
    y_pred : array-like
        Predictions of the model being evaluated.
    mean_model_pred : scalar or array-like
        Predictions of the benchmark model, broadcastable against ``y_true``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The score, reduced over axis 0. 1.0 is a perfect fit, 0.0 matches the
        benchmark, negative values are worse than the benchmark.

    Notes
    -----
    If the benchmark predicts ``y_true`` exactly the denominator is zero and
    the result is not finite.
    """
    # Coerce to ndarrays so plain sequences work too: ``list - list`` raises
    # TypeError, and ``.sum(axis=..., dtype=...)`` below is the ndarray API.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mean_model_pred = np.asarray(mean_model_pred)

    # Accumulate in float64 regardless of the input dtype.
    numerator = ((y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
    denominator = ((y_true - mean_model_pred) ** 2).sum(axis=0, dtype=np.float64)
    return 1 - (numerator / denominator)

Naive models

In [22]:
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_absolute_error

# Two no-skill baselines: always predict the training-set mean, or always
# predict zero. `fit` returns the estimator itself, so it can be chained.
naive_mean = DummyRegressor(strategy="mean").fit(X_train, y_train)
naive_const = DummyRegressor(strategy="constant", constant=0).fit(X_train, y_train)

# Predict once per baseline and reuse the result for every metric.
mean_pred = naive_mean.predict(X_test)
const_pred = naive_const.predict(X_test)

res1 = mean_absolute_error(y_test, mean_pred)
res2 = mean_absolute_error(y_test, const_pred)
res3 = modOOSR2(y_test, mean_pred)

res1, res2, res3

(0.033644436363636364, 0.03438000000000001, 0.023305483729676668)

Simple and penalized linear models

In [None]:
from sklearn.linear_model import ElasticNet, Lasso, Ridge, LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler


# Ridge regression on max-abs-scaled features. The scaler lives inside the
# pipeline so it is fitted on each cross-validation training fold only.
pipeline = Pipeline([
    ('scaler', MaxAbsScaler()),
    ('regressor', Ridge())
])

param_grid = {
    'regressor__alpha': [0.1, 1, 10, 20, 30, 40, 50],
    # Disabled candidates -- each key only applies when the matching step is
    # actually in the pipeline (e.g. feature_range needs a MinMaxScaler,
    # l1_ratio needs an ElasticNet regressor):
    # 'scaler': [MinMaxScaler((-1, 1)), StandardScaler()],
    # 'scaler__feature_range': [(0, 1), (0, 5)],
    # 'regressor__l1_ratio': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
}

# Time-ordered splits: every validation fold comes after its training fold.
tscv = TimeSeriesSplit(n_splits=3)

grid_search = GridSearchCV(
    pipeline,
    param_grid,
    # Score with the notebook's modOOSR2 metric; make_scorer treats higher
    # values as better by default, which is the right direction for an R^2.
    scoring={"modOOSR2": make_scorer(modOOSR2)},
    # With a dict of scorers, `refit` must name the one used to pick the winner.
    refit="modOOSR2",
    cv=tscv,
    n_jobs=-1,
)

grid_search.fit(X_train, y_train)


In [None]:
# Out-of-sample score of the refitted best pipeline on the held-out test set,
# using the modOOSR2 metric defined in this notebook.
modOOSR2(y_test, grid_search.predict(X_test))

In [None]:
# Put the grid search's cross-validation results (`cv_results_`) into a
# DataFrame so they can be inspected as a table.
results = pd.DataFrame(grid_search.cv_results_)
results

In [None]:
# Show the pipeline's parameters; nested keys such as `regressor__alpha`
# are the names that can be used in `param_grid`.
pipeline.get_params()

In [None]:
# The pipeline refitted with the winning parameter combination
# (available because `refit` was set on the grid search).
grid_search.best_estimator_

In [None]:
# Parameter combination selected by the grid search.
grid_search.best_params_

In [None]:
# Cross-validated score (the `refit` metric, "modOOSR2") of the selected
# parameter combination.
grid_search.best_score_