In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

from panel_utils import *

from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
from sklearn.dummy import DummyRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.utils.fixes import loguniform

In [None]:
# Load the panel from CSV, using the "Date" column as the row index.
# NOTE(review): this cell has no execution count (In [None]) and the following
# cell replaces `df` with `%store -r df`, so this frame is not the one the rest
# of the notebook actually used - confirm which source is intended.
# NOTE(review): no `parse_dates` is passed, so the index is presumably left as
# plain strings; the date-label slicing further down would then rely on string
# ordering - confirm the on-disk date format.
df = pd.read_csv("panel_bigcap.csv", index_col="Date")

In [2]:
# Restore `df` from IPython's %store database rather than rebuilding it in this
# kernel. This overwrites any `df` created by an earlier cell.
# NOTE(review): presumably the stored frame is the result of the
# scale_interact_sort(...) call kept (commented out) below - confirm. On a
# machine where `df` was never stored, nothing past this cell can run.
#scale_interact_sort(df=df, micro_cols=micro_cols, macro_cols=macro_cols)
%store -r df

In [3]:
# Chronological train/test split on the row index.
#
# Training data is everything labelled up to and including 2013-12-31; the test
# period starts on the very next calendar day. The two boundaries are adjacent
# so that no row can fall between them (a test start of "2014-01-31" would
# silently drop any rows dated 2014-01-01 .. 2014-01-30 from both sets). For
# data that only has month-end rows the selection is unchanged.
# NOTE(review): label slicing assumes the index is date-like and sorted - confirm.
TARGET = "EXCESS_RETURN_T+1"  # column to predict; every other column is a feature

train = df.loc[:"2013-12-31"]
test = df.loc["2014-01-01":]

X_train, y_train = train.drop(columns=[TARGET]), train[TARGET]
X_test, y_test = test.drop(columns=[TARGET]), test[TARGET]

# Shapes of the four pieces, shown as the cell output.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((294327, 55), (294327,), (169245, 55), (169245,))

In [4]:
# Baseline model: DummyRegressor with strategy="mean" ignores the features and
# always predicts the mean of the training target. Its test-period score is the
# cell output.
# NOTE(review): modOOSR2 is not defined in this notebook; presumably it comes
# from the `panel_utils` star import - confirm.
naive_mean = DummyRegressor(strategy="mean").fit(X=X_train.values, y=y_train.values)
modOOSR2(
    y_test.values,
    naive_mean.predict(X=X_test.values),
)

0.0003301052587154629

In [5]:
# Randomized search over the Ridge penalty `alpha`.
#
# The pipeline has a placeholder 'scaler' step (currently "passthrough", i.e. no
# scaling) followed by the Ridge regressor. `alpha` is drawn from a log-uniform
# distribution on [0.001, 100].
#
# The search is stochastic: without a fixed seed every run samples a different
# set of 1000 alphas, so the selected model and all downstream scores change
# between runs. SEARCH_SEED pins the draw so Restart & Run All is reproducible.
SEARCH_SEED = 42

pipeline = Pipeline([
    ('scaler', "passthrough"),
    ('regressor', Ridge())
])

param_dist = {
    'scaler': ["passthrough"],
    'regressor__alpha': loguniform(0.001,100)
}

grid_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    # Single custom metric; refit="modOOSR2" makes it the selection criterion
    # and refits the best candidate so grid_search.predict() can be used later.
    scoring={"modOOSR2": make_scorer(modOOSR2)},
    refit="modOOSR2",
    # NOTE(review): holdout_cv is a project helper not shown here; presumably
    # it yields a time-ordered validation split of the training data - confirm
    # the meaning of n_test.
    cv=holdout_cv(X_train, n_test=3),
    n_iter=1000,
    n_jobs=-1,
    pre_dispatch=8,
    # Fail loudly if any candidate errors instead of recording a NaN score.
    error_score="raise",
    random_state=SEARCH_SEED,
)

grid_search.fit(X_train.values, y_train.values)

RandomizedSearchCV(cv=<generator object holdout_cv at 0x7f8cf9def970>,
                   error_score='raise',
                   estimator=Pipeline(steps=[('scaler', 'passthrough'),
                                             ('regressor', Ridge())]),
                   n_iter=1000, n_jobs=-1,
                   param_distributions={'regressor__alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f8cf95a43d0>,
                                        'scaler': ['passthrough']},
                   pre_dispatch=8, refit='modOOSR2',
                   scoring={'modOOSR2': make_scorer(modOOSR2)})

In [6]:
# Tabulate every sampled candidate from the search, then show only the rows
# whose rank on the modOOSR2 metric is 5 or better.
results = pd.DataFrame.from_dict(grid_search.cv_results_)
results.loc[results["rank_test_modOOSR2"].le(5)]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_regressor__alpha,param_scaler,params,split0_test_modOOSR2,mean_test_modOOSR2,std_test_modOOSR2,rank_test_modOOSR2
255,0.270127,0.0,0.003974,0.0,9.686466,passthrough,"{'regressor__alpha': 9.686465837231866, 'scale...",-0.008064,-0.008064,0.0,3
473,0.237075,0.0,0.004199,0.0,9.760669,passthrough,"{'regressor__alpha': 9.760669065305775, 'scale...",-0.008055,-0.008055,0.0,2
633,0.255431,0.0,0.004009,0.0,9.790733,passthrough,"{'regressor__alpha': 9.790732993906083, 'scale...",-0.008051,-0.008051,0.0,1
686,0.290803,0.0,0.004163,0.0,9.429633,passthrough,"{'regressor__alpha': 9.429632919222655, 'scale...",-0.008097,-0.008097,0.0,4
879,0.255438,0.0,0.004348,0.0,9.349814,passthrough,"{'regressor__alpha': 9.349814178333366, 'scale...",-0.008108,-0.008108,0.0,5


In [7]:
# Show the pipeline the search selected (the `best_estimator_` attribute of the
# fitted RandomizedSearchCV), including the chosen Ridge alpha.
grid_search.best_estimator_

Pipeline(steps=[('scaler', 'passthrough'),
                ('regressor', Ridge(alpha=9.790732993906083))])

In [8]:
# Score the search's refit best pipeline on the held-out test period with the
# same modOOSR2 metric that was used for model selection.
# NOTE(review): modOOSR2 is not defined in this notebook; presumably it comes
# from the `panel_utils` star import - confirm its exact definition.
modOOSR2(y_test.values, grid_search.predict(X_test.values))

-0.0010259885276366898

In [9]:
# Call OOSR2 on the test targets and the tuned pipeline's test predictions,
# passing the naive-mean baseline's test predictions as the third argument.
# NOTE(review): OOSR2 is not defined in this notebook; presumably it measures
# the model relative to those baseline predictions - confirm against its
# definition.
OOSR2(y_test.values, grid_search.predict(X_test.values), naive_mean.predict(X_test.values))

-0.0013565415878640241

In [10]:
# Pull the fitted Ridge step out of the selected pipeline and display its
# coefficient vector together with the intercept.
best = grid_search.best_estimator_
ridge_step = best.named_steps["regressor"]
(ridge_step.coef_, ridge_step.intercept_)

(array([ 0.04229688,  0.00737443, -0.02569963, -0.06532691, -0.09150883,
         0.0092389 ,  0.02946033,  0.07038718,  0.00953123, -0.01744176,
         0.01651191, -0.00316441,  0.01170391, -0.01378238,  0.03353376,
        -0.02798889,  0.02487381, -0.00564852, -0.00802534, -0.01400848,
        -0.00274496, -0.01192489,  0.00226813, -0.06102902, -0.01373055,
         0.00063435, -0.00516124, -0.00119801,  0.00897016, -0.07664896,
         0.06922512,  0.01257476,  0.02007778, -0.0238734 , -0.08841392,
         0.06576711, -0.08738049, -0.08051517, -0.03437921,  0.16742652,
         0.04232758,  0.12711123,  0.12774162, -0.0112015 , -0.08597463,
        -0.13860629,  0.01474063, -0.0434012 ,  0.07819417,  0.01560654,
         0.03260628,  0.02312108,  0.03828209, -0.0215217 , -0.13160893]),
 0.10570958988764538)