In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

from panel_utils import *

from sklearn.linear_model import Lasso
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
from sklearn.dummy import DummyRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from sklearn.utils.fixes import loguniform

In [None]:
df = pd.read_csv("panel_bigcap.csv", index_col="Date")

In [2]:
#scale_interact_sort(df=df, micro_cols=micro_cols, macro_cols=macro_cols)
%store -r df

In [3]:
train = df.loc[:"2013-12-31"]
test = df.loc["2014-01-31":]

X_train, y_train = train.drop(["EXCESS_RETURN_T+1"], axis=1), train["EXCESS_RETURN_T+1"]
X_test, y_test = test.drop(["EXCESS_RETURN_T+1"], axis=1), test["EXCESS_RETURN_T+1"]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((294327, 55), (294327,), (169245, 55), (169245,))

In [4]:
naive_mean = DummyRegressor(strategy="mean")
naive_mean.fit(X_train.values, y_train.values)
modOOSR2(y_test.values, naive_mean.predict(X_test.values))

0.0003301052587154629

In [5]:
pipeline = Pipeline([
    ('scaler', "passthrough"),
    ('regressor', Lasso())
])

param_dist = {
    'scaler': ["passthrough"],
    'regressor__alpha': loguniform(0.001,10)
}

grid_search = RandomizedSearchCV(
    estimator=pipeline, 
    param_distributions= param_dist,
    scoring={"modOOSR2": make_scorer(modOOSR2)},
    refit="modOOSR2", 
    cv=holdout_cv(X_train, n_test = 3),
    n_iter=1000, 
    n_jobs=-1,
    pre_dispatch =8,
    error_score= "raise"
    )

grid_search.fit(X_train.values, y_train.values)

RandomizedSearchCV(cv=<generator object holdout_cv at 0x7fec5b2f46d0>,
                   error_score='raise',
                   estimator=Pipeline(steps=[('scaler', 'passthrough'),
                                             ('regressor', Lasso())]),
                   n_iter=1000, n_jobs=-1,
                   param_distributions={'regressor__alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fec5aa98400>,
                                        'scaler': ['passthrough']},
                   pre_dispatch=8, refit='modOOSR2',
                   scoring={'modOOSR2': make_scorer(modOOSR2)})

In [6]:
results = pd.DataFrame(grid_search.cv_results_)
results[results["rank_test_modOOSR2"] <= 5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_regressor__alpha,param_scaler,params,split0_test_modOOSR2,mean_test_modOOSR2,std_test_modOOSR2,rank_test_modOOSR2
97,0.392498,0.0,0.00587,0.0,0.00126,passthrough,"{'regressor__alpha': 0.001260283352939505, 'sc...",0.02829,0.02829,0.0,2
147,0.438482,0.0,0.004391,0.0,0.001228,passthrough,"{'regressor__alpha': 0.0012275547771673555, 's...",0.028234,0.028234,0.0,5
468,0.399959,0.0,0.004238,0.0,0.001273,passthrough,"{'regressor__alpha': 0.0012725100667238176, 's...",0.028256,0.028256,0.0,3
635,0.412332,0.0,0.004291,0.0,0.001257,passthrough,"{'regressor__alpha': 0.0012571541519866812, 's...",0.028299,0.028299,0.0,1
836,0.404941,0.0,0.004388,0.0,0.00128,passthrough,"{'regressor__alpha': 0.0012796316230731066, 's...",0.028236,0.028236,0.0,4


In [8]:
grid_search.best_estimator_

Pipeline(steps=[('scaler', 'passthrough'),
                ('regressor', Lasso(alpha=0.0012571541519866812))])

In [9]:
modOOSR2(y_test.values, grid_search.predict(X_test.values))

0.00030517914808025903

In [10]:
OOSR2(y_test.values, grid_search.predict(X_test.values), naive_mean.predict(X_test.values))

-2.4934341592564024e-05

In [11]:
best = grid_search.best_estimator_
best.named_steps["regressor"].intercept_

0.01762033852393134

In [12]:
best.named_steps["regressor"].coef_

array([-0.        ,  0.        , -0.        , -0.        , -0.        ,
       -0.00843082, -0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        ,  0.        ,  0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ])