# Cross validation

## Imports

In [1]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Connection settings are read from environment variables so that the password
# is not stored in the notebook. Host, user and database have defaults and can
# be overridden the same way.
server = os.environ.get("GHZ_DB_SERVER", "mssql-82792-0.cloudclusters.net:16272")
username = os.environ.get("GHZ_DB_USER", "user")
database = os.environ.get("GHZ_DB_NAME", "ghz")

# Prompt interactively only when GHZ_DB_PASSWORD is not set (or is empty).
password = os.environ.get("GHZ_DB_PASSWORD") or getpass("Database password: ")

# Percent-encode the credentials: characters such as "@", ":" or "/" would
# otherwise be parsed as URL delimiters and break the connection string.
string = (
    "mssql+pymssql://"
    + quote_plus(username)
    + ":"
    + quote_plus(password)
    + "@"
    + server
    + "/"
    + database
)

# Open one connection that the query cells below reuse.
conn = create_engine(string).connect()


## EXAMPLE: TRAIN ON 2021-12, PREDICT FOR 2022-01

In [2]:
# Predictor columns; "ret" is loaded alongside them but is not a feature.
features = [
    "agr",
    "bm",
    "idiovol",
    "mom12m",
    "roeq",
]

# Pull the two monthly cross-sections (2021-12 and 2022-01) in a single query.
df = pd.read_sql(
    sql="""
    select ticker, date, agr, bm, idiovol, mom12m, roeq, ret
    from data
    where date in ('2021-12', '2022-01')
    """,
    con=conn,
)

## TRANSFORM EACH CROSS-SECTION

In [3]:
from sklearn.preprocessing import QuantileTransformer
# Transformer that maps each column onto a normal distribution through its
# empirical quantiles. The fixed seed makes any random subsampling the
# transformer performs on large inputs repeatable from run to run.
qt = QuantileTransformer(output_distribution="normal", random_state=0)

def qtxs(d):
    """Quantile-transform every column of one group of rows.

    Refits the module-level ``qt`` transformer on ``d`` and returns the
    transformed values as a new DataFrame that carries the same index and
    column labels as ``d``.
    """
    values = qt.fit_transform(d)
    return pd.DataFrame(data=values, index=d.index, columns=d.columns)

# Apply the quantile transform separately within each date, to the features and
# to "ret". group_keys=False keeps the original row index on the result, so the
# assignment back into df lines up row by row.
xs_cols = features + ["ret"]
df[xs_cols] = df.groupby("date", group_keys=False)[xs_cols].apply(qtxs)

## FITTING A GRADIENT BOOSTING MODEL

In [5]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

# Discard every row that has a missing value in any column.
df = df.dropna()

# Training sample: rows dated 2021-12 only.
is_train = df["date"] == "2021-12"
Xtrain = df.loc[is_train, features]
ytrain = df.loc[is_train, "ret"]

# Baseline model with fixed hyperparameters and a fixed seed.
model = GradientBoostingRegressor(learning_rate=0.05, max_depth=3, random_state=0)
model.fit(Xtrain, ytrain)

GradientBoostingRegressor(learning_rate=0.05, random_state=0)

In [6]:
# Candidate hyperparameter values; the search evaluates every combination.
param_grid = {
    "max_depth": [3, 4],
    "learning_rate": [0.05, 0.1],
}

# Seed the estimator the same way as the baseline model so the search is
# repeatable. Fold count and scoring metric are left at GridSearchCV defaults.
cv = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=0),
    param_grid=param_grid,
)

# Only the last expression of a cell is displayed, so the fit needs no dummy
# assignment to stay silent.
cv.fit(Xtrain, ytrain)

# Show the per-candidate results, skipping the first four columns
# (fit/score timing statistics in current scikit-learn; confirm if upgrading).
pd.DataFrame(cv.cv_results_).iloc[:, 4:]

Unnamed: 0,param_learning_rate,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.05,3,"{'learning_rate': 0.05, 'max_depth': 3}",0.205606,0.200925,0.128059,0.053506,0.163218,0.150263,0.05595,1
1,0.05,4,"{'learning_rate': 0.05, 'max_depth': 4}",0.19524,0.201872,0.117736,0.016117,0.169546,0.140102,0.068695,2
2,0.1,3,"{'learning_rate': 0.1, 'max_depth': 3}",0.19177,0.189925,0.104906,0.035839,0.148249,0.134138,0.058566,3
3,0.1,4,"{'learning_rate': 0.1, 'max_depth': 4}",0.15958,0.170066,0.085479,-0.006736,0.16214,0.114106,0.067718,4
