## Modeling the h1n1 and seasonal flu shots
I have 2 target variables:
1. h1n1 vaccination
2. seasonal vaccination

For now, both target variables will be predicted by the same underlying X features.
Since most of my features are categorical or binary variables, my goal is to build 4 models and then stack them:
1. CatBoost on original (not one hot encoded) dataset
2. LightGBM on original (not one hot encoded) dataset by setting categorical features
3. xgBoost on one hot encoded dataset
4. LightGBM / CatBoost / sklearn GBM on one-hot encoded dataset (whichever is best)

### #1: CatBoost

In [97]:
import pandas as pd
import numpy as np

from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier, Dataset
from catboost import CatBoostClassifier

import time
import pickle
import warnings
warnings.filterwarnings('ignore')

def evaluate_model(model_name, model, X, y):
    """Print and tabulate AUC and log-loss of a fitted classifier on (X, y).

    Parameters
    ----------
    model_name : label used in the printed lines and as the row index of the result.
    model : fitted estimator exposing ``predict_proba``; column 1 of its output is
        used as the score.
    X, y : features and true labels to evaluate on.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``model_name`` with columns ``AUC`` (rounded to 4
        decimals) and ``LogLoss`` (rounded to 3 decimals).
    """
    positive_class_scores = model.predict_proba(X)[:, 1]

    auc_value = roc_auc_score(y, positive_class_scores)
    logloss_value = log_loss(y, positive_class_scores)

    print('AUC for', model_name, ': %1.4f' % auc_value)
    print('LogLoss for', model_name, ': %1.3f' % logloss_value)

    rounded_metrics = {'AUC' : [round(auc_value, 4)], 'LogLoss' : [round(logloss_value, 3)]}

    return pd.DataFrame(rounded_metrics, index = [model_name])

#### H1N1

In [48]:
# Load the H1N1 training table and separate the feature columns from the target.
h1n1_cat = pd.read_csv('../../data/h1n1_catboost.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X_catboost = h1n1_cat.drop(columns = ['h1n1_vaccine'])
y_catboost = h1n1_cat['h1n1_vaccine'].copy()

print('Original shape:', h1n1_cat.shape)
print('X shape:', X_catboost.shape)
print('y shape:', y_catboost.shape)

Original shape: (26707, 34)
X shape: (26707, 33)
y shape: (26707,)


In [49]:
def catboost_prepare(X):
    """Return a copy of ``X`` with missing values replaced by the string 'None'.

    float64 columns are first cast to pandas' nullable ``Int64`` dtype and then to
    object dtype (presumably so that CatBoost sees the category 1 rather than 1.0 --
    TODO confirm intent). Afterwards every column has its missing values filled
    with the literal string ``'None'``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame. It is not modified; the transformation runs on a copy.

    Returns
    -------
    pandas.DataFrame
        The transformed copy.

    Raises
    ------
    TypeError
        Raised by pandas if a float64 column holds non-integer values that cannot
        be cast safely to ``Int64``.
    """
    # Work on a copy so the caller's frame is left untouched, consistent with
    # xgb_encoded_prepare / xgb_continuous_prepare.
    X = X.copy()

    for col in X.dtypes[X.dtypes == 'float64'].index.tolist():
        X[col] = X[col].astype(pd.Int64Dtype()).astype('O')

    for col in X.columns.tolist():
        X[col] = X[col].fillna('None')

    return X

In [50]:
# Apply the CatBoost preprocessing (integer-valued categories, 'None' for missing values).
X_catboost = catboost_prepare(X_catboost)

In [51]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_cat_train, X_cat_test, y_cat_train, y_cat_test = train_test_split(X_catboost, y_catboost, test_size = 0.2, random_state = 20202020)

In [52]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# Every list holds a single value, so this "grid" is one configuration;
# GridSearchCV is used here for its 5-fold cross-validation and refit.
cat_params = {'learning_rate': [0.1],
              'l2_leaf_reg': [0.5],
              'subsample': [0.75],
              'rsm' : [2/3],
              'max_depth': [9], # up to 16 (8 on gpu)
              'grow_policy': ['Lossguide'],
              'min_data_in_leaf' : [23], 
              'max_leaves' : [23],
              'iterations' : [100]} 

# All columns of X_catboost are declared as categorical features.
cat = CatBoostClassifier(random_state = 20202020, verbose = 0,
                         eval_metric = 'AUC:hints=skip_train~false', objective = 'Logloss',
                         cat_features = X_catboost.columns.tolist())

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_cat = GridSearchCV(cat, param_grid = cat_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

GRID_cat.fit(X_cat_train, y_cat_train)

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 17:44:46 2021
Ended at Fri Feb  5 17:45:44 2021
0.9783220132191975 minutes


In [53]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('CatBoost h1n1', GRID_cat.best_estimator_, X_cat_test, y_cat_test)

AUC for CatBoost h1n1 : 0.8674
LogLoss for CatBoost h1n1 : 0.345


Unnamed: 0,AUC,LogLoss
CatBoost h1n1,0.8674,0.345


In [54]:
# Persist the best H1N1 CatBoost estimator with pickle.
# NOTE(review): the relative 'models/' directory must already exist -- confirm.
model_filename = 'models/h1n1_catboost.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_cat.best_estimator_, file)

#### Seasonal

In [55]:
# Load the seasonal-vaccine training table and separate the feature columns from the target.
seasonal_cat = pd.read_csv('../../data/seasonal_catboost.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X_catboost = seasonal_cat.drop(columns = ['seasonal_vaccine'])
y_catboost = seasonal_cat['seasonal_vaccine'].copy()

print('Original shape:', seasonal_cat.shape)
print('X shape:', X_catboost.shape)
print('y shape:', y_catboost.shape)

Original shape: (26707, 34)
X shape: (26707, 33)
y shape: (26707,)


In [56]:
# Apply the CatBoost preprocessing (integer-valued categories, 'None' for missing values).
X_catboost = catboost_prepare(X_catboost)

In [57]:
# Same 80/20 split and seed as the H1N1 model; note this overwrites the H1N1 split variables.
X_cat_train, X_cat_test, y_cat_train, y_cat_test = train_test_split(X_catboost, y_catboost, test_size = 0.2, random_state = 20202020)

In [58]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# 2 x 2 x 2 = 8 candidate configurations (l2_leaf_reg, min_data_in_leaf, max_leaves);
# with cv = 5 that is 40 fits before the final refit.
cat_params = {'learning_rate': [0.1],
              'l2_leaf_reg': [0.5, 1],
              'subsample': [0.75],
              'rsm' : [2/3],
              'max_depth': [9], # up to 16 (8 on gpu)
              'grow_policy': ['Lossguide'],
              'min_data_in_leaf' : [23, 29], 
              'max_leaves' : [17, 23],
              'iterations' : [100]}

# All columns of X_catboost are declared as categorical features.
cat = CatBoostClassifier(random_state = 20202020, verbose = 0,
                         eval_metric = 'AUC:hints=skip_train~false', objective = 'Logloss',
                         cat_features = X_catboost.columns.tolist())

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_cat_seasonal = GridSearchCV(cat, param_grid = cat_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

GRID_cat_seasonal.fit(X_cat_train, y_cat_train)

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 17:46:03 2021
Ended at Fri Feb  5 17:47:09 2021
1.090047331651052 minutes


In [59]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('CatBoost seasonal', GRID_cat_seasonal.best_estimator_, X_cat_test, y_cat_test)

AUC for CatBoost seasonal : 0.8610
LogLoss for CatBoost seasonal : 0.465


Unnamed: 0,AUC,LogLoss
CatBoost seasonal,0.861,0.465


In [60]:
# Persist the best seasonal CatBoost estimator with pickle.
# NOTE(review): the relative 'models/' directory must already exist -- confirm.
model_filename = 'models/seasonal_catboost.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_cat_seasonal.best_estimator_, file)

#### Make test predictions with CatBoost

In [61]:
# Load the test set and keep respondent_id aside; it identifies rows in the submission
# but is not a model feature.
test = pd.read_csv('../../data/originals/test_catboost.csv')
respondent_id = test['respondent_id'].copy()
# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
test = test.drop(columns = ['respondent_id'])

test = catboost_prepare(test)

In [62]:
# Build the submission: one row per respondent with the predicted probability of the
# positive class (column 1 of predict_proba) for each target.
submission = pd.DataFrame(respondent_id)
submission['h1n1_vaccine'] = GRID_cat.best_estimator_.predict_proba(test)[:,1]
submission['seasonal_vaccine'] = GRID_cat_seasonal.best_estimator_.predict_proba(test)[:,1]

submission.head()

Unnamed: 0,respondent_id,h1n1_vaccine,seasonal_vaccine
0,26707,0.121296,0.191064
1,26708,0.041142,0.050702
2,26709,0.14776,0.746198
3,26710,0.620204,0.904706
4,26711,0.362429,0.567235


In [63]:
# Write the CatBoost predictions without the DataFrame index column.
submission.to_csv('../../data/submissions/first_submission_catboost.csv', index = False)

### #2: LightGBM using categorical features

#### H1N1

In [64]:
# Reload the same H1N1 table used for CatBoost and separate features from the target.
h1n1_cat = pd.read_csv('../../data/h1n1_catboost.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X_lgb = h1n1_cat.drop(columns = ['h1n1_vaccine'])
y_lgb = h1n1_cat['h1n1_vaccine'].copy()

print('Original shape:', h1n1_cat.shape)
print('X shape:', X_lgb.shape)
print('y shape:', y_lgb.shape)

Original shape: (26707, 34)
X shape: (26707, 33)
y shape: (26707,)


In [65]:
def lgb_cat_prepare(X):
    """Return a copy of ``X`` in which every column has pandas ``category`` dtype.

    Steps, in order:
    1. object columns: missing values become the string ``'None'``;
    2. float64 columns: missing values become ``-1`` and the column is cast to
       pandas' nullable ``Int64`` dtype;
    3. every column is cast to ``category``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame. It is not modified; the transformation runs on a copy.

    Returns
    -------
    pandas.DataFrame
        The transformed copy.

    Raises
    ------
    TypeError
        Raised by pandas if a float64 column holds non-integer values that cannot
        be cast safely to ``Int64``.
    """
    # Work on a copy so the caller's frame is left untouched, consistent with
    # xgb_encoded_prepare / xgb_continuous_prepare.
    X = X.copy()

    for col in X.dtypes[X.dtypes == 'object'].index.tolist():
        X[col] = X[col].fillna('None')

    for col in X.dtypes[X.dtypes == 'float64'].index.tolist():
        X[col] = X[col].fillna(-1).astype(pd.Int64Dtype())

    for col in X.columns.tolist():
        X[col] = X[col].astype('category')

    return X

In [66]:
# Convert every feature to pandas 'category' dtype for LightGBM.
X_lgb = lgb_cat_prepare(X_lgb)

In [67]:
# Hold out 20% of the rows for evaluation; same seed as the CatBoost split.
X_lgb_train, X_lgb_test, y_lgb_train, y_lgb_test = train_test_split(X_lgb, y_lgb, test_size = 0.2, random_state = 20202020)

In [68]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# Every list holds a single value, so this "grid" is one configuration;
# GridSearchCV is used here for its 5-fold cross-validation and refit.
LGB_params = {'boosting_type' : ['dart'],  
              'learning_rate' : [0.05],
              'num_leaves' : [35],
              'min_child_samples' : [1],
              'max_depth' : [25],
              'subsample' : [1],
              'colsample_bytree' : [0.8],
              'n_estimators' : [500]}

LGB = LGBMClassifier(random_state = 20202020, objective = 'binary', metric = 'auc')

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_LGB_h1n1 = GridSearchCV(LGB, param_grid = LGB_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

# categorical_feature is not passed explicitly; the columns already carry pandas
# 'category' dtype from lgb_cat_prepare. NOTE(review): this relies on LightGBM's
# default handling of pandas categorical columns -- confirm for the installed version.
# lgb_categoricals = X_lgb_train.columns.tolist()
GRID_LGB_h1n1.fit(X_lgb_train, y_lgb_train) # categorical_feature = lgb_categoricals

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 17:48:20 2021
Ended at Fri Feb  5 17:49:08 2021
0.7993542154630026 minutes


In [69]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('LightGBM h1n1 with categoricals', GRID_LGB_h1n1.best_estimator_, X_lgb_test, y_lgb_test)

AUC for LightGBM h1n1 with categoricals : 0.8673
LogLoss for LightGBM h1n1 with categoricals : 0.347


Unnamed: 0,AUC,LogLoss
LightGBM h1n1 with categoricals,0.8673,0.347


In [70]:
# Persist the best H1N1 LightGBM estimator with pickle.
# NOTE(review): the filename spells "lighgbm" (sic); left unchanged because other
# code may load the model from this exact path.
model_filename = 'models/h1n1_lighgbm_categoricals.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_LGB_h1n1.best_estimator_, file)

#### Seasonal

In [71]:
# Reload the same seasonal table used for CatBoost and separate features from the target.
seasonal_cat = pd.read_csv('../../data/seasonal_catboost.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X_lgb = seasonal_cat.drop(columns = ['seasonal_vaccine'])
y_lgb = seasonal_cat['seasonal_vaccine'].copy()

print('Original shape:', seasonal_cat.shape)
print('X shape:', X_lgb.shape)
print('y shape:', y_lgb.shape)

Original shape: (26707, 34)
X shape: (26707, 33)
y shape: (26707,)


In [72]:
# Convert every feature to pandas 'category' dtype for LightGBM.
X_lgb = lgb_cat_prepare(X_lgb)

In [73]:
# Same 80/20 split and seed as before; note this overwrites the H1N1 LightGBM split variables.
X_lgb_train, X_lgb_test, y_lgb_train, y_lgb_test = train_test_split(X_lgb, y_lgb, test_size = 0.2, random_state = 20202020)

In [74]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# Every list holds a single value, so this "grid" is one configuration;
# unlike the H1N1 model this one uses 'gbdt' boosting with fewer trees.
LGB_params = {'boosting_type' : ['gbdt'], 
              'learning_rate' : [0.05],
              'num_leaves' : [25],
              'min_child_samples' : [1],
              'max_depth' : [13],
              'subsample' : [1],
              'colsample_bytree' : [1/2],
              'n_estimators' : [200]}

LGB = LGBMClassifier(random_state = 20202020, objective = 'binary', metric = 'auc')

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_LGB_seasonal = GridSearchCV(LGB, param_grid = LGB_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

# categorical_feature is not passed explicitly; the columns already carry pandas
# 'category' dtype from lgb_cat_prepare. NOTE(review): this relies on LightGBM's
# default handling of pandas categorical columns -- confirm for the installed version.
# lgb_categoricals = X_lgb_train.columns.tolist()
GRID_LGB_seasonal.fit(X_lgb_train, y_lgb_train) # categorical_feature = lgb_categoricals

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 17:49:42 2021
Ended at Fri Feb  5 17:49:44 2021
0.04388338724772135 minutes


In [75]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('LightGBM seasonal with categoricals', GRID_LGB_seasonal.best_estimator_, X_lgb_test, y_lgb_test)

AUC for LightGBM seasonal with categoricals : 0.8617
LogLoss for LightGBM seasonal with categoricals : 0.463


Unnamed: 0,AUC,LogLoss
LightGBM seasonal with categoricals,0.8617,0.463


In [76]:
# Persist the best seasonal LightGBM estimator with pickle.
# NOTE(review): the filename spells "lighgbm" (sic); left unchanged because other
# code may load the model from this exact path.
model_filename = 'models/seasonal_lighgbm_categoricals.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_LGB_seasonal.best_estimator_, file)

#### Make test predictions with LightGBM (with categorical features instead of one-hot encoding)

In [77]:
# Load the test set and keep respondent_id aside; it identifies rows in the submission
# but is not a model feature.
test = pd.read_csv('../../data/originals/test_catboost.csv')
respondent_id = test['respondent_id'].copy()
# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
test = test.drop(columns = ['respondent_id'])

# NOTE(review): categories are derived from the values present in this frame alone;
# confirm that LightGBM aligns them with the training categories at predict time.
test = lgb_cat_prepare(test)

In [78]:
# Build the submission: one row per respondent with the predicted probability of the
# positive class (column 1 of predict_proba) for each target.
submission = pd.DataFrame(respondent_id)
submission['h1n1_vaccine'] = GRID_LGB_h1n1.best_estimator_.predict_proba(test)[:,1]
submission['seasonal_vaccine'] = GRID_LGB_seasonal.best_estimator_.predict_proba(test)[:,1]

submission.head()

Unnamed: 0,respondent_id,h1n1_vaccine,seasonal_vaccine
0,26707,0.155108,0.268159
1,26708,0.075113,0.033132
2,26709,0.212651,0.714583
3,26710,0.671908,0.902284
4,26711,0.378022,0.39791


In [79]:
# Write the LightGBM predictions without the DataFrame index column.
submission.to_csv('../../data/submissions/first_submission_lightgbm.csv', index = False)

### #3: xgBoost with encoding int features as categories

#### H1N1

In [146]:
# Load the encoded H1N1 training table and separate the feature columns from the target.
h1n1 = pd.read_csv('../../data/h1n1_encoded.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X = h1n1.drop(columns = ['h1n1_vaccine'])
y = h1n1['h1n1_vaccine'].copy()

print('Original shape:', h1n1.shape)
print('X shape:', X.shape)
print('y shape:', y.shape)

Original shape: (26707, 59)
X shape: (26707, 58)
y shape: (26707,)


In [147]:
def xgb_encoded_prepare(X_):
    """Return a transformed copy of ``X_`` for the one-hot encoded xgBoost model.

    On a copy of the input, in order:
    1. float64 columns are cast to pandas' nullable ``Int64`` dtype;
    2. the eight concern/knowledge/opinion rating columns are one-hot encoded with
       ``pd.get_dummies`` using ``' = '`` between column name and value;
    3. the behavioural / household / binary columns listed below are cast to float;
    4. ``'<'`` in column names is replaced by ``'smaller_than'`` and ``','`` is removed.

    Parameters
    ----------
    X_ : pandas.DataFrame
        Must contain every column named in the two lists below. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The transformed copy.
    """
    rating_columns = ['h1n1_concern', 'h1n1_knowledge', 'opinion_h1n1_vacc_effective', 'opinion_h1n1_risk',
                      'opinion_h1n1_sick_from_vacc', 'opinion_seas_vacc_effective', 'opinion_seas_risk',
                      'opinion_seas_sick_from_vacc']

    float_columns = ['behavioral_antiviral_meds', 'behavioral_avoidance', 'behavioral_face_mask',
                     'behavioral_wash_hands', 'behavioral_large_gatherings', 'behavioral_outside_home',
                     'behavioral_touch_face', 'doctor_recc_h1n1', 'doctor_recc_seasonal', 'chronic_med_condition',
                     'child_under_6_months', 'health_worker', 'health_insurance', 'household_adults',
                     'household_children', 'marital_status_married', 'rent_or_own_own']

    frame = X_.copy()

    # Nullable integers first, so the dummy labels are built from integer values.
    originally_float = frame.dtypes[frame.dtypes == 'float64'].index.tolist()
    frame = frame.astype({name: pd.Int64Dtype() for name in originally_float})

    frame = pd.get_dummies(frame, prefix_sep = ' = ', columns = rating_columns)

    # Back to plain floats for the columns that are not one-hot encoded.
    frame = frame.astype({name: float for name in float_columns})

    cleaned_names = []
    for name in frame.columns.tolist():
        cleaned_names.append(name.replace('<', 'smaller_than').replace(',', ''))
    frame.columns = cleaned_names

    return frame

In [148]:
# One-hot encode the rating columns and clean the column names for xgBoost.
X = xgb_encoded_prepare(X)

In [151]:
# Hold out 20% of the rows for evaluation; same seed as the other models.
X_xgb_train, X_xgb_test, y_xgb_train, y_xgb_test = train_test_split(X, y, test_size = 0.2, random_state = 20202020)

In [173]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# Every list holds a single value, so this "grid" is one configuration;
# GridSearchCV is used here for its 5-fold cross-validation and refit.
XGB_params = {'eta' : [0.05],
              'gamma' : [2],
              'subsample' : [1],
              'colsample_bytree' : [2/3],
              'colsample_bynode' : [3/4],
              'max_depth' : [21],
              'min_child_weight' : [1],
              'n_estimators' : [100]}

XGB = XGBClassifier(random_state = 20202020, verbosity = 0, objective = 'binary:logistic', eval_metric = 'auc')

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_XGB_h1n1_encoded = GridSearchCV(XGB, param_grid = XGB_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

GRID_XGB_h1n1_encoded.fit(X_xgb_train, y_xgb_train)

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 20:40:40 2021
Ended at Fri Feb  5 20:41:41 2021
1.028653367360433 minutes


In [174]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('xgBoost h1n1 with encoding', GRID_XGB_h1n1_encoded.best_estimator_, X_xgb_test, y_xgb_test)

AUC for xgBoost h1n1 with encoding : 0.8634
LogLoss for xgBoost h1n1 with encoding : 0.350


Unnamed: 0,AUC,LogLoss
xgBoost h1n1 with encoding,0.8634,0.35


In [175]:
# Persist the best H1N1 one-hot xgBoost estimator with pickle.
# NOTE(review): the relative 'models/' directory must already exist -- confirm.
model_filename = 'models/h1n1_xgb_encoded.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_XGB_h1n1_encoded.best_estimator_, file)

#### Seasonal

In [176]:
# Load the encoded seasonal training table and separate the feature columns from the target.
seasonal = pd.read_csv('../../data/seasonal_encoded.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X = seasonal.drop(columns = ['seasonal_vaccine'])
y = seasonal['seasonal_vaccine'].copy()

print('Original shape:', seasonal.shape)
print('X shape:', X.shape)
print('y shape:', y.shape)

Original shape: (26707, 59)
X shape: (26707, 58)
y shape: (26707,)


In [177]:
# One-hot encode the rating columns and clean the column names for xgBoost.
X = xgb_encoded_prepare(X)

In [178]:
# Same 80/20 split and seed as before; note this overwrites the H1N1 xgBoost split variables.
X_xgb_train, X_xgb_test, y_xgb_train, y_xgb_test = train_test_split(X, y, test_size = 0.2, random_state = 20202020)

In [201]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# Every list holds a single value, so this "grid" is one configuration;
# compared with the H1N1 model it uses a smaller eta (0.01) and more trees (250).
XGB_params = {'eta' : [0.01],
              'gamma' : [1.5],
              'subsample' : [1],
              'colsample_bytree' : [2/3],
              'colsample_bynode' : [2/3],
              'max_depth' : [19],
              'min_child_weight' : [1],
              'n_estimators' : [250]}

XGB = XGBClassifier(random_state = 20202020, verbosity = 0, objective = 'binary:logistic', eval_metric = 'auc')

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_XGB_seasonal_encoded = GridSearchCV(XGB, param_grid = XGB_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

GRID_XGB_seasonal_encoded.fit(X_xgb_train, y_xgb_train)

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 21:40:30 2021
Ended at Fri Feb  5 21:42:44 2021
2.243872169653575 minutes


In [203]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('xgBoost seasonal with encoding', GRID_XGB_seasonal_encoded.best_estimator_, X_xgb_test, y_xgb_test)

AUC for xgBoost seasonal with encoding : 0.8552
LogLoss for xgBoost seasonal with encoding : 0.478


Unnamed: 0,AUC,LogLoss
xgBoost seasonal with encoding,0.8552,0.478


In [204]:
# Persist the best seasonal one-hot xgBoost estimator with pickle.
# NOTE(review): the relative 'models/' directory must already exist -- confirm.
model_filename = 'models/seasonal_xgb_encoded.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_XGB_seasonal_encoded.best_estimator_, file)

#### Make test predictions with xgBoost

In [210]:
# Load the encoded test set and keep respondent_id aside; it identifies rows in the
# submission but is not a model feature.
test = pd.read_csv('../../data/originals/test_encoded.csv')
respondent_id = test['respondent_id'].copy()
# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
test = test.drop(columns = ['respondent_id'])

# NOTE(review): the dummy columns are derived from the values present in this frame;
# confirm that they match the columns (and order) the models were trained on.
test = xgb_encoded_prepare(test)

In [211]:
# Build the submission: one row per respondent with the predicted probability of the
# positive class (column 1 of predict_proba) for each target.
submission = pd.DataFrame(respondent_id)
submission['h1n1_vaccine'] = GRID_XGB_h1n1_encoded.best_estimator_.predict_proba(test)[:,1]
submission['seasonal_vaccine'] = GRID_XGB_seasonal_encoded.best_estimator_.predict_proba(test)[:,1]

submission.head()

Unnamed: 0,respondent_id,h1n1_vaccine,seasonal_vaccine
0,26707,0.095608,0.196756
1,26708,0.055153,0.079003
2,26709,0.288272,0.812739
3,26710,0.672732,0.877807
4,26711,0.214239,0.518264


In [212]:
# Write the xgBoost predictions without the DataFrame index column.
submission.to_csv('../../data/submissions/first_submission_xgboost.csv', index = False)

### #4: xgBoost leaving the categorical features as numerics (the 1-5 ratings are now treated as 'continuous')

#### H1N1

In [221]:
# Reload the encoded H1N1 training table and separate the feature columns from the target.
h1n1 = pd.read_csv('../../data/h1n1_encoded.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X = h1n1.drop(columns = ['h1n1_vaccine'])
y = h1n1['h1n1_vaccine'].copy()

print('Original shape:', h1n1.shape)
print('X shape:', X.shape)
print('y shape:', y.shape)

Original shape: (26707, 59)
X shape: (26707, 58)
y shape: (26707,)


In [222]:
def xgb_continuous_prepare(X_):
    """Return a copy of ``X_`` whose column names are cleaned; values are untouched.

    In every column name ``'<'`` is replaced by ``'smaller_than'`` and ``','`` is
    removed. The input frame is not modified.
    """
    renamed = X_.copy()

    cleaned_names = []
    for name in renamed.columns.tolist():
        cleaned_names.append(name.replace('<', 'smaller_than').replace(',', ''))
    renamed.columns = cleaned_names

    return renamed

In [223]:
# Only the column names are cleaned here; the rating columns stay numeric.
X = xgb_continuous_prepare(X)

In [224]:
# Hold out 20% of the rows for evaluation; same seed as the other models.
X_xgb_train, X_xgb_test, y_xgb_train, y_xgb_test = train_test_split(X, y, test_size = 0.2, random_state = 20202020)

In [246]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# Every list holds a single value, so this "grid" is one configuration;
# GridSearchCV is used here for its 5-fold cross-validation and refit.
XGB_params = {'eta' : [0.025],
              'gamma' : [3],
              'subsample' : [1],
              'colsample_bytree' : [2/3],
              'colsample_bynode' : [2/3],
              'max_depth' : [15],
              'min_child_weight' : [1],
              'n_estimators' : [250]}

XGB = XGBClassifier(random_state = 20202020, verbosity = 0, objective = 'binary:logistic', eval_metric = 'auc')

# Candidates are ranked by cross-validated ROC AUC; n_jobs = -1 uses all available cores.
GRID_XGB_h1n1_cont = GridSearchCV(XGB, param_grid = XGB_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

GRID_XGB_h1n1_cont.fit(X_xgb_train, y_xgb_train)

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 22:41:52 2021
Ended at Fri Feb  5 22:43:00 2021
1.1454349438349405 minutes


In [249]:
# Score the best estimator from the search on the held-out 20% split.
evaluate_model('xgBoost h1n1 without encoding', GRID_XGB_h1n1_cont.best_estimator_, X_xgb_test, y_xgb_test)

AUC for xgBoost h1n1 without encoding : 0.8643
LogLoss for xgBoost h1n1 without encoding : 0.348


Unnamed: 0,AUC,LogLoss
xgBoost h1n1 without encoding,0.8643,0.348


In [250]:
# Persist the best H1N1 numeric-feature xgBoost estimator with pickle.
# NOTE(review): the relative 'models/' directory must already exist -- confirm.
model_filename = 'models/h1n1_xgb_continuous.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_XGB_h1n1_cont.best_estimator_, file)

#### Seasonal

In [251]:
# Reload the encoded seasonal training table and separate the feature columns from the target.
seasonal = pd.read_csv('../../data/seasonal_encoded.csv')

# Use `columns=`: pandas 2.0 no longer accepts the axis as a second positional argument.
X = seasonal.drop(columns = ['seasonal_vaccine'])
y = seasonal['seasonal_vaccine'].copy()

print('Original shape:', seasonal.shape)
print('X shape:', X.shape)
print('y shape:', y.shape)

Original shape: (26707, 59)
X shape: (26707, 58)
y shape: (26707,)


In [252]:
# Only the column names are cleaned here; the rating columns stay numeric.
X = xgb_continuous_prepare(X)

In [253]:
# Same 80/20 split and seed as before; note this overwrites the H1N1 xgBoost split variables.
X_xgb_train, X_xgb_test, y_xgb_train, y_xgb_test = train_test_split(X, y, test_size = 0.2, random_state = 20202020)

In [258]:
# Record the wall-clock start so the total search time can be reported below.
start = time.time()
print("Started at", str(time.ctime(int(start))))

# A real grid this time: 3 (eta) x 2 (gamma) x 2 (colsample_bytree) x 2 (colsample_bynode)
# x 3 (max_depth) = 72 candidates; with cv = 5 that is 360 fits before the final refit.
XGB_params = {'eta' : [0.05, 0.01, 0.1],
              'gamma' : [1, 2],
              'subsample' : [1],
              'colsample_bytree' : [2/3, 3/4],
              'colsample_bynode' : [2/3, 3/4],
              'max_depth' : [17, 21, 25],
              'min_child_weight' : [1],
              'n_estimators' : [100]}

XGB = XGBClassifier(random_state = 20202020, verbosity = 0, objective = 'binary:logistic', eval_metric = 'auc')

# Candidates are ranked by cross-validated ROC AUC only, so the log-loss of the
# predicted probabilities plays no part in the selection.
GRID_XGB_seasonal_cont = GridSearchCV(XGB, param_grid = XGB_params, cv = 5, scoring = 'roc_auc', n_jobs = -1)

GRID_XGB_seasonal_cont.fit(X_xgb_train, y_xgb_train)

print("Ended at", str(time.ctime(int(time.time()))))
print((time.time() - start) / 60, 'minutes')

Started at Fri Feb  5 22:44:47 2021
Ended at Fri Feb  5 23:15:15 2021
30.466174523035686 minutes


In [259]:
# Show which candidate of the grid achieved the best cross-validated ROC AUC.
GRID_XGB_seasonal_cont.best_params_

{'colsample_bynode': 0.6666666666666666,
 'colsample_bytree': 0.6666666666666666,
 'eta': 0.01,
 'gamma': 2,
 'max_depth': 17,
 'min_child_weight': 1,
 'n_estimators': 100,
 'subsample': 1}

In [260]:
# Score the best estimator from the search on the held-out 20% split.
# NOTE(review): the selected candidate combines eta = 0.01 with only 100 trees; this
# may leave the predicted probabilities under-fitted and would explain a log-loss that
# is high relative to the AUC -- consider more trees or selecting on log-loss. Confirm.
evaluate_model('xgBoost seasonal without encoding', GRID_XGB_seasonal_cont.best_estimator_, X_xgb_test, y_xgb_test)

AUC for xgBoost seasonal without encoding : 0.8583
LogLoss for xgBoost seasonal without encoding : 0.529


Unnamed: 0,AUC,LogLoss
xgBoost seasonal without encoding,0.8583,0.529


In [261]:
# Persist the best seasonal numeric-feature xgBoost estimator with pickle.
# NOTE(review): the relative 'models/' directory must already exist -- confirm.
model_filename = 'models/seasonal_xgb_continuous.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(GRID_XGB_seasonal_cont.best_estimator_, file)