In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
import lightgbm as lgb
# Runs the data-preparation notebook before any modelling cell executes.
# NOTE(review): the cells below use df_train, dfy_train, df_test, id_test,
# gini_xgb and gini_lgb, none of which are defined in this notebook -
# presumably they are all created by "Data Prep.ipynb"; confirm.
%run "Data Prep.ipynb"

In [None]:
#Hold out 25% of the training rows as a validation set (fixed seed so the split repeats)
x_train, x_valid, y_train, y_valid = train_test_split(
    df_train,
    dfy_train,
    test_size=0.25,
    random_state=99,
)

split_report = 'Train samples: {} Validation samples: {}'
print(split_report.format(len(x_train), len(x_valid)))

#Wrap each split for xgboost; the test matrix is built without labels
d_train = xgb.DMatrix(x_train, label=y_train)
d_valid = xgb.DMatrix(x_valid, label=y_valid)
d_test = xgb.DMatrix(df_test)

In [None]:
#xgboost hyper-parameters (values were tuned with a grid search)
params = {
    'eta': 0.02,
    'max_depth': 4,
    'subsample': 0.9,
    'colsample_bytree': 0.9,
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'silent': True,
    'seed': 42,
}
# (dataset, display name) pairs handed to xgb.train as its evaluation sets
watchlist = [
    (d_train, 'train'),
    (d_valid, 'valid'),
]

In [None]:
#Fit the xgboost booster: at most 5000 rounds, early stopping after 200 rounds,
#progress printed every 50 rounds, custom gini metric treated as higher-is-better
mdl = xgb.train(
    params,
    d_train,
    num_boost_round=5000,
    evals=watchlist,
    feval=gini_xgb,
    maximize=True,
    early_stopping_rounds=200,
    verbose_eval=50,
)

In [None]:
#Prediction
# The booster was trained with early stopping, so score the test set with the
# best round only instead of every tree that was grown. This mirrors the
# LightGBM cell, which passes num_iteration=mdl2.best_iteration.
if hasattr(mdl, 'best_ntree_limit'):
    # Releases that still expose best_ntree_limit accept ntree_limit in predict().
    p_test = mdl.predict(d_test, ntree_limit=mdl.best_ntree_limit)
else:
    # NOTE(review): newer xgboost dropped ntree_limit/best_ntree_limit in favour
    # of iteration_range - confirm against the installed version.
    p_test = mdl.predict(d_test, iteration_range=(0, mdl.best_iteration + 1))

#Submission file
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = p_test
# Five decimals, no index column.
sub.to_csv('xgb1.csv', index=False, float_format='%.5f')

In [None]:
#LightGBM hyper-parameters (tuned values)
params2 = {
    'learning_rate': 0.02,
    'max_depth': 4,
    'boosting': 'gbdt',
    'objective': 'binary',
    'metric': 'auc',
    'is_training_metric': False,
    'seed': 42,
}

In [None]:
# Wrap the train/validation splits as LightGBM datasets
l_train = lgb.Dataset(x_train, label=y_train)
l_valid = lgb.Dataset(x_valid, label=y_valid)

# Fit the LightGBM booster: at most 2000 rounds, early stopping after 200 rounds,
# progress printed every 100 rounds, custom gini metric evaluated on l_valid
mdl2 = lgb.train(
    params2,
    l_train,
    num_boost_round=2000,
    valid_sets=l_valid,
    feval=gini_lgb,
    early_stopping_rounds=200,
    verbose_eval=100,
)


In [None]:
#Score the test set using only the rounds up to the best iteration
p_test2 = mdl2.predict(df_test, num_iteration=mdl2.best_iteration)

#Write the id/target pairs out as the LightGBM submission (five decimals, no index)
sub = pd.DataFrame().assign(id=id_test, target=p_test2)
sub.to_csv('lgb1.csv', index=False, float_format='%.5f')

In [None]:
#Blend predictions
# Relative weight of each model in the blend; the result is divided by their sum
# so the blended target stays on the same scale as the inputs.
# NOTE(review): the previous expression was 0.5 * xgb + 1.0 * lgb with no
# normalisation. That ratio is kept here - confirm whether an equal-weight
# blend (0.5 / 0.5) was actually intended.
XGB_WEIGHT = 0.5
LGB_WEIGHT = 1.0

d1 = pd.read_csv('xgb1.csv')
d2 = pd.read_csv('lgb1.csv')

# Targets are combined row by row, so both files must list the same ids in the
# same order; otherwise the blend would silently mix predictions across ids.
if not d1['id'].equals(d2['id']):
    raise ValueError('xgb1.csv and lgb1.csv do not contain the same ids in the same order')

# Copy so the blended column does not overwrite the xgboost frame held in d1.
d3 = d1.copy()
d3['target'] = (XGB_WEIGHT * d1['target'] + LGB_WEIGHT * d2['target']) / (XGB_WEIGHT + LGB_WEIGHT)

# Write the blended frame (previously the LightGBM frame `sub` was written by mistake).
d3.to_csv('blend1.csv', index=False, float_format='%.5f')