In [1]:
# Pandas is an open source Python package that provides numerous tools for data analysis.
# Numpy is the fundamental package for scientific computing with Python. 
import pandas as pd #pd is an alias pointing to pandas
import numpy as np # np is an alias pointing to numpy

# Seed the generator to make this notebook's output stable across runs
np.random.seed(89)
from datetime import datetime
import random

# To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt #pyplot is matplotlib's plotting framework https://matplotlib.org/users/pyplot_tutorial.html
from matplotlib.colors import ListedColormap # Learn more about matplotlib.colors here https://matplotlib.org/3.1.1/api/colors_api.html
import seaborn as sns # sns is an alias pointing to seaborn

# Standardize features by removing the mean and scaling to unit variance
from sklearn.preprocessing import StandardScaler

# Split validation
from sklearn.model_selection import train_test_split

# GridSearchCV, KFold and Cross-Validation Score
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score

In [2]:
# All train data
train = pd.read_csv('C:\\Users\\cassi\\Desktop\\trainingsample.csv')

In [3]:
# Look at basics of Dataframe 
print(train.head(5))
print(train.columns)

   click    C1  banner_pos   site_id site_domain site_category    app_id  \
0      0  1005           1  a7853007    7e091613      f028772b  ecad2386   
1      0  1002           0  5b787406    f3ca2e42      50e219e0  ecad2386   
2      0  1005           0  85f751fd    c4e18dd6      50e219e0  e986f7ef   
3      0  1005           0  85f751fd    c4e18dd6      50e219e0  92f5800b   
4      0  1010           1  85f751fd    c4e18dd6      50e219e0  04f2be5f   

  app_domain app_category device_id  ...   C17 C18   C19     C20  C21  \
0   7801e8d9     07d7df22  a99f214a  ...   906   3  1451  100156   61   
1   7801e8d9     07d7df22  25030ae3  ...  2684   2  1327      -1   52   
2   2347f47a     0f2161f8  cf441c35  ...  2340   3    41  100111  159   
3   ae637522     0f2161f8  a99f214a  ...  2424   1   175  100189   71   
4   7801e8d9     0f2161f8  462e0551  ...  2698   0   163  100084  212   

   hour_of_day  day_of_week                      user  \
0           21    Wednesday  a99f214a930ec31d3b

In [4]:
# Print data types of columns
print(train.dtypes)

click                int64
C1                   int64
banner_pos           int64
site_id             object
site_domain         object
site_category       object
app_id              object
app_domain          object
app_category        object
device_id           object
device_ip           object
device_model        object
device_type          int64
device_conn_type     int64
C14                  int64
C15                  int64
C16                  int64
C17                  int64
C18                  int64
C19                  int64
C20                  int64
C21                  int64
hour_of_day          int64
day_of_week         object
user                object
site_info           object
app_info            object
dtype: object


In [5]:
# Hash function

def convert_obj_to_int(self):
    
    object_list_columns = self.columns
    object_list_dtypes = self.dtypes
    new_col_suffix = '_int'
    for index in range(0,len(object_list_columns)):
        if object_list_dtypes[index] == object :
            self[object_list_columns[index]+new_col_suffix] = self[object_list_columns[index]].map( lambda  x: hash(x))
            self.drop([object_list_columns[index]],inplace=True,axis=1)
    return self
train = convert_obj_to_int(train)

In [6]:
# Print data types of columns
print(train.dtypes)

click                int64
C1                   int64
banner_pos           int64
device_type          int64
device_conn_type     int64
C14                  int64
C15                  int64
C16                  int64
C17                  int64
C18                  int64
C19                  int64
C20                  int64
C21                  int64
hour_of_day          int64
site_id_int          int64
site_domain_int      int64
site_category_int    int64
app_id_int           int64
app_domain_int       int64
app_category_int     int64
device_id_int        int64
device_ip_int        int64
device_model_int     int64
day_of_week_int      int64
user_int             int64
site_info_int        int64
app_info_int         int64
dtype: object


# Xgboost Model Training - 2

In [7]:
# Imports
from operator import itemgetter
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, log_loss


In [8]:
features = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'hour_of_day',
       'site_id_int', 'site_domain_int', 'site_category_int', 'app_id_int',
       'app_domain_int', 'app_category_int', 'device_id_int', 'device_ip_int',
       'device_model_int', 'day_of_week_int', 'user_int', 'site_info_int', 'app_info_int' ]

In [9]:
# Retriving Attributes
X = train.loc[:, features]

In [10]:
# Retriving Target Varibale
y = train.click.values

In [11]:
######################## Split the Data ######################

# Imports
from sklearn.model_selection import train_test_split

# Split Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify=y)

In [12]:
dtrain = xgb.DMatrix(X_train, label=y_train)

In [13]:
dtest = xgb.DMatrix(X_test, label=y_test)

In [18]:
params = {
        "objective": "binary:logistic",
        "booster" : "gbtree",
        "eta": 0.1,
        "max_depth": 5,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "seed": 42,
        "eval_metric": "logloss",
        }
num_boost_round = 1000
early_stopping_rounds = 20
test_size = 0.2
watchlist = [(dtrain, 'train'), (dtest, 'eval')]

In [19]:
model = xgb.train(params, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=early_stopping_rounds, verbose_eval=True)

[0]	train-logloss:0.64887	eval-logloss:0.64885
[1]	train-logloss:0.61258	eval-logloss:0.61265
[2]	train-logloss:0.58238	eval-logloss:0.58232
[3]	train-logloss:0.55777	eval-logloss:0.55770
[4]	train-logloss:0.53656	eval-logloss:0.53647
[5]	train-logloss:0.51875	eval-logloss:0.51872
[6]	train-logloss:0.50369	eval-logloss:0.50361
[7]	train-logloss:0.49126	eval-logloss:0.49120
[8]	train-logloss:0.48045	eval-logloss:0.48034
[9]	train-logloss:0.47132	eval-logloss:0.47118
[10]	train-logloss:0.46342	eval-logloss:0.46333
[11]	train-logloss:0.45680	eval-logloss:0.45671
[12]	train-logloss:0.45103	eval-logloss:0.45096
[13]	train-logloss:0.44616	eval-logloss:0.44611
[14]	train-logloss:0.44179	eval-logloss:0.44174
[15]	train-logloss:0.43831	eval-logloss:0.43829
[16]	train-logloss:0.43527	eval-logloss:0.43528
[17]	train-logloss:0.43271	eval-logloss:0.43273
[18]	train-logloss:0.43050	eval-logloss:0.43056
[19]	train-logloss:0.42861	eval-logloss:0.42863
[20]	train-logloss:0.42677	eval-logloss:0.42686
[2

[170]	train-logloss:0.40107	eval-logloss:0.40355
[171]	train-logloss:0.40105	eval-logloss:0.40355
[172]	train-logloss:0.40103	eval-logloss:0.40355
[173]	train-logloss:0.40101	eval-logloss:0.40354
[174]	train-logloss:0.40099	eval-logloss:0.40354
[175]	train-logloss:0.40097	eval-logloss:0.40353
[176]	train-logloss:0.40092	eval-logloss:0.40349
[177]	train-logloss:0.40087	eval-logloss:0.40346
[178]	train-logloss:0.40085	eval-logloss:0.40345
[179]	train-logloss:0.40075	eval-logloss:0.40336
[180]	train-logloss:0.40074	eval-logloss:0.40336
[181]	train-logloss:0.40071	eval-logloss:0.40335
[182]	train-logloss:0.40064	eval-logloss:0.40329
[183]	train-logloss:0.40062	eval-logloss:0.40328
[184]	train-logloss:0.40053	eval-logloss:0.40321
[185]	train-logloss:0.40051	eval-logloss:0.40321
[186]	train-logloss:0.40047	eval-logloss:0.40319
[187]	train-logloss:0.40042	eval-logloss:0.40316
[188]	train-logloss:0.40037	eval-logloss:0.40312
[189]	train-logloss:0.40032	eval-logloss:0.40308
[190]	train-logloss:

[338]	train-logloss:0.39574	eval-logloss:0.40047
[339]	train-logloss:0.39571	eval-logloss:0.40046
[340]	train-logloss:0.39569	eval-logloss:0.40046
[341]	train-logloss:0.39567	eval-logloss:0.40046
[342]	train-logloss:0.39565	eval-logloss:0.40045
[343]	train-logloss:0.39562	eval-logloss:0.40044
[344]	train-logloss:0.39559	eval-logloss:0.40042
[345]	train-logloss:0.39557	eval-logloss:0.40041
[346]	train-logloss:0.39555	eval-logloss:0.40040
[347]	train-logloss:0.39552	eval-logloss:0.40038
[348]	train-logloss:0.39549	eval-logloss:0.40037
[349]	train-logloss:0.39545	eval-logloss:0.40035
[350]	train-logloss:0.39544	eval-logloss:0.40034
[351]	train-logloss:0.39542	eval-logloss:0.40034
[352]	train-logloss:0.39541	eval-logloss:0.40034
[353]	train-logloss:0.39539	eval-logloss:0.40033
[354]	train-logloss:0.39535	eval-logloss:0.40030
[355]	train-logloss:0.39534	eval-logloss:0.40030
[356]	train-logloss:0.39532	eval-logloss:0.40029
[357]	train-logloss:0.39529	eval-logloss:0.40028
[358]	train-logloss:

[506]	train-logloss:0.39224	eval-logloss:0.39933
[507]	train-logloss:0.39221	eval-logloss:0.39932
[508]	train-logloss:0.39219	eval-logloss:0.39932
[509]	train-logloss:0.39217	eval-logloss:0.39931
[510]	train-logloss:0.39215	eval-logloss:0.39931
[511]	train-logloss:0.39214	eval-logloss:0.39931
[512]	train-logloss:0.39212	eval-logloss:0.39931
[513]	train-logloss:0.39209	eval-logloss:0.39929
[514]	train-logloss:0.39207	eval-logloss:0.39928
[515]	train-logloss:0.39206	eval-logloss:0.39928
[516]	train-logloss:0.39204	eval-logloss:0.39927
[517]	train-logloss:0.39203	eval-logloss:0.39927
[518]	train-logloss:0.39202	eval-logloss:0.39926
[519]	train-logloss:0.39200	eval-logloss:0.39926
[520]	train-logloss:0.39198	eval-logloss:0.39926
[521]	train-logloss:0.39195	eval-logloss:0.39926
[522]	train-logloss:0.39193	eval-logloss:0.39926
[523]	train-logloss:0.39192	eval-logloss:0.39925
[524]	train-logloss:0.39190	eval-logloss:0.39925
[525]	train-logloss:0.39188	eval-logloss:0.39924
[526]	train-logloss:

[674]	train-logloss:0.38924	eval-logloss:0.39858
[675]	train-logloss:0.38923	eval-logloss:0.39858
[676]	train-logloss:0.38921	eval-logloss:0.39858
[677]	train-logloss:0.38920	eval-logloss:0.39858
[678]	train-logloss:0.38918	eval-logloss:0.39859
[679]	train-logloss:0.38916	eval-logloss:0.39858
[680]	train-logloss:0.38915	eval-logloss:0.39858
[681]	train-logloss:0.38915	eval-logloss:0.39858
[682]	train-logloss:0.38913	eval-logloss:0.39858
[683]	train-logloss:0.38910	eval-logloss:0.39857
[684]	train-logloss:0.38909	eval-logloss:0.39857
[685]	train-logloss:0.38908	eval-logloss:0.39856
[686]	train-logloss:0.38906	eval-logloss:0.39856
[687]	train-logloss:0.38905	eval-logloss:0.39856
[688]	train-logloss:0.38903	eval-logloss:0.39855
[689]	train-logloss:0.38901	eval-logloss:0.39855
[690]	train-logloss:0.38901	eval-logloss:0.39855
[691]	train-logloss:0.38899	eval-logloss:0.39855
[692]	train-logloss:0.38897	eval-logloss:0.39853
[693]	train-logloss:0.38895	eval-logloss:0.39852
[694]	train-logloss:

[842]	train-logloss:0.38670	eval-logloss:0.39816
[843]	train-logloss:0.38669	eval-logloss:0.39817
[844]	train-logloss:0.38668	eval-logloss:0.39817
[845]	train-logloss:0.38666	eval-logloss:0.39817
[846]	train-logloss:0.38665	eval-logloss:0.39817
[847]	train-logloss:0.38663	eval-logloss:0.39816
[848]	train-logloss:0.38662	eval-logloss:0.39816
[849]	train-logloss:0.38660	eval-logloss:0.39816
[850]	train-logloss:0.38660	eval-logloss:0.39815
[851]	train-logloss:0.38659	eval-logloss:0.39815
[852]	train-logloss:0.38658	eval-logloss:0.39815
[853]	train-logloss:0.38657	eval-logloss:0.39814
[854]	train-logloss:0.38656	eval-logloss:0.39814
[855]	train-logloss:0.38655	eval-logloss:0.39814
[856]	train-logloss:0.38653	eval-logloss:0.39813
[857]	train-logloss:0.38651	eval-logloss:0.39813
[858]	train-logloss:0.38649	eval-logloss:0.39814
[859]	train-logloss:0.38649	eval-logloss:0.39813
[860]	train-logloss:0.38647	eval-logloss:0.39814
[861]	train-logloss:0.38645	eval-logloss:0.39814
[862]	train-logloss:

In [20]:
predictions = model.predict(dtest)

In [21]:
print(predictions)

[0.1111246  0.24043196 0.24235219 ... 0.38218725 0.18809424 0.07649877]


In [22]:
# AUC
roc_auc_score(y_test, predictions)

0.7470521695665071

In [23]:
log_loss(y_test, predictions)

0.3978827601847332

# Random Forest

In [11]:
#Imports
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

import sklearn
from sklearn import ensemble


In [12]:
features = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'hour_of_day',
       'site_id_int', 'site_domain_int', 'site_category_int', 'app_id_int',
       'app_domain_int', 'app_category_int', 'device_id_int', 'device_ip_int',
       'device_model_int', 'day_of_week_int', 'user_int', 'site_info_int', 'app_info_int' ]

In [13]:
# Retriving Attributes
X = train.loc[:, features]

In [14]:
# Retriving Target Varibale
y = train.click.values

In [15]:
######################## Split the Data ######################

# Imports
from sklearn.model_selection import train_test_split

# Split Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify=y)

In [19]:
rfc = RandomForestClassifier()

In [28]:
rfc.fit(X_train,y_train)# predictions

RandomForestClassifier()

In [29]:
rfc_predict = rfc.predict(X_test)

In [23]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, log_loss

In [33]:
rfc_cv_score = cross_val_score(rfc, X, y, cv=2, scoring='roc_auc')

In [36]:
print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, rfc_predict))
print('\n')
print("=== Classification Report ===")
print(classification_report(y_test, rfc_predict))
print('\n')
print("=== All AUC Scores ===")
print(rfc_cv_score)
print('\n')
print("=== Mean AUC Score ===")
print("Mean AUC Score - Random Forest: ", rfc_cv_score.mean())

=== Confusion Matrix ===
[[160915   5112]
 [ 29739   4234]]


=== Classification Report ===
              precision    recall  f1-score   support

           0       0.84      0.97      0.90    166027
           1       0.45      0.12      0.20     33973

    accuracy                           0.83    200000
   macro avg       0.65      0.55      0.55    200000
weighted avg       0.78      0.83      0.78    200000



=== All AUC Scores ===
[0.71624    0.71561294]


=== Mean AUC Score ===
Mean AUC Score - Random Forest:  0.7159264693544395


# LightGBM Model

In [61]:
import lightgbm as lgb
# Imports
from operator import itemgetter
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, log_loss


In [62]:
features = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'hour_of_day',
       'site_id_int', 'site_domain_int', 'site_category_int', 'app_id_int',
       'app_domain_int', 'app_category_int', 'device_id_int', 'device_ip_int',
       'device_model_int', 'day_of_week_int', 'user_int', 'site_info_int', 'app_info_int' ]

In [63]:
# Retriving Attributes
X = train.loc[:, features]

In [64]:
# Retriving Target Varibale
y = train.click.values

In [65]:
######################## Split the Data ######################

# Imports
from sklearn.model_selection import train_test_split

# Split Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify=y)

In [66]:
# specify your configurations as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': { 'binary_logloss'},
    'num_leaves': 31, # default leaves(31) amount for each tree
    'learning_rate': 0.08,
    'feature_fraction': 0.8, # will select 70% features before training each tree
    'bagging_fraction': 0.4, #feature_fraction, but this will random select part of data
    'bagging_freq': 3, #  perform bagging at every 5 iteration
    'verbose': 0
}

In [67]:
# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

In [68]:
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=2000,
                valid_sets=lgb_eval,
                early_stopping_rounds=1500)

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[1]	valid_0's binary_logloss: 0.450214
Training until validation scores don't improve for 1500 rounds
[2]	valid_0's binary_logloss: 0.445728
[3]	valid_0's binary_logloss: 0.441935
[4]	valid_0's binary_logloss: 0.438521
[5]	valid_0's binary_logloss: 0.435724
[6]	valid_0's binary_logloss: 0.433321
[7]	valid_0's binary_logloss: 0.431022
[8]	valid_0's binary_logloss: 0.429068
[9]	valid_0's binary_logloss: 0.427408
[10]	valid_0's binary_logloss: 0.425894
[11]	valid_0's binary_logloss: 0.424483
[12]	valid_0's binary_logloss: 0.423223
[13]	valid_0's binary_logloss: 0.422041
[14]	valid_0's binary_logloss: 0.421108
[15]	valid_0's binary_logloss: 0.420237
[16]	valid_0's binary_logloss: 0.419378
[17]	valid_0's binary_logloss: 0.418518
[18]	valid_0's binary_logloss: 0.417815
[19]	valid_0's binary_logloss: 0.417137
[20]	valid_0's binary_logloss: 0.416514
[21]	valid_0's binary_lo

[206]	valid_0's binary_logloss: 0.402049
[207]	valid_0's binary_logloss: 0.402029
[208]	valid_0's binary_logloss: 0.402015
[209]	valid_0's binary_logloss: 0.402016
[210]	valid_0's binary_logloss: 0.402
[211]	valid_0's binary_logloss: 0.402003
[212]	valid_0's binary_logloss: 0.401975
[213]	valid_0's binary_logloss: 0.40198
[214]	valid_0's binary_logloss: 0.401972
[215]	valid_0's binary_logloss: 0.401965
[216]	valid_0's binary_logloss: 0.401964
[217]	valid_0's binary_logloss: 0.401972
[218]	valid_0's binary_logloss: 0.401967
[219]	valid_0's binary_logloss: 0.401957
[220]	valid_0's binary_logloss: 0.401958
[221]	valid_0's binary_logloss: 0.401957
[222]	valid_0's binary_logloss: 0.401956
[223]	valid_0's binary_logloss: 0.40194
[224]	valid_0's binary_logloss: 0.401926
[225]	valid_0's binary_logloss: 0.401914
[226]	valid_0's binary_logloss: 0.401912
[227]	valid_0's binary_logloss: 0.401901
[228]	valid_0's binary_logloss: 0.401889
[229]	valid_0's binary_logloss: 0.401889
[230]	valid_0's binar

[418]	valid_0's binary_logloss: 0.400575
[419]	valid_0's binary_logloss: 0.400575
[420]	valid_0's binary_logloss: 0.400571
[421]	valid_0's binary_logloss: 0.400564
[422]	valid_0's binary_logloss: 0.400533
[423]	valid_0's binary_logloss: 0.400522
[424]	valid_0's binary_logloss: 0.400515
[425]	valid_0's binary_logloss: 0.400515
[426]	valid_0's binary_logloss: 0.400517
[427]	valid_0's binary_logloss: 0.400516
[428]	valid_0's binary_logloss: 0.400523
[429]	valid_0's binary_logloss: 0.400525
[430]	valid_0's binary_logloss: 0.400521
[431]	valid_0's binary_logloss: 0.400518
[432]	valid_0's binary_logloss: 0.400517
[433]	valid_0's binary_logloss: 0.400514
[434]	valid_0's binary_logloss: 0.400515
[435]	valid_0's binary_logloss: 0.400513
[436]	valid_0's binary_logloss: 0.400511
[437]	valid_0's binary_logloss: 0.400518
[438]	valid_0's binary_logloss: 0.400523
[439]	valid_0's binary_logloss: 0.400522
[440]	valid_0's binary_logloss: 0.400511
[441]	valid_0's binary_logloss: 0.400498
[442]	valid_0's 

[621]	valid_0's binary_logloss: 0.40002
[622]	valid_0's binary_logloss: 0.400028
[623]	valid_0's binary_logloss: 0.400022
[624]	valid_0's binary_logloss: 0.400028
[625]	valid_0's binary_logloss: 0.400018
[626]	valid_0's binary_logloss: 0.400013
[627]	valid_0's binary_logloss: 0.400014
[628]	valid_0's binary_logloss: 0.400012
[629]	valid_0's binary_logloss: 0.400007
[630]	valid_0's binary_logloss: 0.400013
[631]	valid_0's binary_logloss: 0.400012
[632]	valid_0's binary_logloss: 0.400011
[633]	valid_0's binary_logloss: 0.400011
[634]	valid_0's binary_logloss: 0.400011
[635]	valid_0's binary_logloss: 0.400014
[636]	valid_0's binary_logloss: 0.400003
[637]	valid_0's binary_logloss: 0.399998
[638]	valid_0's binary_logloss: 0.399991
[639]	valid_0's binary_logloss: 0.399984
[640]	valid_0's binary_logloss: 0.399983
[641]	valid_0's binary_logloss: 0.399981
[642]	valid_0's binary_logloss: 0.39998
[643]	valid_0's binary_logloss: 0.399983
[644]	valid_0's binary_logloss: 0.399985
[645]	valid_0's bi

[824]	valid_0's binary_logloss: 0.399966
[825]	valid_0's binary_logloss: 0.399968
[826]	valid_0's binary_logloss: 0.399966
[827]	valid_0's binary_logloss: 0.399974
[828]	valid_0's binary_logloss: 0.399979
[829]	valid_0's binary_logloss: 0.399974
[830]	valid_0's binary_logloss: 0.399972
[831]	valid_0's binary_logloss: 0.399965
[832]	valid_0's binary_logloss: 0.399962
[833]	valid_0's binary_logloss: 0.399962
[834]	valid_0's binary_logloss: 0.399959
[835]	valid_0's binary_logloss: 0.39996
[836]	valid_0's binary_logloss: 0.399959
[837]	valid_0's binary_logloss: 0.399958
[838]	valid_0's binary_logloss: 0.399955
[839]	valid_0's binary_logloss: 0.399959
[840]	valid_0's binary_logloss: 0.399976
[841]	valid_0's binary_logloss: 0.399966
[842]	valid_0's binary_logloss: 0.399966
[843]	valid_0's binary_logloss: 0.399957
[844]	valid_0's binary_logloss: 0.399956
[845]	valid_0's binary_logloss: 0.399953
[846]	valid_0's binary_logloss: 0.399955
[847]	valid_0's binary_logloss: 0.399962
[848]	valid_0's b

[1026]	valid_0's binary_logloss: 0.399965
[1027]	valid_0's binary_logloss: 0.399968
[1028]	valid_0's binary_logloss: 0.399968
[1029]	valid_0's binary_logloss: 0.399977
[1030]	valid_0's binary_logloss: 0.399975
[1031]	valid_0's binary_logloss: 0.399971
[1032]	valid_0's binary_logloss: 0.399973
[1033]	valid_0's binary_logloss: 0.399972
[1034]	valid_0's binary_logloss: 0.399966
[1035]	valid_0's binary_logloss: 0.39997
[1036]	valid_0's binary_logloss: 0.399961
[1037]	valid_0's binary_logloss: 0.399956
[1038]	valid_0's binary_logloss: 0.399948
[1039]	valid_0's binary_logloss: 0.399958
[1040]	valid_0's binary_logloss: 0.399957
[1041]	valid_0's binary_logloss: 0.399962
[1042]	valid_0's binary_logloss: 0.399965
[1043]	valid_0's binary_logloss: 0.399969
[1044]	valid_0's binary_logloss: 0.399971
[1045]	valid_0's binary_logloss: 0.399976
[1046]	valid_0's binary_logloss: 0.399976
[1047]	valid_0's binary_logloss: 0.399981
[1048]	valid_0's binary_logloss: 0.399981
[1049]	valid_0's binary_logloss: 0.

[1225]	valid_0's binary_logloss: 0.400106
[1226]	valid_0's binary_logloss: 0.400105
[1227]	valid_0's binary_logloss: 0.400109
[1228]	valid_0's binary_logloss: 0.40011
[1229]	valid_0's binary_logloss: 0.400109
[1230]	valid_0's binary_logloss: 0.400106
[1231]	valid_0's binary_logloss: 0.400103
[1232]	valid_0's binary_logloss: 0.40011
[1233]	valid_0's binary_logloss: 0.400123
[1234]	valid_0's binary_logloss: 0.400125
[1235]	valid_0's binary_logloss: 0.400126
[1236]	valid_0's binary_logloss: 0.400128
[1237]	valid_0's binary_logloss: 0.400127
[1238]	valid_0's binary_logloss: 0.400134
[1239]	valid_0's binary_logloss: 0.400135
[1240]	valid_0's binary_logloss: 0.400138
[1241]	valid_0's binary_logloss: 0.400137
[1242]	valid_0's binary_logloss: 0.400132
[1243]	valid_0's binary_logloss: 0.400132
[1244]	valid_0's binary_logloss: 0.400128
[1245]	valid_0's binary_logloss: 0.400126
[1246]	valid_0's binary_logloss: 0.400121
[1247]	valid_0's binary_logloss: 0.400121
[1248]	valid_0's binary_logloss: 0.4

[1432]	valid_0's binary_logloss: 0.400352
[1433]	valid_0's binary_logloss: 0.400351
[1434]	valid_0's binary_logloss: 0.40035
[1435]	valid_0's binary_logloss: 0.400351
[1436]	valid_0's binary_logloss: 0.400353
[1437]	valid_0's binary_logloss: 0.400361
[1438]	valid_0's binary_logloss: 0.400358
[1439]	valid_0's binary_logloss: 0.400358
[1440]	valid_0's binary_logloss: 0.40036
[1441]	valid_0's binary_logloss: 0.400366
[1442]	valid_0's binary_logloss: 0.400374
[1443]	valid_0's binary_logloss: 0.400382
[1444]	valid_0's binary_logloss: 0.400385
[1445]	valid_0's binary_logloss: 0.400388
[1446]	valid_0's binary_logloss: 0.400384
[1447]	valid_0's binary_logloss: 0.40038
[1448]	valid_0's binary_logloss: 0.400376
[1449]	valid_0's binary_logloss: 0.400368
[1450]	valid_0's binary_logloss: 0.400372
[1451]	valid_0's binary_logloss: 0.400369
[1452]	valid_0's binary_logloss: 0.40037
[1453]	valid_0's binary_logloss: 0.400376
[1454]	valid_0's binary_logloss: 0.400379
[1455]	valid_0's binary_logloss: 0.400

[1632]	valid_0's binary_logloss: 0.400516
[1633]	valid_0's binary_logloss: 0.400515
[1634]	valid_0's binary_logloss: 0.400517
[1635]	valid_0's binary_logloss: 0.400524
[1636]	valid_0's binary_logloss: 0.400525
[1637]	valid_0's binary_logloss: 0.400527
[1638]	valid_0's binary_logloss: 0.400523
[1639]	valid_0's binary_logloss: 0.400522
[1640]	valid_0's binary_logloss: 0.400517
[1641]	valid_0's binary_logloss: 0.40052
[1642]	valid_0's binary_logloss: 0.400526
[1643]	valid_0's binary_logloss: 0.400532
[1644]	valid_0's binary_logloss: 0.400536
[1645]	valid_0's binary_logloss: 0.400542
[1646]	valid_0's binary_logloss: 0.400543
[1647]	valid_0's binary_logloss: 0.400552
[1648]	valid_0's binary_logloss: 0.400552
[1649]	valid_0's binary_logloss: 0.400551
[1650]	valid_0's binary_logloss: 0.400556
[1651]	valid_0's binary_logloss: 0.400561
[1652]	valid_0's binary_logloss: 0.400563
[1653]	valid_0's binary_logloss: 0.400571
[1654]	valid_0's binary_logloss: 0.400571
[1655]	valid_0's binary_logloss: 0.

[1837]	valid_0's binary_logloss: 0.40077
[1838]	valid_0's binary_logloss: 0.400769
[1839]	valid_0's binary_logloss: 0.400769
[1840]	valid_0's binary_logloss: 0.400772
[1841]	valid_0's binary_logloss: 0.400772
[1842]	valid_0's binary_logloss: 0.400769
[1843]	valid_0's binary_logloss: 0.400767
[1844]	valid_0's binary_logloss: 0.400769
[1845]	valid_0's binary_logloss: 0.400775
[1846]	valid_0's binary_logloss: 0.400783
[1847]	valid_0's binary_logloss: 0.400785
[1848]	valid_0's binary_logloss: 0.400786
[1849]	valid_0's binary_logloss: 0.400791
[1850]	valid_0's binary_logloss: 0.400782
[1851]	valid_0's binary_logloss: 0.400787
[1852]	valid_0's binary_logloss: 0.400787
[1853]	valid_0's binary_logloss: 0.400793
[1854]	valid_0's binary_logloss: 0.400784
[1855]	valid_0's binary_logloss: 0.400785
[1856]	valid_0's binary_logloss: 0.400784
[1857]	valid_0's binary_logloss: 0.400787
[1858]	valid_0's binary_logloss: 0.400795
[1859]	valid_0's binary_logloss: 0.400802
[1860]	valid_0's binary_logloss: 0.

In [69]:
# predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)


In [70]:
print(y_pred)

[0.11295624 0.21684206 0.22828585 ... 0.14440212 0.17542945 0.09905702]


In [71]:
# AUC
roc_auc_score(y_test, y_pred)

0.7429538586402475

In [72]:
#logloss
log_loss(y_test, y_pred)

0.399887375351831