In [1]:
# Pandas is an open source Python package that provides numerous tools for data analysis.
# Numpy is the fundamental package for scientific computing with Python. 
import pandas as pd #pd is an alias pointing to pandas
import numpy as np # np is an alias pointing to numpy

# Seed the generator to make this notebook's output stable across runs
np.random.seed(89)
from datetime import datetime
import random

# To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt #pyplot is matplotlib's plotting framework https://matplotlib.org/users/pyplot_tutorial.html
from matplotlib.colors import ListedColormap # Learn more about matplotlib.colors here https://matplotlib.org/3.1.1/api/colors_api.html
import seaborn as sns # sns is an alias pointing to seaborn

# Standardize features by removing the mean and scaling to unit variance
from sklearn.preprocessing import StandardScaler

# Split validation
from sklearn.model_selection import train_test_split

# GridSearchCV, KFold and Cross-Validation Score
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score

In [2]:
train = pd.read_csv('C:\\Users\\cassi\\Desktop\\ML Project\\Project Data\\ProjectTrainingData.csv')

In [3]:
# Look at basics of Dataframe df.# Look at basics of Dataframe 
print(train.head(5))
print(train.columns)

             id  click      hour    C1  banner_pos   site_id site_domain  \
0  1.000009e+18      0  14102100  1005           0  1fbe01fe    f3845767   
1  1.000017e+19      0  14102100  1005           0  1fbe01fe    f3845767   
2  1.000037e+19      0  14102100  1005           0  1fbe01fe    f3845767   
3  1.000064e+19      0  14102100  1005           0  1fbe01fe    f3845767   
4  1.000068e+19      0  14102100  1005           1  fe8cc448    9166c161   

  site_category    app_id app_domain  ... device_type device_conn_type    C14  \
0      28905ebd  ecad2386   7801e8d9  ...           1                2  15706   
1      28905ebd  ecad2386   7801e8d9  ...           1                0  15704   
2      28905ebd  ecad2386   7801e8d9  ...           1                0  15704   
3      28905ebd  ecad2386   7801e8d9  ...           1                0  15706   
4      0569f928  ecad2386   7801e8d9  ...           1                0  18993   

   C15  C16   C17  C18  C19     C20  C21  
0  320   50  

In [4]:
# Print columns
print(train.columns)

Index(['id', 'click', 'hour', 'C1', 'banner_pos', 'site_id', 'site_domain',
       'site_category', 'app_id', 'app_domain', 'app_category', 'device_id',
       'device_ip', 'device_model', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21'],
      dtype='object')


In [5]:
# Print data types of columns
print(train.dtypes)

id                  float64
click                 int64
hour                  int64
C1                    int64
banner_pos            int64
site_id              object
site_domain          object
site_category        object
app_id               object
app_domain           object
app_category         object
device_id            object
device_ip            object
device_model         object
device_type           int64
device_conn_type      int64
C14                   int64
C15                   int64
C16                   int64
C17                   int64
C18                   int64
C19                   int64
C20                   int64
C21                   int64
dtype: object


In [6]:
# Print info
print(train.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31991090 entries, 0 to 31991089
Data columns (total 24 columns):
 #   Column            Dtype  
---  ------            -----  
 0   id                float64
 1   click             int64  
 2   hour              int64  
 3   C1                int64  
 4   banner_pos        int64  
 5   site_id           object 
 6   site_domain       object 
 7   site_category     object 
 8   app_id            object 
 9   app_domain        object 
 10  app_category      object 
 11  device_id         object 
 12  device_ip         object 
 13  device_model      object 
 14  device_type       int64  
 15  device_conn_type  int64  
 16  C14               int64  
 17  C15               int64  
 18  C16               int64  
 19  C17               int64  
 20  C18               int64  
 21  C19               int64  
 22  C20               int64  
 23  C21               int64  
dtypes: float64(1), int64(14), object(9)
memory usage: 5.7+ GB
None


In [7]:
# Print missing values by column 
print(train.isnull().sum(axis = 0))


id                  0
click               0
hour                0
C1                  0
banner_pos          0
site_id             0
site_domain         0
site_category       0
app_id              0
app_domain          0
app_category        0
device_id           0
device_ip           0
device_model        0
device_type         0
device_conn_type    0
C14                 0
C15                 0
C16                 0
C17                 0
C18                 0
C19                 0
C20                 0
C21                 0
dtype: int64


In [8]:
# Print total number of missing values in rows
print(train.isnull().sum(axis = 1).sum())


0


In [9]:
# Change the hour column to a datetime and extract hour of day
train['hour'] = pd.to_datetime(train['hour'], format = '%y%m%d%H')
train['hour_of_day'] = train['hour'].dt.hour
print(train.head(5))


             id  click       hour    C1  banner_pos   site_id site_domain  \
0  1.000009e+18      0 2014-10-21  1005           0  1fbe01fe    f3845767   
1  1.000017e+19      0 2014-10-21  1005           0  1fbe01fe    f3845767   
2  1.000037e+19      0 2014-10-21  1005           0  1fbe01fe    f3845767   
3  1.000064e+19      0 2014-10-21  1005           0  1fbe01fe    f3845767   
4  1.000068e+19      0 2014-10-21  1005           1  fe8cc448    9166c161   

  site_category    app_id app_domain  ... device_conn_type    C14  C15 C16  \
0      28905ebd  ecad2386   7801e8d9  ...                2  15706  320  50   
1      28905ebd  ecad2386   7801e8d9  ...                0  15704  320  50   
2      28905ebd  ecad2386   7801e8d9  ...                0  15704  320  50   
3      28905ebd  ecad2386   7801e8d9  ...                0  15706  320  50   
4      0569f928  ecad2386   7801e8d9  ...                0  18993  320  50   

    C17  C18  C19     C20  C21  hour_of_day  
0  1722    0   35     

In [1]:
# Day of week
train['day_of_week'] = train['hour'].apply(lambda val: val.day_name())


In [36]:
# Hash function

def convert_obj_to_int(self):
    
    object_list_columns = self.columns
    object_list_dtypes = self.dtypes
    new_col_suffix = '_int'
    for index in range(0,len(object_list_columns)):
        if object_list_dtypes[index] == object :
            self[object_list_columns[index]+new_col_suffix] = self[object_list_columns[index]].map( lambda  x: hash(x))
            self.drop([object_list_columns[index]],inplace=True,axis=1)
    return self
train = convert_obj_to_int(train)

In [37]:
train.dtypes

id                          float64
click                         int64
hour                 datetime64[ns]
C1                            int64
banner_pos                    int64
device_type                   int64
device_conn_type              int64
C14                           int64
C15                           int64
C16                           int64
C17                           int64
C18                           int64
C19                           int64
C20                           int64
C21                           int64
hour_of_day                   int64
site_id_int                   int64
site_domain_int               int64
site_category_int             int64
app_id_int                    int64
app_domain_int                int64
app_category_int              int64
device_id_int                 int64
device_ip_int                 int64
device_model_int              int64
day_of_week_int               int64
dtype: object

# Logistic Regression Model Training

In [39]:
# Import necessary libraries and modules
from sklearn import linear_model
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, log_loss

In [40]:
# Features
features = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'hour_of_day',
       'site_id_int', 'site_domain_int', 'site_category_int', 'app_id_int',
       'app_domain_int', 'app_category_int', 'device_id_int', 'device_ip_int',
       'device_model_int', 'day_of_week_int']

In [41]:
# Retriving Attributes
X = train.loc[:, features]

In [42]:
# Retriving Target Varibale
y = train.click.values

In [43]:
######################## Split the Data ######################

# Imports
from sklearn.model_selection import train_test_split

# Split Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify=y)

In [47]:
# Logistic Regression Classifier
lr = linear_model.LogisticRegression(multi_class='auto', C=0.0001, solver="liblinear", penalty='l2')

In [48]:
# Train the model
lr = lr.fit(X_train, y_train)

In [49]:
# Predict the labels of the test set: preds
y_pred = lr.predict(X_test) # Classification prediction
y_pred_prob = lr.predict_proba(X_test) # Class probabilities

In [50]:
# Accuracy Score
accuracy_score(y_test, y_pred)

0.8280094551326636

In [51]:
# F-1 Score
f1_score(y_test, y_pred)

0.00666715411091523

In [52]:
# AUC
roc_auc_score(y_test, y_pred)

0.5000860027943145

# Xgboost Model Training

In [53]:
# Imports
from operator import itemgetter
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, log_loss


In [54]:
# Features
features = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'hour_of_day',
       'site_id_int', 'site_domain_int', 'site_category_int', 'app_id_int',
       'app_domain_int', 'app_category_int', 'device_id_int', 'device_ip_int',
       'device_model_int', 'day_of_week_int']

In [55]:
# Retriving Attributes
X = train.loc[:, features]

In [56]:
# Retriving Target Varibale
y = train.click.values

In [57]:
######################## Split the Data ######################

# Imports
from sklearn.model_selection import train_test_split

# Split Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify=y)

In [58]:
# Instantiate the XGBClassifier: xg_cl
xg_cl = xgb.XGBClassifier(objective='binary:logistic', n_estimators=2000, seed=123)

In [59]:
# Fit the classifier to the training set
xg_cl.fit(X_train,y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=2000, n_jobs=12, num_parallel_tree=1,
              random_state=123, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              seed=123, subsample=1, tree_method='approx',
              validate_parameters=1, verbosity=None)

In [60]:
# Predict the labels of the test set: preds
y_pred = xg_cl.predict(X_test) # Classification prediction
y_pred_prob = xg_cl.predict_proba(X_test) # Class probabilities


In [61]:
# Compute the accuracy: accuracy
accuracy = float(np.sum(y_pred==y_test))/y_test.shape[0]
print("accuracy: %f" % (accuracy))

accuracy: 0.835654


In [62]:
# Accuracy Score
accuracy_score(y_test, y_pred)

0.8356537710968898

In [63]:
# F-1 Score
f1_score(y_test, y_pred)

0.1603514070094551

In [64]:
# AUC
roc_auc_score(y_test, y_pred)

0.540072586197441

# Xgboost Model Training - 2

In [65]:
# Imports
from operator import itemgetter
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, log_loss


In [66]:
# Features
features = ['C1', 'banner_pos', 'device_type', 'device_conn_type', 'C14',
       'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'hour_of_day',
       'site_id_int', 'site_domain_int', 'site_category_int', 'app_id_int',
       'app_domain_int', 'app_category_int', 'device_id_int', 'device_ip_int',
       'device_model_int', 'day_of_week_int']

In [67]:
# Retriving Attributes
X = train.loc[:, features]

In [68]:
# Retriving Target Varibale
y = train.click.values

In [69]:
######################## Split the Data ######################

# Imports
from sklearn.model_selection import train_test_split

# Split Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify=y)

In [70]:
dtrain = xgb.DMatrix(X_train, label=y_train)

In [71]:
dtest = xgb.DMatrix(X_test, label=y_test)

In [72]:
params = {
        "objective": "binary:logistic",
        "booster" : "gbtree",
        "eta": 0.1,
        "max_depth": 5,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "seed": 42,
        "eval_metric": "logloss",
        }
num_boost_round = 2000
early_stopping_rounds = 20
test_size = 0.2
watchlist = [(dtrain, 'train'), (dtest, 'eval')]

In [73]:
model = xgb.train(params, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=early_stopping_rounds, verbose_eval=True)

[0]	train-logloss:0.64705	eval-logloss:0.64887
[1]	train-logloss:0.61105	eval-logloss:0.61304
[2]	train-logloss:0.58051	eval-logloss:0.58317
[3]	train-logloss:0.55722	eval-logloss:0.55835
[4]	train-logloss:0.53684	eval-logloss:0.53678
[5]	train-logloss:0.51721	eval-logloss:0.51981
[6]	train-logloss:0.50602	eval-logloss:0.50444
[7]	train-logloss:0.49011	eval-logloss:0.49213
[8]	train-logloss:0.48163	eval-logloss:0.48211
[9]	train-logloss:0.47393	eval-logloss:0.47296
[10]	train-logloss:0.46498	eval-logloss:0.46541
[11]	train-logloss:0.45816	eval-logloss:0.45899
[12]	train-logloss:0.45351	eval-logloss:0.45333
[13]	train-logloss:0.44874	eval-logloss:0.44812
[14]	train-logloss:0.44449	eval-logloss:0.44430
[15]	train-logloss:0.44037	eval-logloss:0.44094
[16]	train-logloss:0.43808	eval-logloss:0.43783
[17]	train-logloss:0.43544	eval-logloss:0.43539
[18]	train-logloss:0.43313	eval-logloss:0.43356
[19]	train-logloss:0.43189	eval-logloss:0.43132
[20]	train-logloss:0.43006	eval-logloss:0.43017
[2

[170]	train-logloss:0.40473	eval-logloss:0.40473
[171]	train-logloss:0.40468	eval-logloss:0.40466
[172]	train-logloss:0.40463	eval-logloss:0.40463
[173]	train-logloss:0.40463	eval-logloss:0.40462
[174]	train-logloss:0.40458	eval-logloss:0.40458
[175]	train-logloss:0.40454	eval-logloss:0.40454
[176]	train-logloss:0.40453	eval-logloss:0.40453
[177]	train-logloss:0.40450	eval-logloss:0.40451
[178]	train-logloss:0.40444	eval-logloss:0.40445
[179]	train-logloss:0.40440	eval-logloss:0.40440
[180]	train-logloss:0.40438	eval-logloss:0.40437
[181]	train-logloss:0.40433	eval-logloss:0.40433
[182]	train-logloss:0.40432	eval-logloss:0.40431
[183]	train-logloss:0.40429	eval-logloss:0.40428
[184]	train-logloss:0.40425	eval-logloss:0.40425
[185]	train-logloss:0.40421	eval-logloss:0.40421
[186]	train-logloss:0.40419	eval-logloss:0.40420
[187]	train-logloss:0.40418	eval-logloss:0.40417
[188]	train-logloss:0.40416	eval-logloss:0.40415
[189]	train-logloss:0.40410	eval-logloss:0.40412
[190]	train-logloss:

[338]	train-logloss:0.40080	eval-logloss:0.40096
[339]	train-logloss:0.40079	eval-logloss:0.40095
[340]	train-logloss:0.40077	eval-logloss:0.40093
[341]	train-logloss:0.40076	eval-logloss:0.40092
[342]	train-logloss:0.40074	eval-logloss:0.40091
[343]	train-logloss:0.40073	eval-logloss:0.40090
[344]	train-logloss:0.40071	eval-logloss:0.40088
[345]	train-logloss:0.40071	eval-logloss:0.40088
[346]	train-logloss:0.40069	eval-logloss:0.40085
[347]	train-logloss:0.40068	eval-logloss:0.40085
[348]	train-logloss:0.40068	eval-logloss:0.40084
[349]	train-logloss:0.40067	eval-logloss:0.40084
[350]	train-logloss:0.40066	eval-logloss:0.40082
[351]	train-logloss:0.40064	eval-logloss:0.40080
[352]	train-logloss:0.40064	eval-logloss:0.40080
[353]	train-logloss:0.40063	eval-logloss:0.40079
[354]	train-logloss:0.40060	eval-logloss:0.40077
[355]	train-logloss:0.40060	eval-logloss:0.40077
[356]	train-logloss:0.40058	eval-logloss:0.40075
[357]	train-logloss:0.40057	eval-logloss:0.40073
[358]	train-logloss:

[506]	train-logloss:0.39880	eval-logloss:0.39913
[507]	train-logloss:0.39880	eval-logloss:0.39912
[508]	train-logloss:0.39879	eval-logloss:0.39911
[509]	train-logloss:0.39878	eval-logloss:0.39911
[510]	train-logloss:0.39877	eval-logloss:0.39910
[511]	train-logloss:0.39877	eval-logloss:0.39909
[512]	train-logloss:0.39876	eval-logloss:0.39909
[513]	train-logloss:0.39876	eval-logloss:0.39909
[514]	train-logloss:0.39875	eval-logloss:0.39907
[515]	train-logloss:0.39874	eval-logloss:0.39907
[516]	train-logloss:0.39873	eval-logloss:0.39906
[517]	train-logloss:0.39872	eval-logloss:0.39904
[518]	train-logloss:0.39871	eval-logloss:0.39903
[519]	train-logloss:0.39870	eval-logloss:0.39903
[520]	train-logloss:0.39869	eval-logloss:0.39902
[521]	train-logloss:0.39869	eval-logloss:0.39901
[522]	train-logloss:0.39868	eval-logloss:0.39900
[523]	train-logloss:0.39867	eval-logloss:0.39899
[524]	train-logloss:0.39867	eval-logloss:0.39899
[525]	train-logloss:0.39866	eval-logloss:0.39898
[526]	train-logloss:

[674]	train-logloss:0.39757	eval-logloss:0.39799
[675]	train-logloss:0.39756	eval-logloss:0.39798
[676]	train-logloss:0.39755	eval-logloss:0.39797
[677]	train-logloss:0.39755	eval-logloss:0.39797
[678]	train-logloss:0.39754	eval-logloss:0.39796
[679]	train-logloss:0.39753	eval-logloss:0.39796
[680]	train-logloss:0.39753	eval-logloss:0.39795
[681]	train-logloss:0.39752	eval-logloss:0.39794
[682]	train-logloss:0.39751	eval-logloss:0.39794
[683]	train-logloss:0.39751	eval-logloss:0.39793
[684]	train-logloss:0.39750	eval-logloss:0.39792
[685]	train-logloss:0.39750	eval-logloss:0.39792
[686]	train-logloss:0.39749	eval-logloss:0.39792
[687]	train-logloss:0.39749	eval-logloss:0.39791
[688]	train-logloss:0.39749	eval-logloss:0.39791
[689]	train-logloss:0.39749	eval-logloss:0.39791
[690]	train-logloss:0.39748	eval-logloss:0.39790
[691]	train-logloss:0.39748	eval-logloss:0.39790
[692]	train-logloss:0.39747	eval-logloss:0.39789
[693]	train-logloss:0.39747	eval-logloss:0.39789
[694]	train-logloss:

[842]	train-logloss:0.39667	eval-logloss:0.39717
[843]	train-logloss:0.39667	eval-logloss:0.39716
[844]	train-logloss:0.39666	eval-logloss:0.39716
[845]	train-logloss:0.39666	eval-logloss:0.39716
[846]	train-logloss:0.39666	eval-logloss:0.39716
[847]	train-logloss:0.39666	eval-logloss:0.39715
[848]	train-logloss:0.39666	eval-logloss:0.39715
[849]	train-logloss:0.39665	eval-logloss:0.39715
[850]	train-logloss:0.39665	eval-logloss:0.39715
[851]	train-logloss:0.39665	eval-logloss:0.39714
[852]	train-logloss:0.39664	eval-logloss:0.39714
[853]	train-logloss:0.39664	eval-logloss:0.39714
[854]	train-logloss:0.39664	eval-logloss:0.39714
[855]	train-logloss:0.39663	eval-logloss:0.39714
[856]	train-logloss:0.39663	eval-logloss:0.39713
[857]	train-logloss:0.39663	eval-logloss:0.39713
[858]	train-logloss:0.39662	eval-logloss:0.39713
[859]	train-logloss:0.39662	eval-logloss:0.39712
[860]	train-logloss:0.39661	eval-logloss:0.39712
[861]	train-logloss:0.39661	eval-logloss:0.39711
[862]	train-logloss:

[1010]	train-logloss:0.39606	eval-logloss:0.39663
[1011]	train-logloss:0.39605	eval-logloss:0.39662
[1012]	train-logloss:0.39605	eval-logloss:0.39662
[1013]	train-logloss:0.39605	eval-logloss:0.39662
[1014]	train-logloss:0.39604	eval-logloss:0.39662
[1015]	train-logloss:0.39604	eval-logloss:0.39661
[1016]	train-logloss:0.39604	eval-logloss:0.39661
[1017]	train-logloss:0.39603	eval-logloss:0.39661
[1018]	train-logloss:0.39603	eval-logloss:0.39660
[1019]	train-logloss:0.39603	eval-logloss:0.39660
[1020]	train-logloss:0.39602	eval-logloss:0.39660
[1021]	train-logloss:0.39602	eval-logloss:0.39659
[1022]	train-logloss:0.39602	eval-logloss:0.39659
[1023]	train-logloss:0.39601	eval-logloss:0.39659
[1024]	train-logloss:0.39601	eval-logloss:0.39659
[1025]	train-logloss:0.39601	eval-logloss:0.39659
[1026]	train-logloss:0.39600	eval-logloss:0.39658
[1027]	train-logloss:0.39600	eval-logloss:0.39658
[1028]	train-logloss:0.39600	eval-logloss:0.39658
[1029]	train-logloss:0.39600	eval-logloss:0.39658


[1174]	train-logloss:0.39553	eval-logloss:0.39617
[1175]	train-logloss:0.39553	eval-logloss:0.39617
[1176]	train-logloss:0.39553	eval-logloss:0.39617
[1177]	train-logloss:0.39552	eval-logloss:0.39616
[1178]	train-logloss:0.39552	eval-logloss:0.39616
[1179]	train-logloss:0.39552	eval-logloss:0.39615
[1180]	train-logloss:0.39551	eval-logloss:0.39615
[1181]	train-logloss:0.39551	eval-logloss:0.39615
[1182]	train-logloss:0.39551	eval-logloss:0.39615
[1183]	train-logloss:0.39551	eval-logloss:0.39614
[1184]	train-logloss:0.39550	eval-logloss:0.39614
[1185]	train-logloss:0.39550	eval-logloss:0.39614
[1186]	train-logloss:0.39550	eval-logloss:0.39614
[1187]	train-logloss:0.39549	eval-logloss:0.39613
[1188]	train-logloss:0.39549	eval-logloss:0.39613
[1189]	train-logloss:0.39549	eval-logloss:0.39613
[1190]	train-logloss:0.39549	eval-logloss:0.39613
[1191]	train-logloss:0.39549	eval-logloss:0.39613
[1192]	train-logloss:0.39549	eval-logloss:0.39613
[1193]	train-logloss:0.39548	eval-logloss:0.39612


[1338]	train-logloss:0.39509	eval-logloss:0.39578
[1339]	train-logloss:0.39509	eval-logloss:0.39578
[1340]	train-logloss:0.39508	eval-logloss:0.39578
[1341]	train-logloss:0.39508	eval-logloss:0.39577
[1342]	train-logloss:0.39508	eval-logloss:0.39577
[1343]	train-logloss:0.39508	eval-logloss:0.39577
[1344]	train-logloss:0.39508	eval-logloss:0.39577
[1345]	train-logloss:0.39508	eval-logloss:0.39577
[1346]	train-logloss:0.39508	eval-logloss:0.39577
[1347]	train-logloss:0.39507	eval-logloss:0.39577
[1348]	train-logloss:0.39507	eval-logloss:0.39576
[1349]	train-logloss:0.39506	eval-logloss:0.39576
[1350]	train-logloss:0.39506	eval-logloss:0.39576
[1351]	train-logloss:0.39506	eval-logloss:0.39575
[1352]	train-logloss:0.39506	eval-logloss:0.39575
[1353]	train-logloss:0.39505	eval-logloss:0.39575
[1354]	train-logloss:0.39505	eval-logloss:0.39575
[1355]	train-logloss:0.39505	eval-logloss:0.39575
[1356]	train-logloss:0.39504	eval-logloss:0.39574
[1357]	train-logloss:0.39504	eval-logloss:0.39574


[1502]	train-logloss:0.39472	eval-logloss:0.39547
[1503]	train-logloss:0.39472	eval-logloss:0.39547
[1504]	train-logloss:0.39472	eval-logloss:0.39547
[1505]	train-logloss:0.39472	eval-logloss:0.39547
[1506]	train-logloss:0.39472	eval-logloss:0.39547
[1507]	train-logloss:0.39472	eval-logloss:0.39547
[1508]	train-logloss:0.39471	eval-logloss:0.39546
[1509]	train-logloss:0.39471	eval-logloss:0.39546
[1510]	train-logloss:0.39471	eval-logloss:0.39546
[1511]	train-logloss:0.39470	eval-logloss:0.39546
[1512]	train-logloss:0.39470	eval-logloss:0.39546
[1513]	train-logloss:0.39470	eval-logloss:0.39545
[1514]	train-logloss:0.39470	eval-logloss:0.39545
[1515]	train-logloss:0.39470	eval-logloss:0.39545
[1516]	train-logloss:0.39470	eval-logloss:0.39545
[1517]	train-logloss:0.39469	eval-logloss:0.39545
[1518]	train-logloss:0.39469	eval-logloss:0.39544
[1519]	train-logloss:0.39469	eval-logloss:0.39544
[1520]	train-logloss:0.39469	eval-logloss:0.39544
[1521]	train-logloss:0.39469	eval-logloss:0.39544


[1666]	train-logloss:0.39437	eval-logloss:0.39519
[1667]	train-logloss:0.39437	eval-logloss:0.39518
[1668]	train-logloss:0.39437	eval-logloss:0.39518
[1669]	train-logloss:0.39436	eval-logloss:0.39518
[1670]	train-logloss:0.39436	eval-logloss:0.39517
[1671]	train-logloss:0.39436	eval-logloss:0.39517
[1672]	train-logloss:0.39436	eval-logloss:0.39517
[1673]	train-logloss:0.39436	eval-logloss:0.39517
[1674]	train-logloss:0.39436	eval-logloss:0.39517
[1675]	train-logloss:0.39435	eval-logloss:0.39517
[1676]	train-logloss:0.39435	eval-logloss:0.39517
[1677]	train-logloss:0.39435	eval-logloss:0.39517
[1678]	train-logloss:0.39435	eval-logloss:0.39516
[1679]	train-logloss:0.39435	eval-logloss:0.39516
[1680]	train-logloss:0.39434	eval-logloss:0.39516
[1681]	train-logloss:0.39434	eval-logloss:0.39516
[1682]	train-logloss:0.39434	eval-logloss:0.39516
[1683]	train-logloss:0.39434	eval-logloss:0.39516
[1684]	train-logloss:0.39434	eval-logloss:0.39515
[1685]	train-logloss:0.39433	eval-logloss:0.39515


[1830]	train-logloss:0.39408	eval-logloss:0.39494
[1831]	train-logloss:0.39408	eval-logloss:0.39494
[1832]	train-logloss:0.39408	eval-logloss:0.39493
[1833]	train-logloss:0.39408	eval-logloss:0.39493
[1834]	train-logloss:0.39407	eval-logloss:0.39493
[1835]	train-logloss:0.39407	eval-logloss:0.39493
[1836]	train-logloss:0.39407	eval-logloss:0.39493
[1837]	train-logloss:0.39407	eval-logloss:0.39493
[1838]	train-logloss:0.39407	eval-logloss:0.39493
[1839]	train-logloss:0.39407	eval-logloss:0.39493
[1840]	train-logloss:0.39407	eval-logloss:0.39493
[1841]	train-logloss:0.39406	eval-logloss:0.39492
[1842]	train-logloss:0.39406	eval-logloss:0.39492
[1843]	train-logloss:0.39406	eval-logloss:0.39492
[1844]	train-logloss:0.39406	eval-logloss:0.39492
[1845]	train-logloss:0.39406	eval-logloss:0.39492
[1846]	train-logloss:0.39405	eval-logloss:0.39492
[1847]	train-logloss:0.39405	eval-logloss:0.39492
[1848]	train-logloss:0.39405	eval-logloss:0.39492
[1849]	train-logloss:0.39405	eval-logloss:0.39492


[1994]	train-logloss:0.39381	eval-logloss:0.39473
[1995]	train-logloss:0.39381	eval-logloss:0.39473
[1996]	train-logloss:0.39381	eval-logloss:0.39473
[1997]	train-logloss:0.39381	eval-logloss:0.39473
[1998]	train-logloss:0.39380	eval-logloss:0.39473
[1999]	train-logloss:0.39380	eval-logloss:0.39472


In [74]:
predictions = model.predict(dtest)

In [75]:
print(predictions)

[0.09433921 0.3072926  0.04445183 ... 0.31683278 0.25221056 0.07827   ]


In [76]:
# AUC
roc_auc_score(y_test, predictions)

0.7526598857219878

In [77]:
log_loss(y_test, predictions)

0.394763147419688