In [1]:
from datetime import datetime
import pickle

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

In [2]:
def uplift_score(prediction, treatment, target, rate=0.3):
    """
    Compute the uplift score at the top `rate` share of each group.

    Observations are ranked by descending `prediction`. Within the treatment
    group (`treatment == 1`) and the control group (`treatment == 0`) the mean
    of `target` is taken over the first `int(group_size * rate)` ranked
    observations; the score is the treatment mean minus the control mean.

    All inputs are converted to NumPy arrays up front, so pandas Series
    (whatever their index) and plain sequences are handled purely by position.

    Parameters
    ----------
    prediction : array-like of shape (n,)
        Ranking score; higher values are ranked first.
    treatment : array-like of shape (n,)
        Group flag: 1 for treatment, 0 for control.
    target : array-like of shape (n,)
        Observed outcome of each observation.
    rate : float, default 0.3
        Share of each group, taken from the top of the ranking, to evaluate.

    Returns
    -------
    float
        Mean target in the top treatment slice minus the mean target in the
        top control slice. NaN when either slice is empty.
    """
    # Strip any pandas index so that every lookup below is positional.
    prediction = np.asarray(prediction)
    treatment = np.asarray(treatment)
    target = np.asarray(target)

    # Positions of the observations, best prediction first.
    order = np.argsort(-prediction)
    ranked_target = target[order]
    ranked_treatment = treatment[order]

    treatment_n = int((treatment == 1).sum() * rate)
    treatment_p = ranked_target[ranked_treatment == 1][:treatment_n].mean()

    control_n = int((treatment == 0).sum() * rate)
    control_p = ranked_target[ranked_treatment == 0][:control_n].mean()

    return treatment_p - control_p

In [3]:
# Shared read_csv options for the header-less target files: the two columns
# are named explicitly and client_id becomes the index.
target_csv_options = dict(
    header=None,
    names=["client_id", "target"],
    index_col="client_id",
)

# Control group: feature frame and its target Series.
X_control_train = pd.read_csv(
    "../data/processed/two_models/X_control_train.csv",
    index_col="client_id",
)
y_control_train = pd.read_csv(
    "../data/processed/two_models/y_control_train.csv",
    **target_csv_options,
)["target"]

# Treatment group: feature frame and its target Series.
X_treatment_train = pd.read_csv(
    "../data/processed/two_models/X_treatment_train.csv",
    index_col="client_id",
)
y_treatment_train = pd.read_csv(
    "../data/processed/two_models/y_treatment_train.csv",
    **target_csv_options,
)["target"]

X_control_train.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_weekday,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000220a0a7,115,15,7516.482892,2.0,896.6,0.0,0.0,0.0,10.548235,0.0,...,5,9,2017,12,49,49,4,15,49,39
0002ce2217,38,13,11973.352672,6.0,219.4,0.0,0.0,0.0,3.047222,0.0,...,2,26,2017,7,30,30,3,10,31,24
00031cbbe6,48,29,9344.993547,6.0,616.8,0.0,-126.0,0.0,4.405714,0.0,...,6,28,2017,5,21,21,2,11,42,54
00035a21d9,69,3,4246.954662,6.0,259.2,0.0,-104.0,-390.0,9.969231,0.0,...,5,9,2019,2,6,6,1,11,33,8
00042a927a,55,17,13078.579713,1.0,336.8,0.0,-1614.0,-150.0,4.881159,0.0,...,3,19,2017,10,42,42,4,13,33,55


In [5]:
# Show the dtype of every column in X_control_train.
X_control_train.dtypes

age                                  int64
n_transactions                       int64
stddev_transaction_time            float64
mode_transaction_weekday           float64
sum_regular_points_received        float64
sum_express_points_received        float64
sum_regular_points_spent           float64
sum_express_points_spent           float64
avg_regular_points_received        float64
avg_express_points_received        float64
avg_regular_points_spent           float64
avg_express_points_spent           float64
stdddev_regular_points_received    float64
stdddev_express_points_received    float64
stdddev_regular_points_spent       float64
stdddev_express_points_spent       float64
sum_purchase_sum                   float64
avg_purchase_sum                   float64
stddev_purchase_sum                float64
sum_product_quantity               float64
avg_product_quantity               float64
stddev_product_quantity            float64
sum_trn_sum_from_iss               float64
avg_trn_sum

In [4]:
# Preview the first rows of y_control_train.
y_control_train.head()

client_id
000220a0a7    1
0002ce2217    1
00031cbbe6    1
00035a21d9    0
00042a927a    1
Name: target, dtype: int64

In [6]:
# Preview the first rows of X_treatment_train.
X_treatment_train.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_weekday,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000036f903,72,32,3027.667379,0.0,348.3,360.0,0.0,0.0,2.15,2.222222,...,0,10,2017,4,15,15,2,10,35,56
00010925a5,83,18,7944.017131,2.0,163.0,0.0,-85.0,0.0,2.089744,0.0,...,1,24,2018,7,30,30,3,10,54,13
0001f552b0,33,15,13764.619367,6.0,552.2,0.0,0.0,0.0,6.42093,0.0,...,4,30,2017,6,26,26,2,12,45,6
00038f9200,79,48,7773.580823,0.0,351.7,0.0,-87.0,0.0,2.225949,0.0,...,4,7,2018,12,49,49,4,10,4,29
00047b3720,53,35,8019.326664,0.0,1399.4,0.0,-20.0,0.0,5.665587,0.0,...,5,13,2018,10,41,41,4,11,49,58


In [7]:
# Preview the first rows of y_treatment_train.
y_treatment_train.head()

client_id
000036f903    1
00010925a5    1
0001f552b0    1
00038f9200    1
00047b3720    0
Name: target, dtype: int64

In [8]:
# Validation features, indexed by client_id.
X_valid = pd.read_csv(
    "../data/processed/two_models/X_valid.csv",
    index_col="client_id",
)

# Validation outcome: header-less two-column file, returned as a Series.
y_valid = pd.read_csv(
    "../data/processed/two_models/y_valid.csv",
    index_col="client_id",
    names=["client_id", "target"],
    header=None,
).loc[:, "target"]

# Treatment flag of each validation client, read the same way.
valid_is_treatment = pd.read_csv(
    "../data/processed/two_models/valid_is_treatment.csv",
    index_col="client_id",
    names=["client_id", "is_treatment"],
    header=None,
).loc[:, "is_treatment"]

In [10]:
# Preview the first rows of X_valid.
X_valid.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_weekday,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ac126a8be6,27,7,13044.929018,1.0,1169.8,0.0,0.0,0.0,13.762353,0.0,...,3,22,2017,6,25,25,2,12,10,48
ba4a67aba7,81,21,3820.134196,1.0,269.0,0.0,0.0,0.0,4.138462,0.0,...,0,25,2017,12,52,52,4,7,56,59
b2fc4ea450,18,11,10384.181488,1.0,36.6,0.0,0.0,0.0,0.963158,0.0,...,1,6,2018,3,10,10,1,13,31,22
67dc2d5e46,31,3,8024.53715,0.0,79.9,0.0,0.0,0.0,4.7,0.0,...,0,19,2017,6,25,25,2,12,10,31
7b061de2b1,57,8,9716.485661,0.0,48.3,0.0,0.0,0.0,2.0125,0.0,...,2,12,2017,7,28,28,3,13,9,38


In [13]:
# Both training frames and the validation frame must expose the same columns
# in the same order.
reference_columns = X_control_train.columns.tolist()
assert reference_columns == X_treatment_train.columns.tolist()
assert reference_columns == X_valid.columns.tolist()

In [14]:
# Test features, indexed by client_id, followed by a preview of the first rows.
X_test = pd.read_csv(
    "../data/processed/two_models/X_test.csv",
    index_col="client_id",
)
X_test.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_weekday,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000048b7a6,68,8,3559.363432,6.0,218.8,0.0,0.0,0.0,3.907143,0.0,...,5,15,2018,12,50,50,4,10,14,12
000073194a,60,17,5256.161881,4.0,518.8,0.0,-576.0,0.0,6.326829,0.0,...,1,23,2017,5,21,21,2,10,17,26
00007c7133,67,11,12254.384311,6.0,459.7,0.0,-1680.0,0.0,5.538554,0.0,...,0,22,2017,5,21,21,2,13,30,51
00007f9014,45,29,10474.665113,5.0,263.2,0.0,-635.0,0.0,2.371171,0.0,...,1,22,2017,8,34,34,3,14,19,23
0000a90cf7,45,35,12898.474409,4.0,1087.9,0.0,-983.0,0.0,6.216571,0.0,...,5,20,2017,5,20,20,2,11,14,55


In [15]:
# Training, validation and test frames must all expose the same columns in the
# same order.
reference_columns = X_control_train.columns.tolist()
assert reference_columns == X_treatment_train.columns.tolist()
assert reference_columns == X_valid.columns.tolist()
assert reference_columns == X_test.columns.tolist()

In [16]:
# Stack control and treatment rows into a single training frame, tagging each
# row with its group (0 = control, 1 = treatment).
# `assign` works on copies, so X_control_train and X_treatment_train keep their
# original columns and the column-equality asserts still pass when re-run.
X_train = pd.concat(
    [
        X_control_train.assign(is_treatment=0),
        X_treatment_train.assign(is_treatment=1),
    ],
    ignore_index=False,  # keep client_id as the index
)
X_train.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds,is_treatment
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000220a0a7,115,15,7516.482892,2.0,896.6,0.0,0.0,0.0,10.548235,0.0,...,9,2017,12,49,49,4,15,49,39,0
0002ce2217,38,13,11973.352672,6.0,219.4,0.0,0.0,0.0,3.047222,0.0,...,26,2017,7,30,30,3,10,31,24,0
00031cbbe6,48,29,9344.993547,6.0,616.8,0.0,-126.0,0.0,4.405714,0.0,...,28,2017,5,21,21,2,11,42,54,0
00035a21d9,69,3,4246.954662,6.0,259.2,0.0,-104.0,-390.0,9.969231,0.0,...,9,2019,2,6,6,1,11,33,8,0
00042a927a,55,17,13078.579713,1.0,336.8,0.0,-1614.0,-150.0,4.881159,0.0,...,19,2017,10,42,42,4,13,33,55,0


In [21]:
# Attach the outcome to every training row. The concatenated Series keeps its
# client_id index, and the column assignment aligns on that index.
y_all_train = pd.concat([y_control_train, y_treatment_train])
X_train["target"] = y_all_train

In [27]:
# Class-transformation label: Z is True for treated clients whose target is 1
# and for control clients whose target is 0; False otherwise.
is_treated = X_train["is_treatment"] == 1
is_control = X_train["is_treatment"] == 0
responded = X_train["target"] == 1
not_responded = X_train["target"] == 0

X_train["Z"] = (is_treated & responded) | (is_control & not_responded)
X_train.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds,is_treatment,target,Z
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000220a0a7,115,15,7516.482892,2.0,896.6,0.0,0.0,0.0,10.548235,0.0,...,12,49,49,4,15,49,39,0,1,False
0002ce2217,38,13,11973.352672,6.0,219.4,0.0,0.0,0.0,3.047222,0.0,...,7,30,30,3,10,31,24,0,1,False
00031cbbe6,48,29,9344.993547,6.0,616.8,0.0,-126.0,0.0,4.405714,0.0,...,5,21,21,2,11,42,54,0,1,False
00035a21d9,69,3,4246.954662,6.0,259.2,0.0,-104.0,-390.0,9.969231,0.0,...,2,6,6,1,11,33,8,0,0,True
00042a927a,55,17,13078.579713,1.0,336.8,0.0,-1614.0,-150.0,4.881159,0.0,...,10,42,42,4,13,33,55,0,1,False


In [28]:
# Turn the boolean Z column into 0/1 integers; all other columns are unchanged.
X_train = X_train.astype({"Z": int})
X_train.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds,is_treatment,target,Z
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000220a0a7,115,15,7516.482892,2.0,896.6,0.0,0.0,0.0,10.548235,0.0,...,12,49,49,4,15,49,39,0,1,0
0002ce2217,38,13,11973.352672,6.0,219.4,0.0,0.0,0.0,3.047222,0.0,...,7,30,30,3,10,31,24,0,1,0
00031cbbe6,48,29,9344.993547,6.0,616.8,0.0,-126.0,0.0,4.405714,0.0,...,5,21,21,2,11,42,54,0,1,0
00035a21d9,69,3,4246.954662,6.0,259.2,0.0,-104.0,-390.0,9.969231,0.0,...,2,6,6,1,11,33,8,0,0,1
00042a927a,55,17,13078.579713,1.0,336.8,0.0,-1614.0,-150.0,4.881159,0.0,...,10,42,42,4,13,33,55,0,1,0


In [30]:
# Z becomes the training label; the helper columns used to build it are removed
# so that X_train holds features only.
helper_columns = ["is_treatment", "target", "Z"]
y_train = X_train["Z"]
X_train = X_train.drop(columns=helper_columns)
X_train.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_weekday,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000220a0a7,115,15,7516.482892,2.0,896.6,0.0,0.0,0.0,10.548235,0.0,...,5,9,2017,12,49,49,4,15,49,39
0002ce2217,38,13,11973.352672,6.0,219.4,0.0,0.0,0.0,3.047222,0.0,...,2,26,2017,7,30,30,3,10,31,24
00031cbbe6,48,29,9344.993547,6.0,616.8,0.0,-126.0,0.0,4.405714,0.0,...,6,28,2017,5,21,21,2,11,42,54
00035a21d9,69,3,4246.954662,6.0,259.2,0.0,-104.0,-390.0,9.969231,0.0,...,5,9,2019,2,6,6,1,11,33,8
00042a927a,55,17,13078.579713,1.0,336.8,0.0,-1614.0,-150.0,4.881159,0.0,...,3,19,2017,10,42,42,4,13,33,55


In [31]:
# Enforce (not merely display) that the training and test frames share the same
# columns in the same order, so a mismatch stops a full notebook run.
# The result is still shown as the cell output.
train_test_columns_match = X_train.columns.tolist() == X_test.columns.tolist()
assert train_test_columns_match, "X_train and X_test columns differ"
train_test_columns_match

True

In [38]:
# Enforce (not merely display) that the treatment flags and the validation
# outcome are indexed by the same client_ids in the same order.
# The result is still shown as the cell output.
valid_index_match = valid_is_treatment.index.tolist() == y_valid.index.tolist()
assert valid_index_match, "valid_is_treatment and y_valid are indexed differently"
valid_index_match

True

In [33]:
# Class-transformation label for the validation set: 1 for treated clients with
# outcome 1 and for control clients with outcome 0, else 0.
# NOTE(review): y_valid is overwritten with Z below, so the raw outcome is no
# longer available afterwards and re-running this cell would derive Z from Z.
treated_responded = (valid_is_treatment == 1) & (y_valid == 1)
control_not_responded = (valid_is_treatment == 0) & (y_valid == 0)

# Going through a temporary X_valid column aligns the label on X_valid's index.
X_valid["Z"] = treated_responded | control_not_responded
X_valid["Z"] = X_valid["Z"].astype(int)
y_valid = X_valid["Z"]
X_valid = X_valid.drop(columns=["Z"])
X_valid.head()

Unnamed: 0_level_0,age,n_transactions,stddev_transaction_time,mode_transaction_weekday,sum_regular_points_received,sum_express_points_received,sum_regular_points_spent,sum_express_points_spent,avg_regular_points_received,avg_express_points_received,...,first_issue_weekday,first_issue_dayofmonth,first_issue_year,first_issue_month,first_issue_weekofyear,first_issue_week,first_issue_quarter,avg_transaction_hour,avg_transaction_minute,avg_transaction_seconds
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ac126a8be6,27,7,13044.929018,1.0,1169.8,0.0,0.0,0.0,13.762353,0.0,...,3,22,2017,6,25,25,2,12,10,48
ba4a67aba7,81,21,3820.134196,1.0,269.0,0.0,0.0,0.0,4.138462,0.0,...,0,25,2017,12,52,52,4,7,56,59
b2fc4ea450,18,11,10384.181488,1.0,36.6,0.0,0.0,0.0,0.963158,0.0,...,1,6,2018,3,10,10,1,13,31,22
67dc2d5e46,31,3,8024.53715,0.0,79.9,0.0,0.0,0.0,4.7,0.0,...,0,19,2017,6,25,25,2,12,10,31
7b061de2b1,57,8,9716.485661,0.0,48.3,0.0,0.0,0.0,2.0125,0.0,...,2,12,2017,7,28,28,3,13,9,38


In [34]:
# Enforce (not merely display) that the training, validation and test frames
# share the same columns in the same order before any model is fitted.
# The result is still shown as the cell output.
all_columns_match = (
    X_train.columns.tolist() == X_valid.columns.tolist() == X_test.columns.tolist()
)
assert all_columns_match, "X_train, X_valid and X_test columns differ"
all_columns_match

True

In [35]:
# Random-forest baseline on the class-transformed label Z.
# random_state is fixed so the reported scores are reproducible across runs.
clf = RandomForestClassifier(random_state=0)

# Replace missing feature values with the sentinel -999 before fitting.
X_train = X_train.fillna(-999)
X_valid = X_valid.fillna(-999)

clf.fit(X_train, y_train)

# NOTE(review): the recorded run scored about 0.99 on train versus 0.47 on
# validation, i.e. the forest fits the training set far better than it generalizes.
print(f"Accuracy on training set: {clf.score(X_train, y_train)}")
print(f"Accuracy on validation set: {clf.score(X_valid, y_valid)}")



0.9860955387175331
Accuracy on validation set: 0.47280543891221755


In [39]:
# Gradient-boosted baseline on the same label. Both the training and the
# validation set are passed as evaluation sets, and AUC and error are logged
# for each of them during fitting (verbose=True).
eval_set = [(X_train, y_train), (X_valid, y_valid)]
eval_metric = ["auc","error"]
model = xgb.XGBClassifier()
# NOTE(review): newer xgboost releases expect eval_metric on the estimator
# constructor rather than in fit() — confirm against the installed version.
# NOTE(review): the recorded log shows validation AUC below 0.5 while training
# AUC is above 0.5 — worth investigating before trusting this model.
model.fit(X_train, y_train, eval_metric=eval_metric, eval_set=eval_set, verbose=True)

[0]	validation_0-auc:0.513712	validation_0-error:0.482943	validation_1-auc:0.450605	validation_1-error:0.43503
[1]	validation_0-auc:0.51543	validation_0-error:0.482307	validation_1-auc:0.433437	validation_1-error:0.435146
[2]	validation_0-auc:0.517918	validation_0-error:0.482336	validation_1-auc:0.402781	validation_1-error:0.431964
[3]	validation_0-auc:0.518685	validation_0-error:0.483457	validation_1-auc:0.394857	validation_1-error:0.383123
[4]	validation_0-auc:0.520207	validation_0-error:0.483164	validation_1-auc:0.407901	validation_1-error:0.40037
[5]	validation_0-auc:0.520415	validation_0-error:0.482514	validation_1-auc:0.408595	validation_1-error:0.404269
[6]	validation_0-auc:0.520477	validation_0-error:0.482386	validation_1-auc:0.4107	validation_1-error:0.403003
[7]	validation_0-auc:0.522019	validation_0-error:0.482536	validation_1-auc:0.407879	validation_1-error:0.403019
[8]	validation_0-auc:0.522588	validation_0-error:0.482221	validation_1-auc:0.412747	validation_1-error:0.4030

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [40]:
# Score of the fitted XGBoost classifier on the training set.
model.score(X_train, y_train)

0.5314117991530205

In [41]:
# Score of the fitted XGBoost classifier on the validation set.
model.score(X_valid, y_valid)

0.5735686196094114