Basic imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    KFold,
    cross_val_score,
    train_test_split,
    GridSearchCV,
    StratifiedKFold,
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)
from lightgbm import LGBMClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier

from bayes_opt import BayesianOptimization
from skopt import BayesSearchCV
import lightgbm as lgb

# NOTE(review): this suppresses every warning for the rest of the notebook,
# including any raised by the estimators and metrics below — consider
# narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
# Load the nb_black IPython extension.
%load_ext nb_black

# Seed passed to the splits and estimators below so runs are repeatable.
RANDOM_STATE = 7

# Use the "ggplot" matplotlib style for all figures.
plt.style.use("ggplot")

<IPython.core.display.Javascript object>

Importing data

In [2]:
# Read the three input tables in one pass: the full training set, the test
# set that gets scored for submission, and the undersampled training set.
train_df, test_df, under_train_df = map(
    pd.read_csv, ("train.csv", "test.csv", "train_under.csv")
)

<IPython.core.display.Javascript object>

### 2-5 Modeling.

In [20]:
# Unbalanced dataset: labels come from the TARGET column, features are every
# column from position 2 onwards.
# (assumes the first two columns are the id and TARGET — TODO confirm)
y = train_df["TARGET"].values
X = train_df.iloc[:, 2:].values

# Balanced (undersampled) dataset: features are every column from position 1.
# (assumes only the first column is a non-feature column — TODO confirm)
y_under = under_train_df["TARGET"].values
X_under = under_train_df.iloc[:, 1:].values

# Test dataframe: everything after the first column is used as features.
X_val = test_df.iloc[:, 1:].values

<IPython.core.display.Javascript object>

In [21]:
# Hold-out split of the unbalanced data: 75% train / 25% test.
unbalanced_split = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE
)
X_train, X_test, y_train, y_test = unbalanced_split

# Hold-out split of the balanced data: 80% train / 20% test, same seed.
balanced_split = train_test_split(
    X_under, y_under, test_size=0.2, random_state=RANDOM_STATE
)
X_train_under, X_test_under, y_train_under, y_test_under = balanced_split

<IPython.core.display.Javascript object>

**2-5-1** Building baseline model - Dummy Classifier and finding its performance.

In [5]:
# Baseline: always predict the most frequent class of the training labels.
# fit() returns the estimator itself, so construction and fitting are chained.
dummy_clf = DummyClassifier(strategy="most_frequent").fit(X_train, y_train)

# Hard class predictions for the unbalanced hold-out split.
y_pred = dummy_clf.predict(X_test)

<IPython.core.display.Javascript object>

In [6]:
def clf_performance_metrics(y_test: np.ndarray, y_pred: np.ndarray) -> tuple:
    """Return (accuracy, precision, recall, f1) for a set of predictions.

    Parameters
    ----------
    y_test : np.ndarray
        True labels.
    y_pred : np.ndarray
        Predicted labels, same length as ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` in that order.
    """
    accuracy = accuracy_score(y_test, y_pred)
    # A classifier that never predicts the positive class (e.g. the
    # most-frequent baseline) makes these ratios undefined. Ask explicitly for
    # 0 in that case instead of relying on a warning that is silenced globally.
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    return accuracy, precision, recall, f1

<IPython.core.display.Javascript object>

In [7]:
# Score the baseline on the unbalanced hold-out split; the tuple is
# (accuracy, precision, recall, f1) and is displayed as the cell output.
perf_metrics = clf_performance_metrics(y_test, y_pred)
perf_metrics

(0.9192486797263196, 0.0, 0.0, 0.0)

<IPython.core.display.Javascript object>

In [8]:
# Class probabilities of the baseline for the test-set features.
y_proba = dummy_clf.predict_proba(X_val)

<IPython.core.display.Javascript object>

In [9]:
def to_csv(y_pred_proba: list, csv_name: str) -> None:
    """Write a two-column prediction file named ``<csv_name>.csv``.

    The file pairs the ``SK_ID_CURR`` column of the global ``test_df`` with
    the given values in a ``TARGET`` column; the index is not written.
    """
    submission = test_df[["SK_ID_CURR"]].assign(TARGET=y_pred_proba)
    submission.to_csv(csv_name + ".csv", index=False)

<IPython.core.display.Javascript object>

In [10]:
# Save column 1 of the baseline probabilities as dummy.csv.
to_csv(y_proba[:, 1], "dummy")

<IPython.core.display.Javascript object>

**2-5-2** Creating a dataframe to keep track of each model's performance.

In [11]:
# Empty results table: one row per experiment is added by the cells below.
# The column order matters — later cells address "Kaggle" as the last column.
models_df_columns = (
    "model balanced_dataset features hyperparameters "
    "accuracy precision recall f1 Kaggle"
).split()
models_df = pd.DataFrame(columns=models_df_columns)

<IPython.core.display.Javascript object>

In [12]:
# Record the DummyClassifier baseline; perf_metrics holds
# (accuracy, precision, recall, f1) from the baseline evaluation above.
new_row = {
    "model": "DummyClassifier",
    "balanced_dataset": False,
    "features": "All",
    "hyperparameters": None,
    "accuracy": perf_metrics[0],
    "precision": perf_metrics[1],
    "recall": perf_metrics[2],
    "f1": perf_metrics[3],
    "Kaggle": 0.5,
}
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
# that works on both old and new pandas versions.
models_df = pd.concat([models_df, pd.DataFrame([new_row])], ignore_index=True)

<IPython.core.display.Javascript object>

**2-5-3** Building basic models without a selector or hyperparameter tuning on the unbalanced dataset.

In [13]:
# 4 models will be used: LogisticReg, RFC, KNNC, LightGBM
models = [
    LogisticRegression(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    KNeighborsClassifier(),
    LGBMClassifier(random_state=RANDOM_STATE),
]
model_names = ["logreg", "rfc", "knn", "lgbm"]

# Rows are collected here and added to models_df in one step after the loop.
new_rows = []
for model_name, model in zip(model_names, models):
    # training model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # measuring performance on the unbalanced hold-out split
    perf_metrics = clf_performance_metrics(y_test, y_pred)
    accuracy, precision, recall, f1 = perf_metrics
    y_proba = model.predict_proba(X_val)

    # saving column 1 of the test-set probabilities to csv
    to_csv(y_proba[:, 1], "kaggle_" + model_name + str(1))

    # performance data for models_df ("Kaggle" is filled in by a later cell)
    new_rows.append(
        {
            "model": model_name,
            "balanced_dataset": False,
            "features": "All",
            "hyperparameters": None,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "Kaggle": None,
        }
    )

# DataFrame.append was removed in pandas 2.0; concatenate all new rows at once.
models_df = pd.concat([models_df, pd.DataFrame(new_rows)], ignore_index=True)

<IPython.core.display.Javascript object>

In [14]:
# updating Kaggle scores
# Positions 1-4 are the four rows added by the unbalanced training loop
# (position 0 is the DummyClassifier); -1 addresses the last column, "Kaggle".
# NOTE(review): the values are hard-coded — presumably copied by hand from the
# Kaggle submission results; confirm.
models_df.iloc[1:5, -1] = [0.7539, 0.7174, 0.5871, 0.7731]

<IPython.core.display.Javascript object>

In [15]:
# Display the experiments that were trained on the unbalanced dataset.
models_df[models_df.balanced_dataset == False]

Unnamed: 0,model,balanced_dataset,features,hyperparameters,accuracy,precision,recall,f1,Kaggle
0,DummyClassifier,False,All,,0.919249,0.0,0.0,0.0,0.5
1,logreg,False,All,,0.919522,0.520154,0.043653,0.080547,0.7539
2,rfc,False,All,,0.919418,0.551181,0.011276,0.022099,0.7174
3,knn,False,All,,0.911535,0.218424,0.037049,0.063352,0.5871
4,lgbm,False,All,,0.920003,0.590625,0.030445,0.057904,0.7731


<IPython.core.display.Javascript object>

LGBM has the best Kaggle score so far; however, the score still needs to be improved.

**2-5-4** Building basic models without a selector or hyperparameter tuning on the balanced dataset.

In [16]:
# 4 models will be used: LogisticReg, RFC, KNNC, LightGBM
models = [
    LogisticRegression(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    KNeighborsClassifier(),
    LGBMClassifier(random_state=RANDOM_STATE),
]
# Defined here as well so this cell does not depend on a name left over from
# an earlier cell.
model_names = ["logreg", "rfc", "knn", "lgbm"]

# Rows are collected here and added to models_df in one step after the loop.
new_rows = []
for model_name, model in zip(model_names, models):
    # training model on the balanced (undersampled) split
    model.fit(X_train_under, y_train_under)
    y_pred = model.predict(X_test_under)

    # measuring performance on the balanced hold-out split
    perf_metrics = clf_performance_metrics(y_test_under, y_pred)
    accuracy, precision, recall, f1 = perf_metrics
    y_proba = model.predict_proba(X_val)

    # saving column 1 of the test-set probabilities to csv
    to_csv(y_proba[:, 1], "kaggle_bal_" + model_name + str(1))

    # performance data for models_df ("Kaggle" is filled in by a later cell)
    new_rows.append(
        {
            "model": model_name,
            "balanced_dataset": True,
            "features": "All",
            "hyperparameters": None,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "Kaggle": None,
        }
    )

# DataFrame.append was removed in pandas 2.0; concatenate all new rows at once.
models_df = pd.concat([models_df, pd.DataFrame(new_rows)], ignore_index=True)

<IPython.core.display.Javascript object>

In [17]:
# updating Kaggle scores for the four balanced-dataset rows (positions 5-8).
# The slice is bounded, like the other score-update cells, so this cell still
# works when it is re-run after later experiments have appended more rows.
models_df.iloc[5:9, -1] = [0.7591, 0.7358, 0.6452, 0.7707]

<IPython.core.display.Javascript object>

In [18]:
# Display the experiments that were trained on the balanced dataset.
models_df[models_df.balanced_dataset == True]

Unnamed: 0,model,balanced_dataset,features,hyperparameters,accuracy,precision,recall,f1,Kaggle
5,logreg,True,All,,0.701309,0.708428,0.689116,0.698638,0.7591
6,rfc,True,All,,0.682175,0.686167,0.67709,0.681598,0.7358
7,knn,True,All,,0.611078,0.607109,0.640208,0.62322,0.6452
8,lgbm,True,All,,0.713293,0.71532,0.713169,0.714243,0.7707


<IPython.core.display.Javascript object>

Kaggle score improved: RFC, KNN, Logistic Regression

Kaggle score dropped: LightGBM

Still need to improve it.

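**2-5-5** Models with feature selectors on the unbalanced dataset. KNeighborsClassifier is left out because it does not work with the selector: it exposes neither `coef_` nor `feature_importances_`, which `SelectFromModel` needs.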

In [22]:
# 3 models will be used: LogisticReg, RFC, LightGBM
models = [
    LogisticRegression(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    LGBMClassifier(random_state=RANDOM_STATE),
]
model_names = ["logreg", "rfc", "lgbm"]

# Rows are collected here and added to models_df in one step after the loop.
new_rows = []
for model_name, model in zip(model_names, models):
    # training selector
    try:
        selector = SelectFromModel(model)
        selector.fit(X_train, y_train)
    except Exception as exc:
        # Best-effort fallback kept from the original cell, but no longer a
        # bare `except:` (which would also swallow KeyboardInterrupt), and
        # reported so a silently different selector cannot go unnoticed.
        print(
            f"{model_name}: selector failed ({exc!r}); "
            "falling back to LogisticRegression"
        )
        selector = SelectFromModel(LogisticRegression())
        selector.fit(X_train, y_train)

    # boolean mask of the kept feature columns, computed once and reused
    selected = selector.get_support()

    # training model on the selected features only
    model.fit(X_train[:, selected], y_train)
    y_pred = model.predict(X_test[:, selected])

    # measuring performance on the unbalanced hold-out split
    perf_metrics = clf_performance_metrics(y_test, y_pred)
    accuracy, precision, recall, f1 = perf_metrics
    y_proba = model.predict_proba(X_val[:, selected])

    # saving column 1 of the test-set probabilities to csv
    to_csv(y_proba[:, 1], "kaggle_sel_un_" + model_name + str(1))

    # performance data for models_df ("Kaggle" is filled in by a later cell)
    new_rows.append(
        {
            "model": model_name,
            "balanced_dataset": False,
            "features": "Selected",
            "hyperparameters": None,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "Kaggle": None,
        }
    )

# DataFrame.append was removed in pandas 2.0; concatenate all new rows at once.
models_df = pd.concat([models_df, pd.DataFrame(new_rows)], ignore_index=True)

<IPython.core.display.Javascript object>

In [23]:
# updating Kaggle scores for the three selector rows trained on the
# unbalanced dataset (positions 9-11).
# The slice is bounded, like the other score-update cells, so this cell still
# works when it is re-run after later experiments have appended more rows.
models_df.iloc[9:12, -1] = [0.7554, 0.7124, 0.7733]

<IPython.core.display.Javascript object>

In [24]:
# Display the experiments that used the feature selector.
models_df[models_df.features == "Selected"]

Unnamed: 0,model,balanced_dataset,features,hyperparameters,accuracy,precision,recall,f1,Kaggle
9,logreg,False,Selected,,0.9196,0.528067,0.040915,0.075946,0.7554
10,rfc,False,Selected,,0.919444,0.559055,0.011437,0.022415,0.7124
11,lgbm,False,Selected,,0.91986,0.569733,0.030928,0.058671,0.7733


<IPython.core.display.Javascript object>

**2-5-6** Models with feature selectors on the balanced dataset.

In [25]:
# 3 models will be used: LogisticReg, RFC, LightGBM
models = [
    LogisticRegression(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    LGBMClassifier(random_state=RANDOM_STATE),
]
model_names = ["logreg", "rfc", "lgbm"]

# Rows are collected here and added to models_df in one step after the loop.
new_rows = []
for model_name, model in zip(model_names, models):
    # training selector on the balanced (undersampled) split
    try:
        selector = SelectFromModel(model)
        selector.fit(X_train_under, y_train_under)
    except Exception as exc:
        # Best-effort fallback kept from the original cell, but no longer a
        # bare `except:` (which would also swallow KeyboardInterrupt), and
        # reported so a silently different selector cannot go unnoticed.
        print(
            f"{model_name}: selector failed ({exc!r}); "
            "falling back to LogisticRegression"
        )
        selector = SelectFromModel(LogisticRegression())
        selector.fit(X_train_under, y_train_under)

    # boolean mask of the kept feature columns, computed once and reused
    selected = selector.get_support()

    # training model on the selected features only
    model.fit(X_train_under[:, selected], y_train_under)
    y_pred = model.predict(X_test_under[:, selected])

    # measuring performance on the balanced hold-out split
    perf_metrics = clf_performance_metrics(y_test_under, y_pred)
    accuracy, precision, recall, f1 = perf_metrics
    y_proba = model.predict_proba(X_val[:, selected])

    # saving column 1 of the test-set probabilities to csv
    to_csv(y_proba[:, 1], "kaggle_sel_bal_" + model_name + str(1))

    # performance data for models_df ("Kaggle" is filled in by a later cell)
    new_rows.append(
        {
            "model": model_name,
            "balanced_dataset": True,
            "features": "Selected",
            "hyperparameters": None,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "Kaggle": None,
        }
    )

# DataFrame.append was removed in pandas 2.0; concatenate all new rows at once.
models_df = pd.concat([models_df, pd.DataFrame(new_rows)], ignore_index=True)

<IPython.core.display.Javascript object>

In [26]:
# updating Kaggle scores
# Positions 12-14 are the three selector rows trained on the balanced
# dataset; -1 addresses the last column, "Kaggle".
# NOTE(review): the values are hard-coded — presumably copied by hand from the
# Kaggle submission results; confirm.
models_df.iloc[12:15, -1] = [0.7574, 0.7407, 0.7706]

<IPython.core.display.Javascript object>

In [27]:
# Display the experiments that used the feature selector on the balanced dataset.
models_df[(models_df.features == "Selected") & (models_df.balanced_dataset == True)]

Unnamed: 0,model,balanced_dataset,features,hyperparameters,accuracy,precision,recall,f1,Kaggle
12,logreg,True,Selected,,0.699799,0.707781,0.685508,0.696467,0.7574
13,rfc,True,Selected,,0.692447,0.700103,0.678493,0.689129,0.7407
14,lgbm,True,Selected,,0.709164,0.711496,0.708358,0.709924,0.7706


<IPython.core.display.Javascript object>

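LGBM worked best on the imbalanced dataset. The selector made almost no difference there (Kaggle score 0.7733 with the selector vs. 0.7731 without), so the tuning below uses all features.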

**2-5-7** LGBM hyperparameter tuning using Bayesian Optimization (GridSearch was not used because the model does not come from scikit-learn).

In [28]:
def lgbm_evaluation(
    num_leaves,
    max_depth,
    min_split_gain,
    min_child_weight,
    min_child_samples,
    reg_alpha,
    reg_lambda,
):
    """
    Objective function for Bayesian Optimization of LightGBM's hyperparameters.

    Takes the hyperparameters as input (the optimiser passes floats, so the
    integer-valued ones are rounded here) and returns the out-of-fold
    cross-validation AUC computed on the global ``X_train`` / ``y_train``.
    """

    params = {
        "objective": "binary",
        "boosting_type": "gbdt",
        "learning_rate": 0.005,
        "n_estimators": 10000,
        "n_jobs": -1,
        "num_leaves": int(round(num_leaves)),
        "max_depth": int(round(max_depth)),
        "min_split_gain": min_split_gain,
        "min_child_weight": min_child_weight,
        "min_child_samples": int(round(min_child_samples)),
        "reg_alpha": reg_alpha,
        "reg_lambda": reg_lambda,
        "verbosity": -1,
        "seed": RANDOM_STATE,
    }
    stratified_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

    # X_train / y_train are NumPy arrays in this notebook (built with .values),
    # so they are indexed positionally instead of through the pandas-only
    # .iloc accessor. np.asarray keeps this working if pandas objects are passed.
    features = np.asarray(X_train)
    labels = np.asarray(y_train)

    # Out-of-fold predictions: every sample is scored by the one model that
    # did not see it during training.
    cv_preds = np.zeros(features.shape[0])
    for train_indices, cv_indices in stratified_cv.split(features, labels):

        x_tr = features[train_indices]
        y_tr = labels[train_indices]
        x_cv = features[cv_indices]
        y_cv = labels[cv_indices]

        lgbm_clf = lgb.LGBMClassifier(**params)
        # NOTE(review): `verbose` and `early_stopping_rounds` are fit() keyword
        # arguments of older LightGBM releases; newer releases expect
        # callbacks instead — confirm against the installed version.
        lgbm_clf.fit(
            x_tr,
            y_tr,
            eval_set=[(x_cv, y_cv)],
            eval_metric="auc",
            verbose=False,
            early_stopping_rounds=200,
        )

        cv_preds[cv_indices] = lgbm_clf.predict_proba(
            x_cv, num_iteration=lgbm_clf.best_iteration_
        )[:, 1]

    # Score against the labels of the same rows that were predicted; the
    # original referenced an undefined name (`target_train`) here.
    return roc_auc_score(labels, cv_preds)

<IPython.core.display.Javascript object>

In [29]:
# Optimiser over lgbm_evaluation. Each entry of the dict gives the pair of
# bounds for one argument of the objective; the keys match its parameter names.
bopt_lgbm = BayesianOptimization(
    lgbm_evaluation,
    {
        "num_leaves": (25, 50),
        "max_depth": (4, 11),
        "min_split_gain": (0, 0.1),
        "min_child_weight": (5, 80),
        "min_child_samples": (5, 80),
        "reg_alpha": (0.001, 0.3),
        "reg_lambda": (0.001, 0.3),
    },
    random_state=RANDOM_STATE,
)

# NOTE(review): the search itself is disabled, so lgbm_evaluation is never
# called when the notebook is run top to bottom.
# bayesian_optimization = bopt_lgbm.maximize(n_iter=6, init_points=4)

<IPython.core.display.Javascript object>

In [30]:
# extracting the best parameters
# NOTE(review): everything except the list initialisation is disabled because
# the search above is not run; the loop is shown with its indentation
# restored so it can be re-enabled as-is.
target_values = []
# for result in bopt_lgbm.res:
#     target_values.append(result['target'])
#     if result['target'] == max(target_values):
#         best_params = result['params']

# print("Best Hyperparameters obtained are:\n")
# print(best_params)

<IPython.core.display.Javascript object>

**2-5-8** Training on optimized parameters.

In [31]:
# Parameters for the final LightGBM training run.
# NOTE(review): the tuned values (num_leaves ... reg_lambda) are hard-coded and
# the search that would produce them is commented out above, so their origin
# cannot be reproduced from this notebook. learning_rate / n_estimators also
# differ from the 0.005 / 10000 used inside lgbm_evaluation, and
# feature_fraction is not part of the search space — confirm these are intended.
params = {
    "learning_rate": 0.01,
    "n_estimators": 4750,
    "n_jobs": -1,
    "objective": "binary",
    "num_leaves": 39,
    "max_depth": 10,
    "boosting_type": "gbdt",
    "min_split_gain": 0.030820727751758883,
    "min_child_weight": 30.074868967458226,
    "min_child_samples": 31,
    "reg_alpha": 0.15663020002553255,
    "reg_lambda": 0.22503178038757748,
    "verbosity": -1,
    "metric": "auc",
    "seed": RANDOM_STATE,
    "feature_fraction": 0.1,
}

<IPython.core.display.Javascript object>

In [32]:
# Wrap the unbalanced train / hold-out splits as LightGBM datasets for lgb.train.
training_dataset = lgb.Dataset(X_train, label=y_train)
testing_dataset = lgb.Dataset(X_test, label=y_test)

<IPython.core.display.Javascript object>

In [33]:
# Train the final booster; AUC is reported for both the training data
# ("training") and the hold-out split ("valid_1") after every iteration.
# NOTE(review): no early stopping is configured and every iteration is logged.
# In the captured output the hold-out AUC levels off around 0.789 while the
# training AUC keeps rising, so consider stopping early and logging less often.
clf = lgb.train(
    params,
    training_dataset,
    valid_sets=[training_dataset, testing_dataset],
)

[1]	training's auc: 0.71691	valid_1's auc: 0.710271
[2]	training's auc: 0.75442	valid_1's auc: 0.747887
[3]	training's auc: 0.750402	valid_1's auc: 0.742777
[4]	training's auc: 0.748001	valid_1's auc: 0.740192
[5]	training's auc: 0.745851	valid_1's auc: 0.738076
[6]	training's auc: 0.7445	valid_1's auc: 0.736491
[7]	training's auc: 0.742605	valid_1's auc: 0.734697
[8]	training's auc: 0.742075	valid_1's auc: 0.734509
[9]	training's auc: 0.742135	valid_1's auc: 0.734483
[10]	training's auc: 0.747942	valid_1's auc: 0.740503
[11]	training's auc: 0.746659	valid_1's auc: 0.739327
[12]	training's auc: 0.745487	valid_1's auc: 0.73825
[13]	training's auc: 0.74501	valid_1's auc: 0.737577
[14]	training's auc: 0.744366	valid_1's auc: 0.736924
[15]	training's auc: 0.744075	valid_1's auc: 0.736677
[16]	training's auc: 0.743421	valid_1's auc: 0.736059
[17]	training's auc: 0.742956	valid_1's auc: 0.735528
[18]	training's auc: 0.746151	valid_1's auc: 0.738805
[19]	training's auc: 0.746103	valid_1's auc

[152]	training's auc: 0.763973	valid_1's auc: 0.751868
[153]	training's auc: 0.764115	valid_1's auc: 0.751957
[154]	training's auc: 0.764188	valid_1's auc: 0.751988
[155]	training's auc: 0.764395	valid_1's auc: 0.752166
[156]	training's auc: 0.764468	valid_1's auc: 0.752212
[157]	training's auc: 0.764686	valid_1's auc: 0.75238
[158]	training's auc: 0.764782	valid_1's auc: 0.752429
[159]	training's auc: 0.764885	valid_1's auc: 0.75247
[160]	training's auc: 0.765054	valid_1's auc: 0.752604
[161]	training's auc: 0.765179	valid_1's auc: 0.752678
[162]	training's auc: 0.765268	valid_1's auc: 0.752711
[163]	training's auc: 0.765357	valid_1's auc: 0.752764
[164]	training's auc: 0.765397	valid_1's auc: 0.752777
[165]	training's auc: 0.765698	valid_1's auc: 0.753057
[166]	training's auc: 0.765998	valid_1's auc: 0.753335
[167]	training's auc: 0.766074	valid_1's auc: 0.753381
[168]	training's auc: 0.766171	valid_1's auc: 0.753461
[169]	training's auc: 0.766192	valid_1's auc: 0.753466
[170]	traini

[303]	training's auc: 0.780823	valid_1's auc: 0.763374
[304]	training's auc: 0.780881	valid_1's auc: 0.76338
[305]	training's auc: 0.780968	valid_1's auc: 0.763444
[306]	training's auc: 0.781145	valid_1's auc: 0.763598
[307]	training's auc: 0.781247	valid_1's auc: 0.763637
[308]	training's auc: 0.781327	valid_1's auc: 0.763686
[309]	training's auc: 0.781389	valid_1's auc: 0.763718
[310]	training's auc: 0.781475	valid_1's auc: 0.763771
[311]	training's auc: 0.781552	valid_1's auc: 0.763823
[312]	training's auc: 0.781717	valid_1's auc: 0.763958
[313]	training's auc: 0.781833	valid_1's auc: 0.764036
[314]	training's auc: 0.781983	valid_1's auc: 0.764166
[315]	training's auc: 0.782069	valid_1's auc: 0.764212
[316]	training's auc: 0.782225	valid_1's auc: 0.76435
[317]	training's auc: 0.782335	valid_1's auc: 0.764419
[318]	training's auc: 0.782432	valid_1's auc: 0.76449
[319]	training's auc: 0.782529	valid_1's auc: 0.764548
[320]	training's auc: 0.782605	valid_1's auc: 0.764595
[321]	trainin

[454]	training's auc: 0.793541	valid_1's auc: 0.771004
[455]	training's auc: 0.793596	valid_1's auc: 0.771027
[456]	training's auc: 0.793658	valid_1's auc: 0.771048
[457]	training's auc: 0.793715	valid_1's auc: 0.771065
[458]	training's auc: 0.793763	valid_1's auc: 0.771069
[459]	training's auc: 0.793815	valid_1's auc: 0.771087
[460]	training's auc: 0.793869	valid_1's auc: 0.771097
[461]	training's auc: 0.793954	valid_1's auc: 0.771156
[462]	training's auc: 0.794018	valid_1's auc: 0.771181
[463]	training's auc: 0.794092	valid_1's auc: 0.771219
[464]	training's auc: 0.794148	valid_1's auc: 0.771236
[465]	training's auc: 0.794239	valid_1's auc: 0.771301
[466]	training's auc: 0.79431	valid_1's auc: 0.771332
[467]	training's auc: 0.794361	valid_1's auc: 0.771358
[468]	training's auc: 0.794425	valid_1's auc: 0.771371
[469]	training's auc: 0.7945	valid_1's auc: 0.771405
[470]	training's auc: 0.794578	valid_1's auc: 0.771431
[471]	training's auc: 0.794654	valid_1's auc: 0.77147
[472]	training

[605]	training's auc: 0.802935	valid_1's auc: 0.775411
[606]	training's auc: 0.803002	valid_1's auc: 0.775432
[607]	training's auc: 0.803062	valid_1's auc: 0.775456
[608]	training's auc: 0.803112	valid_1's auc: 0.775465
[609]	training's auc: 0.803173	valid_1's auc: 0.775488
[610]	training's auc: 0.803239	valid_1's auc: 0.775514
[611]	training's auc: 0.803288	valid_1's auc: 0.77553
[612]	training's auc: 0.803334	valid_1's auc: 0.775548
[613]	training's auc: 0.803387	valid_1's auc: 0.775574
[614]	training's auc: 0.803441	valid_1's auc: 0.775595
[615]	training's auc: 0.803492	valid_1's auc: 0.775605
[616]	training's auc: 0.803531	valid_1's auc: 0.775604
[617]	training's auc: 0.803587	valid_1's auc: 0.775631
[618]	training's auc: 0.803653	valid_1's auc: 0.775687
[619]	training's auc: 0.803707	valid_1's auc: 0.775709
[620]	training's auc: 0.803764	valid_1's auc: 0.775748
[621]	training's auc: 0.803833	valid_1's auc: 0.775793
[622]	training's auc: 0.803886	valid_1's auc: 0.775811
[623]	train

[756]	training's auc: 0.811046	valid_1's auc: 0.778617
[757]	training's auc: 0.811101	valid_1's auc: 0.778624
[758]	training's auc: 0.811141	valid_1's auc: 0.778647
[759]	training's auc: 0.811201	valid_1's auc: 0.778667
[760]	training's auc: 0.81125	valid_1's auc: 0.778687
[761]	training's auc: 0.811292	valid_1's auc: 0.778703
[762]	training's auc: 0.811346	valid_1's auc: 0.77872
[763]	training's auc: 0.811405	valid_1's auc: 0.778749
[764]	training's auc: 0.811442	valid_1's auc: 0.778757
[765]	training's auc: 0.811493	valid_1's auc: 0.77879
[766]	training's auc: 0.81155	valid_1's auc: 0.778811
[767]	training's auc: 0.811615	valid_1's auc: 0.778839
[768]	training's auc: 0.811663	valid_1's auc: 0.778847
[769]	training's auc: 0.811709	valid_1's auc: 0.778866
[770]	training's auc: 0.811752	valid_1's auc: 0.778881
[771]	training's auc: 0.811805	valid_1's auc: 0.778914
[772]	training's auc: 0.811866	valid_1's auc: 0.778955
[773]	training's auc: 0.811926	valid_1's auc: 0.778987
[774]	training

[907]	training's auc: 0.818223	valid_1's auc: 0.781011
[908]	training's auc: 0.818258	valid_1's auc: 0.781011
[909]	training's auc: 0.818302	valid_1's auc: 0.781027
[910]	training's auc: 0.818346	valid_1's auc: 0.781043
[911]	training's auc: 0.818388	valid_1's auc: 0.781049
[912]	training's auc: 0.818432	valid_1's auc: 0.781064
[913]	training's auc: 0.818473	valid_1's auc: 0.781094
[914]	training's auc: 0.81852	valid_1's auc: 0.781104
[915]	training's auc: 0.81857	valid_1's auc: 0.78111
[916]	training's auc: 0.818608	valid_1's auc: 0.781129
[917]	training's auc: 0.818651	valid_1's auc: 0.78114
[918]	training's auc: 0.818699	valid_1's auc: 0.781158
[919]	training's auc: 0.818747	valid_1's auc: 0.781162
[920]	training's auc: 0.818797	valid_1's auc: 0.781177
[921]	training's auc: 0.818838	valid_1's auc: 0.781197
[922]	training's auc: 0.81888	valid_1's auc: 0.781209
[923]	training's auc: 0.818923	valid_1's auc: 0.781226
[924]	training's auc: 0.818968	valid_1's auc: 0.781226
[925]	training'

[1057]	training's auc: 0.824545	valid_1's auc: 0.782771
[1058]	training's auc: 0.824585	valid_1's auc: 0.782792
[1059]	training's auc: 0.824631	valid_1's auc: 0.782802
[1060]	training's auc: 0.824671	valid_1's auc: 0.782823
[1061]	training's auc: 0.824719	valid_1's auc: 0.782835
[1062]	training's auc: 0.824758	valid_1's auc: 0.782842
[1063]	training's auc: 0.824801	valid_1's auc: 0.782859
[1064]	training's auc: 0.824843	valid_1's auc: 0.782874
[1065]	training's auc: 0.824887	valid_1's auc: 0.782887
[1066]	training's auc: 0.824926	valid_1's auc: 0.782906
[1067]	training's auc: 0.824966	valid_1's auc: 0.782917
[1068]	training's auc: 0.825003	valid_1's auc: 0.782926
[1069]	training's auc: 0.825045	valid_1's auc: 0.782939
[1070]	training's auc: 0.825089	valid_1's auc: 0.782956
[1071]	training's auc: 0.825131	valid_1's auc: 0.782958
[1072]	training's auc: 0.825169	valid_1's auc: 0.782965
[1073]	training's auc: 0.825211	valid_1's auc: 0.782979
[1074]	training's auc: 0.825243	valid_1's auc: 0

[1205]	training's auc: 0.830364	valid_1's auc: 0.784251
[1206]	training's auc: 0.830408	valid_1's auc: 0.784257
[1207]	training's auc: 0.830436	valid_1's auc: 0.784256
[1208]	training's auc: 0.830466	valid_1's auc: 0.784261
[1209]	training's auc: 0.830494	valid_1's auc: 0.784264
[1210]	training's auc: 0.830527	valid_1's auc: 0.784279
[1211]	training's auc: 0.830567	valid_1's auc: 0.78429
[1212]	training's auc: 0.830601	valid_1's auc: 0.784297
[1213]	training's auc: 0.830644	valid_1's auc: 0.784302
[1214]	training's auc: 0.830686	valid_1's auc: 0.784308
[1215]	training's auc: 0.830722	valid_1's auc: 0.784325
[1216]	training's auc: 0.830754	valid_1's auc: 0.784329
[1217]	training's auc: 0.830789	valid_1's auc: 0.784335
[1218]	training's auc: 0.83084	valid_1's auc: 0.78435
[1219]	training's auc: 0.830884	valid_1's auc: 0.784358
[1220]	training's auc: 0.830914	valid_1's auc: 0.784358
[1221]	training's auc: 0.830956	valid_1's auc: 0.784371
[1222]	training's auc: 0.830995	valid_1's auc: 0.78

[1352]	training's auc: 0.83558	valid_1's auc: 0.78523
[1353]	training's auc: 0.835614	valid_1's auc: 0.785241
[1354]	training's auc: 0.835641	valid_1's auc: 0.785246
[1355]	training's auc: 0.835665	valid_1's auc: 0.78525
[1356]	training's auc: 0.8357	valid_1's auc: 0.785255
[1357]	training's auc: 0.835748	valid_1's auc: 0.785262
[1358]	training's auc: 0.835777	valid_1's auc: 0.78526
[1359]	training's auc: 0.835809	valid_1's auc: 0.785267
[1360]	training's auc: 0.835843	valid_1's auc: 0.785268
[1361]	training's auc: 0.835879	valid_1's auc: 0.785271
[1362]	training's auc: 0.835906	valid_1's auc: 0.785273
[1363]	training's auc: 0.835945	valid_1's auc: 0.785271
[1364]	training's auc: 0.835983	valid_1's auc: 0.785282
[1365]	training's auc: 0.836023	valid_1's auc: 0.785285
[1366]	training's auc: 0.836067	valid_1's auc: 0.785295
[1367]	training's auc: 0.836106	valid_1's auc: 0.7853
[1368]	training's auc: 0.836144	valid_1's auc: 0.785307
[1369]	training's auc: 0.836174	valid_1's auc: 0.78531
[

[1502]	training's auc: 0.840565	valid_1's auc: 0.786022
[1503]	training's auc: 0.840599	valid_1's auc: 0.786032
[1504]	training's auc: 0.840621	valid_1's auc: 0.786033
[1505]	training's auc: 0.840655	valid_1's auc: 0.78605
[1506]	training's auc: 0.840684	valid_1's auc: 0.786064
[1507]	training's auc: 0.840712	valid_1's auc: 0.786066
[1508]	training's auc: 0.840747	valid_1's auc: 0.786071
[1509]	training's auc: 0.840778	valid_1's auc: 0.786076
[1510]	training's auc: 0.840819	valid_1's auc: 0.786074
[1511]	training's auc: 0.840854	valid_1's auc: 0.786077
[1512]	training's auc: 0.840881	valid_1's auc: 0.786082
[1513]	training's auc: 0.84092	valid_1's auc: 0.786086
[1514]	training's auc: 0.840955	valid_1's auc: 0.78609
[1515]	training's auc: 0.840986	valid_1's auc: 0.786091
[1516]	training's auc: 0.841019	valid_1's auc: 0.786096
[1517]	training's auc: 0.841051	valid_1's auc: 0.786098
[1518]	training's auc: 0.841076	valid_1's auc: 0.786095
[1519]	training's auc: 0.841101	valid_1's auc: 0.78

[1650]	training's auc: 0.845236	valid_1's auc: 0.78674
[1651]	training's auc: 0.845271	valid_1's auc: 0.786742
[1652]	training's auc: 0.845299	valid_1's auc: 0.786741
[1653]	training's auc: 0.845325	valid_1's auc: 0.786746
[1654]	training's auc: 0.845357	valid_1's auc: 0.786742
[1655]	training's auc: 0.845394	valid_1's auc: 0.786749
[1656]	training's auc: 0.845426	valid_1's auc: 0.786754
[1657]	training's auc: 0.845463	valid_1's auc: 0.786766
[1658]	training's auc: 0.845509	valid_1's auc: 0.786766
[1659]	training's auc: 0.845545	valid_1's auc: 0.786788
[1660]	training's auc: 0.845581	valid_1's auc: 0.786791
[1661]	training's auc: 0.845612	valid_1's auc: 0.786794
[1662]	training's auc: 0.845642	valid_1's auc: 0.786791
[1663]	training's auc: 0.84567	valid_1's auc: 0.786791
[1664]	training's auc: 0.845707	valid_1's auc: 0.786795
[1665]	training's auc: 0.845726	valid_1's auc: 0.786799
[1666]	training's auc: 0.84576	valid_1's auc: 0.786796
[1667]	training's auc: 0.845797	valid_1's auc: 0.78

[1796]	training's auc: 0.849552	valid_1's auc: 0.787184
[1797]	training's auc: 0.849582	valid_1's auc: 0.787188
[1798]	training's auc: 0.849603	valid_1's auc: 0.787192
[1799]	training's auc: 0.849638	valid_1's auc: 0.787199
[1800]	training's auc: 0.849666	valid_1's auc: 0.787202
[1801]	training's auc: 0.849686	valid_1's auc: 0.787214
[1802]	training's auc: 0.849704	valid_1's auc: 0.787211
[1803]	training's auc: 0.849733	valid_1's auc: 0.787213
[1804]	training's auc: 0.849763	valid_1's auc: 0.787223
[1805]	training's auc: 0.849788	valid_1's auc: 0.78722
[1806]	training's auc: 0.849814	valid_1's auc: 0.787231
[1807]	training's auc: 0.849831	valid_1's auc: 0.78723
[1808]	training's auc: 0.849861	valid_1's auc: 0.787232
[1809]	training's auc: 0.849892	valid_1's auc: 0.787234
[1810]	training's auc: 0.849913	valid_1's auc: 0.787236
[1811]	training's auc: 0.849939	valid_1's auc: 0.787241
[1812]	training's auc: 0.849964	valid_1's auc: 0.787241
[1813]	training's auc: 0.849999	valid_1's auc: 0.7

[1945]	training's auc: 0.853697	valid_1's auc: 0.787681
[1946]	training's auc: 0.853713	valid_1's auc: 0.787683
[1947]	training's auc: 0.853736	valid_1's auc: 0.787681
[1948]	training's auc: 0.853771	valid_1's auc: 0.787679
[1949]	training's auc: 0.853803	valid_1's auc: 0.787677
[1950]	training's auc: 0.853825	valid_1's auc: 0.787672
[1951]	training's auc: 0.853857	valid_1's auc: 0.787671
[1952]	training's auc: 0.853885	valid_1's auc: 0.787675
[1953]	training's auc: 0.853912	valid_1's auc: 0.787674
[1954]	training's auc: 0.853934	valid_1's auc: 0.787675
[1955]	training's auc: 0.853964	valid_1's auc: 0.787678
[1956]	training's auc: 0.853993	valid_1's auc: 0.787677
[1957]	training's auc: 0.854025	valid_1's auc: 0.787669
[1958]	training's auc: 0.854058	valid_1's auc: 0.787676
[1959]	training's auc: 0.854081	valid_1's auc: 0.787674
[1960]	training's auc: 0.854112	valid_1's auc: 0.787679
[1961]	training's auc: 0.854127	valid_1's auc: 0.787679
[1962]	training's auc: 0.854159	valid_1's auc: 0

[2095]	training's auc: 0.857683	valid_1's auc: 0.787973
[2096]	training's auc: 0.857711	valid_1's auc: 0.787986
[2097]	training's auc: 0.857734	valid_1's auc: 0.787988
[2098]	training's auc: 0.857759	valid_1's auc: 0.787992
[2099]	training's auc: 0.857788	valid_1's auc: 0.787993
[2100]	training's auc: 0.857809	valid_1's auc: 0.787995
[2101]	training's auc: 0.857832	valid_1's auc: 0.787996
[2102]	training's auc: 0.857856	valid_1's auc: 0.787994
[2103]	training's auc: 0.857873	valid_1's auc: 0.787995
[2104]	training's auc: 0.857893	valid_1's auc: 0.787998
[2105]	training's auc: 0.857919	valid_1's auc: 0.788003
[2106]	training's auc: 0.857943	valid_1's auc: 0.788007
[2107]	training's auc: 0.85797	valid_1's auc: 0.788008
[2108]	training's auc: 0.857999	valid_1's auc: 0.788008
[2109]	training's auc: 0.858026	valid_1's auc: 0.78801
[2110]	training's auc: 0.858046	valid_1's auc: 0.788014
[2111]	training's auc: 0.858069	valid_1's auc: 0.788026
[2112]	training's auc: 0.858097	valid_1's auc: 0.7

[2245]	training's auc: 0.861541	valid_1's auc: 0.788332
[2246]	training's auc: 0.86156	valid_1's auc: 0.788332
[2247]	training's auc: 0.861585	valid_1's auc: 0.788326
[2248]	training's auc: 0.861604	valid_1's auc: 0.788336
[2249]	training's auc: 0.861637	valid_1's auc: 0.788338
[2250]	training's auc: 0.86166	valid_1's auc: 0.788342
[2251]	training's auc: 0.861688	valid_1's auc: 0.788348
[2252]	training's auc: 0.861711	valid_1's auc: 0.78835
[2253]	training's auc: 0.861735	valid_1's auc: 0.788352
[2254]	training's auc: 0.861757	valid_1's auc: 0.788351
[2255]	training's auc: 0.861787	valid_1's auc: 0.78835
[2256]	training's auc: 0.86182	valid_1's auc: 0.788352
[2257]	training's auc: 0.861835	valid_1's auc: 0.788357
[2258]	training's auc: 0.861859	valid_1's auc: 0.78836
[2259]	training's auc: 0.861902	valid_1's auc: 0.788368
[2260]	training's auc: 0.861926	valid_1's auc: 0.78837
[2261]	training's auc: 0.861947	valid_1's auc: 0.788366
[2262]	training's auc: 0.861974	valid_1's auc: 0.788362

[2393]	training's auc: 0.865176	valid_1's auc: 0.788532
[2394]	training's auc: 0.865206	valid_1's auc: 0.788534
[2395]	training's auc: 0.865241	valid_1's auc: 0.788529
[2396]	training's auc: 0.865268	valid_1's auc: 0.788533
[2397]	training's auc: 0.86529	valid_1's auc: 0.788538
[2398]	training's auc: 0.865321	valid_1's auc: 0.788543
[2399]	training's auc: 0.865333	valid_1's auc: 0.788542
[2400]	training's auc: 0.865357	valid_1's auc: 0.788534
[2401]	training's auc: 0.865388	valid_1's auc: 0.788539
[2402]	training's auc: 0.865415	valid_1's auc: 0.788538
[2403]	training's auc: 0.865441	valid_1's auc: 0.788543
[2404]	training's auc: 0.865461	valid_1's auc: 0.788541
[2405]	training's auc: 0.865478	valid_1's auc: 0.788548
[2406]	training's auc: 0.865501	valid_1's auc: 0.78855
[2407]	training's auc: 0.865519	valid_1's auc: 0.788553
[2408]	training's auc: 0.865541	valid_1's auc: 0.788554
[2409]	training's auc: 0.865558	valid_1's auc: 0.78855
[2410]	training's auc: 0.86558	valid_1's auc: 0.788

[2541]	training's auc: 0.86869	valid_1's auc: 0.788706
[2542]	training's auc: 0.86871	valid_1's auc: 0.788715
[2543]	training's auc: 0.868737	valid_1's auc: 0.788713
[2544]	training's auc: 0.86876	valid_1's auc: 0.788716
[2545]	training's auc: 0.868778	valid_1's auc: 0.788716
[2546]	training's auc: 0.8688	valid_1's auc: 0.788716
[2547]	training's auc: 0.868827	valid_1's auc: 0.788715
[2548]	training's auc: 0.868855	valid_1's auc: 0.788715
[2549]	training's auc: 0.868879	valid_1's auc: 0.788714
[2550]	training's auc: 0.868904	valid_1's auc: 0.788722
[2551]	training's auc: 0.868918	valid_1's auc: 0.788723
[2552]	training's auc: 0.868938	valid_1's auc: 0.788728
[2553]	training's auc: 0.868962	valid_1's auc: 0.788735
[2554]	training's auc: 0.868991	valid_1's auc: 0.788736
[2555]	training's auc: 0.869017	valid_1's auc: 0.788741
[2556]	training's auc: 0.869038	valid_1's auc: 0.788739
[2557]	training's auc: 0.869062	valid_1's auc: 0.788744
[2558]	training's auc: 0.869104	valid_1's auc: 0.7887

[2688]	training's auc: 0.872156	valid_1's auc: 0.788918
[2689]	training's auc: 0.872185	valid_1's auc: 0.788923
[2690]	training's auc: 0.87221	valid_1's auc: 0.788918
[2691]	training's auc: 0.872238	valid_1's auc: 0.788917
[2692]	training's auc: 0.872249	valid_1's auc: 0.788917
[2693]	training's auc: 0.872264	valid_1's auc: 0.78892
[2694]	training's auc: 0.872287	valid_1's auc: 0.788916
[2695]	training's auc: 0.872318	valid_1's auc: 0.788921
[2696]	training's auc: 0.872338	valid_1's auc: 0.788921
[2697]	training's auc: 0.872359	valid_1's auc: 0.788916
[2698]	training's auc: 0.872378	valid_1's auc: 0.78892
[2699]	training's auc: 0.872401	valid_1's auc: 0.788927
[2700]	training's auc: 0.87242	valid_1's auc: 0.788923
[2701]	training's auc: 0.872443	valid_1's auc: 0.788928
[2702]	training's auc: 0.872472	valid_1's auc: 0.788933
[2703]	training's auc: 0.872496	valid_1's auc: 0.788929
[2704]	training's auc: 0.87252	valid_1's auc: 0.788928
[2705]	training's auc: 0.872549	valid_1's auc: 0.7889

[2836]	training's auc: 0.875462	valid_1's auc: 0.789054
[2837]	training's auc: 0.875486	valid_1's auc: 0.789056
[2838]	training's auc: 0.87551	valid_1's auc: 0.789061
[2839]	training's auc: 0.875543	valid_1's auc: 0.789067
[2840]	training's auc: 0.875566	valid_1's auc: 0.789073
[2841]	training's auc: 0.875585	valid_1's auc: 0.789077
[2842]	training's auc: 0.875612	valid_1's auc: 0.789081
[2843]	training's auc: 0.875634	valid_1's auc: 0.789081
[2844]	training's auc: 0.875662	valid_1's auc: 0.789085
[2845]	training's auc: 0.875681	valid_1's auc: 0.789087
[2846]	training's auc: 0.875699	valid_1's auc: 0.789085
[2847]	training's auc: 0.875719	valid_1's auc: 0.789088
[2848]	training's auc: 0.875737	valid_1's auc: 0.789086
[2849]	training's auc: 0.87576	valid_1's auc: 0.789088
[2850]	training's auc: 0.875767	valid_1's auc: 0.789092
[2851]	training's auc: 0.875792	valid_1's auc: 0.78909
[2852]	training's auc: 0.875818	valid_1's auc: 0.789088
[2853]	training's auc: 0.875839	valid_1's auc: 0.78

[2983]	training's auc: 0.878774	valid_1's auc: 0.789232
[2984]	training's auc: 0.878804	valid_1's auc: 0.78923
[2985]	training's auc: 0.87883	valid_1's auc: 0.789229
[2986]	training's auc: 0.87886	valid_1's auc: 0.789231
[2987]	training's auc: 0.878883	valid_1's auc: 0.789235
[2988]	training's auc: 0.878906	valid_1's auc: 0.789229
[2989]	training's auc: 0.878923	valid_1's auc: 0.789233
[2990]	training's auc: 0.878938	valid_1's auc: 0.789234
[2991]	training's auc: 0.878968	valid_1's auc: 0.78924
[2992]	training's auc: 0.878988	valid_1's auc: 0.789239
[2993]	training's auc: 0.87901	valid_1's auc: 0.789243
[2994]	training's auc: 0.879031	valid_1's auc: 0.789245
[2995]	training's auc: 0.879055	valid_1's auc: 0.789235
[2996]	training's auc: 0.879076	valid_1's auc: 0.789234
[2997]	training's auc: 0.879097	valid_1's auc: 0.78924
[2998]	training's auc: 0.879117	valid_1's auc: 0.789239
[2999]	training's auc: 0.879141	valid_1's auc: 0.789243
[3000]	training's auc: 0.879153	valid_1's auc: 0.78924

[3134]	training's auc: 0.882133	valid_1's auc: 0.789306
[3135]	training's auc: 0.882148	valid_1's auc: 0.789306
[3136]	training's auc: 0.88217	valid_1's auc: 0.789304
[3137]	training's auc: 0.882193	valid_1's auc: 0.7893
[3138]	training's auc: 0.882215	valid_1's auc: 0.789301
[3139]	training's auc: 0.88223	valid_1's auc: 0.789305
[3140]	training's auc: 0.882255	valid_1's auc: 0.789308
[3141]	training's auc: 0.88227	valid_1's auc: 0.789306
[3142]	training's auc: 0.882288	valid_1's auc: 0.789311
[3143]	training's auc: 0.882311	valid_1's auc: 0.789304
[3144]	training's auc: 0.88234	valid_1's auc: 0.78931
[3145]	training's auc: 0.882356	valid_1's auc: 0.78931
[3146]	training's auc: 0.882389	valid_1's auc: 0.789312
[3147]	training's auc: 0.882403	valid_1's auc: 0.789315
[3148]	training's auc: 0.882427	valid_1's auc: 0.78931
[3149]	training's auc: 0.882445	valid_1's auc: 0.789315
[3150]	training's auc: 0.882465	valid_1's auc: 0.789312
[3151]	training's auc: 0.882484	valid_1's auc: 0.78931
[3

[3280]	training's auc: 0.885103	valid_1's auc: 0.789332
[3281]	training's auc: 0.885128	valid_1's auc: 0.789331
[3282]	training's auc: 0.885144	valid_1's auc: 0.789327
[3283]	training's auc: 0.885168	valid_1's auc: 0.78933
[3284]	training's auc: 0.885188	valid_1's auc: 0.789328
[3285]	training's auc: 0.885209	valid_1's auc: 0.789331
[3286]	training's auc: 0.885231	valid_1's auc: 0.789325
[3287]	training's auc: 0.885243	valid_1's auc: 0.789322
[3288]	training's auc: 0.885267	valid_1's auc: 0.789323
[3289]	training's auc: 0.885288	valid_1's auc: 0.789319
[3290]	training's auc: 0.885311	valid_1's auc: 0.78931
[3291]	training's auc: 0.88533	valid_1's auc: 0.789322
[3292]	training's auc: 0.885353	valid_1's auc: 0.789326
[3293]	training's auc: 0.885371	valid_1's auc: 0.789324
[3294]	training's auc: 0.885393	valid_1's auc: 0.789327
[3295]	training's auc: 0.88542	valid_1's auc: 0.789324
[3296]	training's auc: 0.885448	valid_1's auc: 0.789326
[3297]	training's auc: 0.885462	valid_1's auc: 0.789

[3432]	training's auc: 0.888076	valid_1's auc: 0.789376
[3433]	training's auc: 0.888103	valid_1's auc: 0.789375
[3434]	training's auc: 0.888118	valid_1's auc: 0.789375
[3435]	training's auc: 0.88814	valid_1's auc: 0.789371
[3436]	training's auc: 0.888162	valid_1's auc: 0.789377
[3437]	training's auc: 0.888178	valid_1's auc: 0.789375
[3438]	training's auc: 0.888198	valid_1's auc: 0.789375
[3439]	training's auc: 0.888217	valid_1's auc: 0.789371
[3440]	training's auc: 0.888238	valid_1's auc: 0.789368
[3441]	training's auc: 0.888259	valid_1's auc: 0.789363
[3442]	training's auc: 0.888286	valid_1's auc: 0.789356
[3443]	training's auc: 0.888307	valid_1's auc: 0.789351
[3444]	training's auc: 0.888317	valid_1's auc: 0.789352
[3445]	training's auc: 0.888333	valid_1's auc: 0.789347
[3446]	training's auc: 0.888359	valid_1's auc: 0.789345
[3447]	training's auc: 0.888378	valid_1's auc: 0.789358
[3448]	training's auc: 0.888406	valid_1's auc: 0.789357
[3449]	training's auc: 0.888413	valid_1's auc: 0.

[3583]	training's auc: 0.890993	valid_1's auc: 0.789346
[3584]	training's auc: 0.89102	valid_1's auc: 0.789346
[3585]	training's auc: 0.891038	valid_1's auc: 0.78935
[3586]	training's auc: 0.891053	valid_1's auc: 0.789358
[3587]	training's auc: 0.891064	valid_1's auc: 0.789357
[3588]	training's auc: 0.89109	valid_1's auc: 0.789357
[3589]	training's auc: 0.89112	valid_1's auc: 0.789362
[3590]	training's auc: 0.891143	valid_1's auc: 0.789365
[3591]	training's auc: 0.891156	valid_1's auc: 0.789366
[3592]	training's auc: 0.891182	valid_1's auc: 0.789372
[3593]	training's auc: 0.8912	valid_1's auc: 0.789369
[3594]	training's auc: 0.891222	valid_1's auc: 0.789369
[3595]	training's auc: 0.891239	valid_1's auc: 0.789372
[3596]	training's auc: 0.891256	valid_1's auc: 0.789373
[3597]	training's auc: 0.891274	valid_1's auc: 0.789374
[3598]	training's auc: 0.891283	valid_1's auc: 0.789373
[3599]	training's auc: 0.8913	valid_1's auc: 0.789377
[3600]	training's auc: 0.891317	valid_1's auc: 0.789379


[3734]	training's auc: 0.893749	valid_1's auc: 0.789376
[3735]	training's auc: 0.893768	valid_1's auc: 0.789379
[3736]	training's auc: 0.893792	valid_1's auc: 0.789382
[3737]	training's auc: 0.893809	valid_1's auc: 0.789382
[3738]	training's auc: 0.893826	valid_1's auc: 0.789387
[3739]	training's auc: 0.893846	valid_1's auc: 0.789382
[3740]	training's auc: 0.893862	valid_1's auc: 0.789382
[3741]	training's auc: 0.893882	valid_1's auc: 0.789378
[3742]	training's auc: 0.893899	valid_1's auc: 0.789381
[3743]	training's auc: 0.893919	valid_1's auc: 0.789384
[3744]	training's auc: 0.893943	valid_1's auc: 0.789388
[3745]	training's auc: 0.893963	valid_1's auc: 0.789388
[3746]	training's auc: 0.893985	valid_1's auc: 0.789392
[3747]	training's auc: 0.894006	valid_1's auc: 0.789387
[3748]	training's auc: 0.894015	valid_1's auc: 0.789385
[3749]	training's auc: 0.894034	valid_1's auc: 0.789386
[3750]	training's auc: 0.894055	valid_1's auc: 0.789384
[3751]	training's auc: 0.894074	valid_1's auc: 0

[3885]	training's auc: 0.896503	valid_1's auc: 0.789434
[3886]	training's auc: 0.896514	valid_1's auc: 0.789433
[3887]	training's auc: 0.896532	valid_1's auc: 0.789434
[3888]	training's auc: 0.89654	valid_1's auc: 0.789432
[3889]	training's auc: 0.89656	valid_1's auc: 0.789432
[3890]	training's auc: 0.896581	valid_1's auc: 0.789433
[3891]	training's auc: 0.896608	valid_1's auc: 0.789434
[3892]	training's auc: 0.896635	valid_1's auc: 0.789433
[3893]	training's auc: 0.896653	valid_1's auc: 0.789436
[3894]	training's auc: 0.896667	valid_1's auc: 0.789434
[3895]	training's auc: 0.896679	valid_1's auc: 0.789436
[3896]	training's auc: 0.896696	valid_1's auc: 0.789435
[3897]	training's auc: 0.896707	valid_1's auc: 0.789437
[3898]	training's auc: 0.896725	valid_1's auc: 0.789435
[3899]	training's auc: 0.896748	valid_1's auc: 0.789433
[3900]	training's auc: 0.896766	valid_1's auc: 0.789435
[3901]	training's auc: 0.896788	valid_1's auc: 0.789433
[3902]	training's auc: 0.896816	valid_1's auc: 0.7

[4035]	training's auc: 0.899046	valid_1's auc: 0.789429
[4036]	training's auc: 0.899065	valid_1's auc: 0.789429
[4037]	training's auc: 0.899081	valid_1's auc: 0.789429
[4038]	training's auc: 0.899101	valid_1's auc: 0.789426
[4039]	training's auc: 0.899115	valid_1's auc: 0.789425
[4040]	training's auc: 0.89913	valid_1's auc: 0.789422
[4041]	training's auc: 0.899143	valid_1's auc: 0.789423
[4042]	training's auc: 0.899163	valid_1's auc: 0.789417
[4043]	training's auc: 0.899184	valid_1's auc: 0.789418
[4044]	training's auc: 0.899199	valid_1's auc: 0.789424
[4045]	training's auc: 0.899214	valid_1's auc: 0.789429
[4046]	training's auc: 0.899235	valid_1's auc: 0.78943
[4047]	training's auc: 0.899257	valid_1's auc: 0.789429
[4048]	training's auc: 0.899273	valid_1's auc: 0.789425
[4049]	training's auc: 0.899289	valid_1's auc: 0.789425
[4050]	training's auc: 0.899313	valid_1's auc: 0.789426
[4051]	training's auc: 0.89933	valid_1's auc: 0.789431
[4052]	training's auc: 0.899347	valid_1's auc: 0.78

[4183]	training's auc: 0.901591	valid_1's auc: 0.789435
[4184]	training's auc: 0.901618	valid_1's auc: 0.789432
[4185]	training's auc: 0.901642	valid_1's auc: 0.789435
[4186]	training's auc: 0.901657	valid_1's auc: 0.789435
[4187]	training's auc: 0.901679	valid_1's auc: 0.789437
[4188]	training's auc: 0.901682	valid_1's auc: 0.789437
[4189]	training's auc: 0.901696	valid_1's auc: 0.789441
[4190]	training's auc: 0.901711	valid_1's auc: 0.789443
[4191]	training's auc: 0.901718	valid_1's auc: 0.789446
[4192]	training's auc: 0.901738	valid_1's auc: 0.789435
[4193]	training's auc: 0.901761	valid_1's auc: 0.789429
[4194]	training's auc: 0.901782	valid_1's auc: 0.789427
[4195]	training's auc: 0.901801	valid_1's auc: 0.789421
[4196]	training's auc: 0.901826	valid_1's auc: 0.789418
[4197]	training's auc: 0.901848	valid_1's auc: 0.789414
[4198]	training's auc: 0.901867	valid_1's auc: 0.789415
[4199]	training's auc: 0.90189	valid_1's auc: 0.789416
[4200]	training's auc: 0.901911	valid_1's auc: 0.

[4331]	training's auc: 0.904105	valid_1's auc: 0.789299
[4332]	training's auc: 0.904118	valid_1's auc: 0.789297
[4333]	training's auc: 0.904133	valid_1's auc: 0.789298
[4334]	training's auc: 0.90414	valid_1's auc: 0.789297
[4335]	training's auc: 0.904156	valid_1's auc: 0.789299
[4336]	training's auc: 0.904173	valid_1's auc: 0.789296
[4337]	training's auc: 0.904178	valid_1's auc: 0.789299
[4338]	training's auc: 0.90419	valid_1's auc: 0.789303
[4339]	training's auc: 0.904208	valid_1's auc: 0.789305
[4340]	training's auc: 0.904225	valid_1's auc: 0.789309
[4341]	training's auc: 0.904246	valid_1's auc: 0.789314
[4342]	training's auc: 0.904265	valid_1's auc: 0.789314
[4343]	training's auc: 0.904283	valid_1's auc: 0.789312
[4344]	training's auc: 0.904306	valid_1's auc: 0.789321
[4345]	training's auc: 0.904318	valid_1's auc: 0.789322
[4346]	training's auc: 0.904335	valid_1's auc: 0.789322
[4347]	training's auc: 0.904356	valid_1's auc: 0.78933
[4348]	training's auc: 0.904378	valid_1's auc: 0.78

[4479]	training's auc: 0.906387	valid_1's auc: 0.789332
[4480]	training's auc: 0.906405	valid_1's auc: 0.789332
[4481]	training's auc: 0.906415	valid_1's auc: 0.789328
[4482]	training's auc: 0.906432	valid_1's auc: 0.789327
[4483]	training's auc: 0.906447	valid_1's auc: 0.789321
[4484]	training's auc: 0.906463	valid_1's auc: 0.789317
[4485]	training's auc: 0.90648	valid_1's auc: 0.789315
[4486]	training's auc: 0.9065	valid_1's auc: 0.789312
[4487]	training's auc: 0.906527	valid_1's auc: 0.789307
[4488]	training's auc: 0.906537	valid_1's auc: 0.78931
[4489]	training's auc: 0.906555	valid_1's auc: 0.789314
[4490]	training's auc: 0.906567	valid_1's auc: 0.789319
[4491]	training's auc: 0.906584	valid_1's auc: 0.789321
[4492]	training's auc: 0.906609	valid_1's auc: 0.789317
[4493]	training's auc: 0.906619	valid_1's auc: 0.789318
[4494]	training's auc: 0.906641	valid_1's auc: 0.789323
[4495]	training's auc: 0.906657	valid_1's auc: 0.789321
[4496]	training's auc: 0.90666	valid_1's auc: 0.7893

[4628]	training's auc: 0.908769	valid_1's auc: 0.789283
[4629]	training's auc: 0.908792	valid_1's auc: 0.789275
[4630]	training's auc: 0.908806	valid_1's auc: 0.789274
[4631]	training's auc: 0.908823	valid_1's auc: 0.789273
[4632]	training's auc: 0.908843	valid_1's auc: 0.789281
[4633]	training's auc: 0.908849	valid_1's auc: 0.789279
[4634]	training's auc: 0.908856	valid_1's auc: 0.789281
[4635]	training's auc: 0.908877	valid_1's auc: 0.789283
[4636]	training's auc: 0.908886	valid_1's auc: 0.789282
[4637]	training's auc: 0.908905	valid_1's auc: 0.789282
[4638]	training's auc: 0.908922	valid_1's auc: 0.789283
[4639]	training's auc: 0.908935	valid_1's auc: 0.789284
[4640]	training's auc: 0.908949	valid_1's auc: 0.789285
[4641]	training's auc: 0.908964	valid_1's auc: 0.789278
[4642]	training's auc: 0.90898	valid_1's auc: 0.789276
[4643]	training's auc: 0.908993	valid_1's auc: 0.789276
[4644]	training's auc: 0.909006	valid_1's auc: 0.789276
[4645]	training's auc: 0.90902	valid_1's auc: 0.7

<IPython.core.display.Javascript object>

In [34]:
# Score the submission features (X_val, extracted from test_df above) with the fitted model.
# NOTE(review): the name says "proba", but what predict() returns depends on how clf was built:
# a Booster from lgb.train returns probabilities, whereas a scikit-learn style LGBMClassifier
# returns hard class labels (predict_proba(X_val)[:, 1] would be needed) -- confirm against the
# cell that defines clf.
y_proba = clf.predict(X_val)
# to_csv is a helper not shown in this section; presumably it writes the predictions to a
# submission file named after the second argument -- verify against its definition.
to_csv(y_proba, "lgbm_WHO_WILL_WIN")

<IPython.core.display.Javascript object>

## 3. Conclusions

1. When dealing with many-to-one relationships between the CSV files, it is more effective to extract not only AVG and COUNT aggregates but MIN and MAX as well.
2. Sometimes advanced imputation techniques such as IterativeImputer, KNNImputer and MiceImputer can introduce bias into the dataset, and SimpleImputer with constant filling outperforms them.
3. Creating different features out of existing ones was the crucial part of the project.
4. Clustering as a feature had the highest correlation coefficient with the target.
5. When dealing with big datasets, it is important to choose fast models. LightGBM helped a lot.

![](proof.png)