In [1]:
import numpy as np
import pandas as pd

import string
import re
import gc

In [2]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer



In [3]:
train = pd.read_csv('data/train.csv', encoding='utf-8')
train.dropna(inplace=True)
train = train[train.sentence_id != 492799]

#### Prepare full data

In [4]:
vectorizer = CountVectorizer (
    analyzer='char_wb',
    ngram_range=(1, 1),
    lowercase=False
)

In [5]:
%time data_ngrams = vectorizer.fit_transform(train['before'].values)

Wall time: 3min 6s


In [6]:
data_ngrams.shape

(10574501, 2659)

In [7]:
labels = np.array(train['after'].values != train['before'].values, dtype=np.uint8)

enc = LabelEncoder()
labels = enc.fit_transform(labels)

#### Prepare train data

In [8]:
sample_sizes = {
    "PUNCT" : 25000,
    "PLAIN" : 30000,
    "CARDINAL" : 20000,
    "LETTERS" : 20000,
    "DATE" : 20000,
    "ORDINAL" : 15000,
    "MEASURE" : 15000
}

train.reset_index(inplace=True)
train_data = pd.DataFrame(columns=train.columns)
sample_max_size = 10000

for c in train['class'].unique():
    class_sample = train[train['class'] == c]
    sample_size = sample_sizes[c] if c in sample_sizes else sample_max_size
    if len(class_sample) > sample_max_size:
        train_data = train_data.append(class_sample.sample(sample_size))
    else:
        train_data = train_data.append(class_sample)

In [9]:
train_ngrams = vectorizer.transform(train_data['before'].values)

In [10]:
labels_train = np.array(train_data['after'].values != train_data['before'].values, dtype=np.uint8)
labels_train = enc.transform(labels_train)

### Train XGBoost classifier 

In [11]:
xtr, xcv, ytr, ycv = train_test_split(train_ngrams, labels_train, test_size = 0.05, random_state = 42)
feature_names = ['f_{}'.format(i) for i in range(train_ngrams.shape[1])]

In [12]:
dtrain = xgb.DMatrix(xtr, label=ytr, feature_names=feature_names)
dvalid = xgb.DMatrix(xcv, label=ycv, feature_names=feature_names)

In [28]:
params = [("objective", "binary:logistic"),
          ("booster", "gbtree"),
          ("nthread", 3),
          ("eta", 0.01),
          ("max_depth", 6),
          ("subsample", 0.9),
          ("min_child_weight", 1),
          ("colsample_bytree", 0.7),
          ("eval_metric", 'error'),
          ("eval_metric", 'auc'),
         ]
num_rounds = 5000
stop = 40

In [20]:
watchlist = [(dtrain, 'train'), (dvalid, 'valid')]

In [29]:
gbm = xgb.train(params, dtrain, num_rounds, evals=watchlist, early_stopping_rounds=stop)

[0]	train-error:0.14725	train-auc:0.888141	valid-error:0.153066	valid-auc:0.883877
Multiple eval metrics have been passed: 'valid-auc' will be used for early stopping.

Will train until valid-auc hasn't improved in 40 rounds.
[1]	train-error:0.120473	train-auc:0.92562	valid-error:0.124546	valid-auc:0.922994
[2]	train-error:0.115964	train-auc:0.937068	valid-error:0.120167	valid-auc:0.933591
[3]	train-error:0.10603	train-auc:0.947352	valid-error:0.111408	valid-auc:0.944138
[4]	train-error:0.114739	train-auc:0.947325	valid-error:0.119526	valid-auc:0.944084
[5]	train-error:0.122553	train-auc:0.948638	valid-error:0.126896	valid-auc:0.945534
[6]	train-error:0.120552	train-auc:0.948544	valid-error:0.123478	valid-auc:0.945524
[7]	train-error:0.110989	train-auc:0.948558	valid-error:0.114612	valid-auc:0.945572
[8]	train-error:0.10743	train-auc:0.948574	valid-error:0.112156	valid-auc:0.945416
[9]	train-error:0.105058	train-auc:0.949266	valid-error:0.110553	valid-auc:0.946311
[10]	train-error:0.10

[96]	train-error:0.091739	train-auc:0.967339	valid-error:0.095172	valid-auc:0.966325
[97]	train-error:0.092076	train-auc:0.967351	valid-error:0.095172	valid-auc:0.966324
[98]	train-error:0.092138	train-auc:0.967365	valid-error:0.095279	valid-auc:0.966329
[99]	train-error:0.091554	train-auc:0.967358	valid-error:0.094638	valid-auc:0.966331
[100]	train-error:0.090941	train-auc:0.967365	valid-error:0.094211	valid-auc:0.966337
[101]	train-error:0.091042	train-auc:0.96736	valid-error:0.094424	valid-auc:0.966336
[102]	train-error:0.090946	train-auc:0.967373	valid-error:0.094317	valid-auc:0.966338
[103]	train-error:0.090941	train-auc:0.967384	valid-error:0.094211	valid-auc:0.966378
[104]	train-error:0.090946	train-auc:0.967381	valid-error:0.094211	valid-auc:0.966376
[105]	train-error:0.090952	train-auc:0.967383	valid-error:0.094211	valid-auc:0.966382
[106]	train-error:0.091042	train-auc:0.967397	valid-error:0.094424	valid-auc:0.966379
[107]	train-error:0.091452	train-auc:0.967397	valid-error:0

[192]	train-error:0.083784	train-auc:0.980304	valid-error:0.086306	valid-auc:0.979643
[193]	train-error:0.08379	train-auc:0.980308	valid-error:0.086306	valid-auc:0.979637
[194]	train-error:0.083807	train-auc:0.980308	valid-error:0.086306	valid-auc:0.979637
[195]	train-error:0.083812	train-auc:0.98031	valid-error:0.086306	valid-auc:0.979635
[196]	train-error:0.083812	train-auc:0.980306	valid-error:0.086306	valid-auc:0.979621
[197]	train-error:0.083812	train-auc:0.980267	valid-error:0.086306	valid-auc:0.979553
[198]	train-error:0.083823	train-auc:0.980272	valid-error:0.086306	valid-auc:0.979557
[199]	train-error:0.083807	train-auc:0.980278	valid-error:0.086306	valid-auc:0.97956
[200]	train-error:0.083818	train-auc:0.980284	valid-error:0.086306	valid-auc:0.979572
[201]	train-error:0.083784	train-auc:0.980338	valid-error:0.086306	valid-auc:0.979651
[202]	train-error:0.083784	train-auc:0.980417	valid-error:0.086306	valid-auc:0.979819
[203]	train-error:0.083773	train-auc:0.981231	valid-error

[288]	train-error:0.071601	train-auc:0.985186	valid-error:0.075198	valid-auc:0.984803
[289]	train-error:0.071601	train-auc:0.985192	valid-error:0.075198	valid-auc:0.984813
[290]	train-error:0.07159	train-auc:0.985191	valid-error:0.075198	valid-auc:0.984809
[291]	train-error:0.071573	train-auc:0.985197	valid-error:0.075198	valid-auc:0.984807
[292]	train-error:0.07159	train-auc:0.986024	valid-error:0.075198	valid-auc:0.985788
[293]	train-error:0.07159	train-auc:0.985999	valid-error:0.075198	valid-auc:0.98576
[294]	train-error:0.071427	train-auc:0.986004	valid-error:0.075091	valid-auc:0.985769
[295]	train-error:0.071427	train-auc:0.986021	valid-error:0.075091	valid-auc:0.985788
[296]	train-error:0.069397	train-auc:0.986026	valid-error:0.073061	valid-auc:0.985792
[297]	train-error:0.06865	train-auc:0.986029	valid-error:0.072314	valid-auc:0.985797
[298]	train-error:0.068526	train-auc:0.986065	valid-error:0.072314	valid-auc:0.985837
[299]	train-error:0.068644	train-auc:0.986071	valid-error:0

[384]	train-error:0.056905	train-auc:0.988045	valid-error:0.060457	valid-auc:0.98827
[385]	train-error:0.05622	train-auc:0.988049	valid-error:0.059603	valid-auc:0.988276
[386]	train-error:0.056231	train-auc:0.988525	valid-error:0.059603	valid-auc:0.988731
[387]	train-error:0.05613	train-auc:0.988525	valid-error:0.059282	valid-auc:0.988717
[388]	train-error:0.05419	train-auc:0.988529	valid-error:0.056825	valid-auc:0.988718
[389]	train-error:0.05419	train-auc:0.988532	valid-error:0.056825	valid-auc:0.988723
[390]	train-error:0.05419	train-auc:0.988958	valid-error:0.056825	valid-auc:0.989225
[391]	train-error:0.05419	train-auc:0.988956	valid-error:0.056825	valid-auc:0.989225
[392]	train-error:0.054179	train-auc:0.988958	valid-error:0.056825	valid-auc:0.989228
[393]	train-error:0.054179	train-auc:0.988963	valid-error:0.056825	valid-auc:0.989235
[394]	train-error:0.054291	train-auc:0.988966	valid-error:0.057146	valid-auc:0.989236
[395]	train-error:0.054201	train-auc:0.988968	valid-error:0.0

[480]	train-error:0.049293	train-auc:0.989753	valid-error:0.051592	valid-auc:0.990108
[481]	train-error:0.049293	train-auc:0.989758	valid-error:0.051592	valid-auc:0.990109
[482]	train-error:0.049293	train-auc:0.98976	valid-error:0.051592	valid-auc:0.990111
[483]	train-error:0.049282	train-auc:0.989763	valid-error:0.051592	valid-auc:0.990113
[484]	train-error:0.049327	train-auc:0.989766	valid-error:0.051592	valid-auc:0.990116
[485]	train-error:0.049327	train-auc:0.989772	valid-error:0.051592	valid-auc:0.990122
[486]	train-error:0.04926	train-auc:0.989773	valid-error:0.051592	valid-auc:0.990128
[487]	train-error:0.049203	train-auc:0.989774	valid-error:0.051592	valid-auc:0.990127
[488]	train-error:0.049203	train-auc:0.989777	valid-error:0.051592	valid-auc:0.990128
[489]	train-error:0.049254	train-auc:0.989777	valid-error:0.051485	valid-auc:0.990124
[490]	train-error:0.049192	train-auc:0.989782	valid-error:0.051485	valid-auc:0.990134
[491]	train-error:0.049209	train-auc:0.989796	valid-erro

[576]	train-error:0.045487	train-auc:0.992792	valid-error:0.048067	valid-auc:0.99304
[577]	train-error:0.045487	train-auc:0.992799	valid-error:0.048067	valid-auc:0.993041
[578]	train-error:0.045487	train-auc:0.9928	valid-error:0.048067	valid-auc:0.993044
[579]	train-error:0.045409	train-auc:0.992798	valid-error:0.04796	valid-auc:0.993046
[580]	train-error:0.045409	train-auc:0.9928	valid-error:0.04796	valid-auc:0.993045
[581]	train-error:0.044104	train-auc:0.992802	valid-error:0.04561	valid-auc:0.993048
[582]	train-error:0.044104	train-auc:0.992804	valid-error:0.04561	valid-auc:0.99305
[583]	train-error:0.04411	train-auc:0.992805	valid-error:0.04561	valid-auc:0.99305
[584]	train-error:0.04411	train-auc:0.992807	valid-error:0.04561	valid-auc:0.993049
[585]	train-error:0.04411	train-auc:0.992809	valid-error:0.04561	valid-auc:0.993048
[586]	train-error:0.044093	train-auc:0.992811	valid-error:0.045503	valid-auc:0.993052
[587]	train-error:0.044071	train-auc:0.992813	valid-error:0.045503	vali

[673]	train-error:0.039708	train-auc:0.993822	valid-error:0.039842	valid-auc:0.993894
[674]	train-error:0.039753	train-auc:0.993821	valid-error:0.039842	valid-auc:0.993891
[675]	train-error:0.039691	train-auc:0.993823	valid-error:0.039842	valid-auc:0.993897
[676]	train-error:0.039685	train-auc:0.993823	valid-error:0.039842	valid-auc:0.9939
[677]	train-error:0.039685	train-auc:0.993726	valid-error:0.039842	valid-auc:0.993826
[678]	train-error:0.039663	train-auc:0.993728	valid-error:0.039842	valid-auc:0.99383
[679]	train-error:0.03968	train-auc:0.99373	valid-error:0.039842	valid-auc:0.993824
[680]	train-error:0.03968	train-auc:0.993731	valid-error:0.039842	valid-auc:0.993823
[681]	train-error:0.039618	train-auc:0.993732	valid-error:0.039735	valid-auc:0.993824
[682]	train-error:0.039601	train-auc:0.993734	valid-error:0.039735	valid-auc:0.993828
[683]	train-error:0.03959	train-auc:0.993734	valid-error:0.039735	valid-auc:0.993825
[684]	train-error:0.039607	train-auc:0.993731	valid-error:0.0

[769]	train-error:0.038325	train-auc:0.994277	valid-error:0.037492	valid-auc:0.994308
[770]	train-error:0.038314	train-auc:0.994277	valid-error:0.037492	valid-auc:0.994307
[771]	train-error:0.03828	train-auc:0.994298	valid-error:0.037492	valid-auc:0.994331
[772]	train-error:0.03828	train-auc:0.994299	valid-error:0.037492	valid-auc:0.994334
[773]	train-error:0.038331	train-auc:0.994301	valid-error:0.037492	valid-auc:0.994334
[774]	train-error:0.038302	train-auc:0.994303	valid-error:0.037492	valid-auc:0.994337
[775]	train-error:0.038297	train-auc:0.994303	valid-error:0.037492	valid-auc:0.994338
[776]	train-error:0.038263	train-auc:0.994303	valid-error:0.037385	valid-auc:0.994339
[777]	train-error:0.038263	train-auc:0.994305	valid-error:0.037385	valid-auc:0.994363
[778]	train-error:0.038263	train-auc:0.994305	valid-error:0.037385	valid-auc:0.99436
[779]	train-error:0.038263	train-auc:0.994308	valid-error:0.037385	valid-auc:0.994366
[780]	train-error:0.036914	train-auc:0.994309	valid-error

[865]	train-error:0.036003	train-auc:0.994592	valid-error:0.034074	valid-auc:0.994557
[866]	train-error:0.036003	train-auc:0.994595	valid-error:0.034074	valid-auc:0.994563
[867]	train-error:0.036003	train-auc:0.994564	valid-error:0.034074	valid-auc:0.994542
[868]	train-error:0.035997	train-auc:0.994566	valid-error:0.034074	valid-auc:0.994537
[869]	train-error:0.033917	train-auc:0.994569	valid-error:0.032258	valid-auc:0.994552
[870]	train-error:0.035941	train-auc:0.994571	valid-error:0.034074	valid-auc:0.994549
[871]	train-error:0.035941	train-auc:0.994573	valid-error:0.034074	valid-auc:0.99455
[872]	train-error:0.035958	train-auc:0.994573	valid-error:0.034074	valid-auc:0.994552
[873]	train-error:0.035947	train-auc:0.994575	valid-error:0.034074	valid-auc:0.994556
[874]	train-error:0.035941	train-auc:0.994577	valid-error:0.034074	valid-auc:0.994561
[875]	train-error:0.0339	train-auc:0.994577	valid-error:0.032258	valid-auc:0.99455
[876]	train-error:0.033839	train-auc:0.994578	valid-error:

[961]	train-error:0.030949	train-auc:0.994873	valid-error:0.029267	valid-auc:0.99484
[962]	train-error:0.030949	train-auc:0.994873	valid-error:0.029267	valid-auc:0.994839
[963]	train-error:0.030949	train-auc:0.994875	valid-error:0.029267	valid-auc:0.994843
[964]	train-error:0.030949	train-auc:0.994874	valid-error:0.029267	valid-auc:0.994843
[965]	train-error:0.030949	train-auc:0.99489	valid-error:0.029267	valid-auc:0.994842
[966]	train-error:0.030949	train-auc:0.994894	valid-error:0.029267	valid-auc:0.994842
[967]	train-error:0.030881	train-auc:0.994894	valid-error:0.029054	valid-auc:0.994844
[968]	train-error:0.030881	train-auc:0.994897	valid-error:0.029054	valid-auc:0.994846
[969]	train-error:0.030881	train-auc:0.994897	valid-error:0.029054	valid-auc:0.994841
[970]	train-error:0.030718	train-auc:0.99492	valid-error:0.029054	valid-auc:0.994879
[971]	train-error:0.030713	train-auc:0.994922	valid-error:0.029054	valid-auc:0.99488
[972]	train-error:0.030673	train-auc:0.994924	valid-error:

[1057]	train-error:0.027413	train-auc:0.995223	valid-error:0.026597	valid-auc:0.995111
[1058]	train-error:0.027413	train-auc:0.995223	valid-error:0.026597	valid-auc:0.995112
[1059]	train-error:0.027413	train-auc:0.995224	valid-error:0.026597	valid-auc:0.995113
[1060]	train-error:0.027413	train-auc:0.995227	valid-error:0.026597	valid-auc:0.995115
[1061]	train-error:0.027413	train-auc:0.995227	valid-error:0.026597	valid-auc:0.995114
[1062]	train-error:0.027092	train-auc:0.995228	valid-error:0.02617	valid-auc:0.995116
[1063]	train-error:0.027087	train-auc:0.995228	valid-error:0.02617	valid-auc:0.995113
[1064]	train-error:0.027087	train-auc:0.995229	valid-error:0.02617	valid-auc:0.995116
[1065]	train-error:0.027087	train-auc:0.99523	valid-error:0.02617	valid-auc:0.995115
[1066]	train-error:0.027075	train-auc:0.995233	valid-error:0.02617	valid-auc:0.995118
[1067]	train-error:0.02707	train-auc:0.995233	valid-error:0.025956	valid-auc:0.995119
[1068]	train-error:0.02707	train-auc:0.995235	vali

[1152]	train-error:0.026153	train-auc:0.995577	valid-error:0.024995	valid-auc:0.99546
[1153]	train-error:0.026153	train-auc:0.995579	valid-error:0.024995	valid-auc:0.995462
[1154]	train-error:0.026153	train-auc:0.995579	valid-error:0.024995	valid-auc:0.995462
[1155]	train-error:0.026153	train-auc:0.995579	valid-error:0.024995	valid-auc:0.995465
[1156]	train-error:0.026125	train-auc:0.995581	valid-error:0.025101	valid-auc:0.995469
[1157]	train-error:0.026491	train-auc:0.995581	valid-error:0.025529	valid-auc:0.995467
[1158]	train-error:0.026485	train-auc:0.995582	valid-error:0.025529	valid-auc:0.995467
[1159]	train-error:0.026485	train-auc:0.995583	valid-error:0.025529	valid-auc:0.995468
[1160]	train-error:0.026485	train-auc:0.995585	valid-error:0.025529	valid-auc:0.995469
[1161]	train-error:0.026485	train-auc:0.995586	valid-error:0.025529	valid-auc:0.995469
[1162]	train-error:0.026479	train-auc:0.995586	valid-error:0.025529	valid-auc:0.995471
[1163]	train-error:0.026479	train-auc:0.9955

[1247]	train-error:0.025209	train-auc:0.995757	valid-error:0.024567	valid-auc:0.995599
[1248]	train-error:0.025203	train-auc:0.995757	valid-error:0.024567	valid-auc:0.995598
[1249]	train-error:0.025203	train-auc:0.995758	valid-error:0.024567	valid-auc:0.995594
[1250]	train-error:0.025203	train-auc:0.995759	valid-error:0.024567	valid-auc:0.995594
[1251]	train-error:0.025209	train-auc:0.995775	valid-error:0.024567	valid-auc:0.995607
[1252]	train-error:0.025203	train-auc:0.995774	valid-error:0.024567	valid-auc:0.995611
[1253]	train-error:0.025203	train-auc:0.995775	valid-error:0.024567	valid-auc:0.995611
[1254]	train-error:0.025203	train-auc:0.995778	valid-error:0.024567	valid-auc:0.995614
[1255]	train-error:0.025203	train-auc:0.99578	valid-error:0.024567	valid-auc:0.995616
[1256]	train-error:0.025203	train-auc:0.99578	valid-error:0.024567	valid-auc:0.995618
[1257]	train-error:0.025214	train-auc:0.995782	valid-error:0.024567	valid-auc:0.995616
[1258]	train-error:0.025214	train-auc:0.99578

[1342]	train-error:0.024793	train-auc:0.995907	valid-error:0.024247	valid-auc:0.995733
[1343]	train-error:0.024793	train-auc:0.995907	valid-error:0.024247	valid-auc:0.995731
[1344]	train-error:0.024793	train-auc:0.995908	valid-error:0.024247	valid-auc:0.995732
[1345]	train-error:0.024787	train-auc:0.995909	valid-error:0.024247	valid-auc:0.995733
[1346]	train-error:0.024787	train-auc:0.99591	valid-error:0.024247	valid-auc:0.995729
[1347]	train-error:0.024787	train-auc:0.99592	valid-error:0.024247	valid-auc:0.995728
[1348]	train-error:0.024787	train-auc:0.995921	valid-error:0.024247	valid-auc:0.995729
[1349]	train-error:0.024748	train-auc:0.995923	valid-error:0.024247	valid-auc:0.995728
[1350]	train-error:0.024753	train-auc:0.995924	valid-error:0.024247	valid-auc:0.995729
[1351]	train-error:0.02477	train-auc:0.995925	valid-error:0.024247	valid-auc:0.99573
[1352]	train-error:0.02477	train-auc:0.995925	valid-error:0.024247	valid-auc:0.99573
[1353]	train-error:0.02477	train-auc:0.995926	val

[1437]	train-error:0.023905	train-auc:0.996059	valid-error:0.023286	valid-auc:0.995824
[1438]	train-error:0.023905	train-auc:0.996059	valid-error:0.023286	valid-auc:0.995827
[1439]	train-error:0.023905	train-auc:0.996081	valid-error:0.023286	valid-auc:0.995852
[1440]	train-error:0.02391	train-auc:0.996082	valid-error:0.023286	valid-auc:0.995844
[1441]	train-error:0.02391	train-auc:0.996083	valid-error:0.023286	valid-auc:0.995844
[1442]	train-error:0.02391	train-auc:0.996083	valid-error:0.023286	valid-auc:0.995842
[1443]	train-error:0.02391	train-auc:0.996083	valid-error:0.023286	valid-auc:0.995844
[1444]	train-error:0.02391	train-auc:0.996084	valid-error:0.023286	valid-auc:0.995845
[1445]	train-error:0.023893	train-auc:0.996082	valid-error:0.023286	valid-auc:0.995844
[1446]	train-error:0.023893	train-auc:0.996082	valid-error:0.023286	valid-auc:0.995843
[1447]	train-error:0.023893	train-auc:0.996082	valid-error:0.023286	valid-auc:0.995845
[1448]	train-error:0.023893	train-auc:0.996082	v

[1532]	train-error:0.023399	train-auc:0.996174	valid-error:0.022752	valid-auc:0.995922
[1533]	train-error:0.023399	train-auc:0.996174	valid-error:0.022752	valid-auc:0.995921
[1534]	train-error:0.023399	train-auc:0.996174	valid-error:0.022752	valid-auc:0.995921
[1535]	train-error:0.023393	train-auc:0.996187	valid-error:0.022752	valid-auc:0.995927
[1536]	train-error:0.023393	train-auc:0.996187	valid-error:0.022752	valid-auc:0.995926
[1537]	train-error:0.023393	train-auc:0.99619	valid-error:0.022752	valid-auc:0.995929
[1538]	train-error:0.023382	train-auc:0.996189	valid-error:0.022645	valid-auc:0.995927
[1539]	train-error:0.023382	train-auc:0.99619	valid-error:0.022645	valid-auc:0.995929
[1540]	train-error:0.023382	train-auc:0.99619	valid-error:0.022645	valid-auc:0.995932
[1541]	train-error:0.023382	train-auc:0.99619	valid-error:0.022645	valid-auc:0.99593
[1542]	train-error:0.023382	train-auc:0.99619	valid-error:0.022645	valid-auc:0.99593
[1543]	train-error:0.023382	train-auc:0.996192	val

[1627]	train-error:0.022977	train-auc:0.996328	valid-error:0.022431	valid-auc:0.996005
[1628]	train-error:0.022977	train-auc:0.996328	valid-error:0.022431	valid-auc:0.996008
[1629]	train-error:0.022977	train-auc:0.996328	valid-error:0.022431	valid-auc:0.99601
[1630]	train-error:0.022977	train-auc:0.996329	valid-error:0.022431	valid-auc:0.996012
[1631]	train-error:0.022977	train-auc:0.996329	valid-error:0.022431	valid-auc:0.99601
[1632]	train-error:0.02296	train-auc:0.996329	valid-error:0.022431	valid-auc:0.996011
[1633]	train-error:0.02296	train-auc:0.99633	valid-error:0.022431	valid-auc:0.996011
[1634]	train-error:0.02296	train-auc:0.996331	valid-error:0.022431	valid-auc:0.996012
[1635]	train-error:0.02296	train-auc:0.996332	valid-error:0.022431	valid-auc:0.996013
[1636]	train-error:0.02296	train-auc:0.996332	valid-error:0.022431	valid-auc:0.99601
[1637]	train-error:0.02296	train-auc:0.996332	valid-error:0.022431	valid-auc:0.99601
[1638]	train-error:0.02296	train-auc:0.996335	valid-er

[1723]	train-error:0.022212	train-auc:0.996464	valid-error:0.02179	valid-auc:0.996154
[1724]	train-error:0.022212	train-auc:0.996466	valid-error:0.02179	valid-auc:0.996155
[1725]	train-error:0.022235	train-auc:0.996469	valid-error:0.02179	valid-auc:0.996156
[1726]	train-error:0.022235	train-auc:0.996469	valid-error:0.02179	valid-auc:0.996158
[1727]	train-error:0.022235	train-auc:0.996469	valid-error:0.02179	valid-auc:0.996161
[1728]	train-error:0.022235	train-auc:0.996471	valid-error:0.02179	valid-auc:0.996164
[1729]	train-error:0.022235	train-auc:0.996475	valid-error:0.02179	valid-auc:0.996164
[1730]	train-error:0.022246	train-auc:0.996473	valid-error:0.02179	valid-auc:0.996161
[1731]	train-error:0.022246	train-auc:0.996474	valid-error:0.02179	valid-auc:0.996162
[1732]	train-error:0.022246	train-auc:0.996474	valid-error:0.02179	valid-auc:0.996164
[1733]	train-error:0.022246	train-auc:0.996474	valid-error:0.02179	valid-auc:0.996166
[1734]	train-error:0.022246	train-auc:0.996475	valid-e

[1818]	train-error:0.021577	train-auc:0.996588	valid-error:0.021577	valid-auc:0.996221
[1819]	train-error:0.021577	train-auc:0.996589	valid-error:0.021577	valid-auc:0.996222
[1820]	train-error:0.021577	train-auc:0.996588	valid-error:0.021577	valid-auc:0.996222
[1821]	train-error:0.021577	train-auc:0.996589	valid-error:0.021577	valid-auc:0.996223
[1822]	train-error:0.021577	train-auc:0.996589	valid-error:0.021577	valid-auc:0.996223
[1823]	train-error:0.021577	train-auc:0.996604	valid-error:0.021577	valid-auc:0.996235
[1824]	train-error:0.021577	train-auc:0.996602	valid-error:0.021577	valid-auc:0.996235
[1825]	train-error:0.021577	train-auc:0.996602	valid-error:0.021577	valid-auc:0.996235
[1826]	train-error:0.021577	train-auc:0.996603	valid-error:0.021577	valid-auc:0.996236
[1827]	train-error:0.021577	train-auc:0.996602	valid-error:0.021577	valid-auc:0.996236
[1828]	train-error:0.021583	train-auc:0.996603	valid-error:0.021577	valid-auc:0.996236
[1829]	train-error:0.021583	train-auc:0.996

[1913]	train-error:0.020818	train-auc:0.996679	valid-error:0.020615	valid-auc:0.99626
[1914]	train-error:0.020818	train-auc:0.99668	valid-error:0.020615	valid-auc:0.99626
[1915]	train-error:0.020818	train-auc:0.996681	valid-error:0.020615	valid-auc:0.996259
[1916]	train-error:0.020818	train-auc:0.99668	valid-error:0.020615	valid-auc:0.996256
[1917]	train-error:0.020818	train-auc:0.996682	valid-error:0.020615	valid-auc:0.996256
[1918]	train-error:0.020829	train-auc:0.996683	valid-error:0.020722	valid-auc:0.996255
[1919]	train-error:0.020829	train-auc:0.996683	valid-error:0.020722	valid-auc:0.996256
[1920]	train-error:0.020829	train-auc:0.996683	valid-error:0.020722	valid-auc:0.996256
[1921]	train-error:0.020824	train-auc:0.996684	valid-error:0.020722	valid-auc:0.996258
[1922]	train-error:0.021049	train-auc:0.996685	valid-error:0.020936	valid-auc:0.996259
[1923]	train-error:0.021054	train-auc:0.996685	valid-error:0.020936	valid-auc:0.99626
[1924]	train-error:0.021054	train-auc:0.996685	v

[2008]	train-error:0.019952	train-auc:0.996755	valid-error:0.019761	valid-auc:0.996333
[2009]	train-error:0.019958	train-auc:0.996759	valid-error:0.019761	valid-auc:0.996333
[2010]	train-error:0.019958	train-auc:0.996759	valid-error:0.019761	valid-auc:0.996334
[2011]	train-error:0.019958	train-auc:0.996759	valid-error:0.019761	valid-auc:0.996334
[2012]	train-error:0.019958	train-auc:0.99676	valid-error:0.019761	valid-auc:0.996336
[2013]	train-error:0.019958	train-auc:0.996761	valid-error:0.019761	valid-auc:0.996336
[2014]	train-error:0.019958	train-auc:0.996762	valid-error:0.019761	valid-auc:0.996337
[2015]	train-error:0.019958	train-auc:0.996762	valid-error:0.019761	valid-auc:0.996337
[2016]	train-error:0.019958	train-auc:0.996763	valid-error:0.019761	valid-auc:0.996337
[2017]	train-error:0.019958	train-auc:0.996764	valid-error:0.019761	valid-auc:0.996337
[2018]	train-error:0.019952	train-auc:0.996765	valid-error:0.019761	valid-auc:0.996338
[2019]	train-error:0.019958	train-auc:0.9967

[2103]	train-error:0.019154	train-auc:0.996824	valid-error:0.018799	valid-auc:0.996398
[2104]	train-error:0.019154	train-auc:0.996826	valid-error:0.018799	valid-auc:0.9964
[2105]	train-error:0.019154	train-auc:0.996826	valid-error:0.018799	valid-auc:0.9964
[2106]	train-error:0.019154	train-auc:0.996827	valid-error:0.018799	valid-auc:0.996401
[2107]	train-error:0.019368	train-auc:0.996828	valid-error:0.018906	valid-auc:0.996401
[2108]	train-error:0.019368	train-auc:0.996828	valid-error:0.018906	valid-auc:0.996401
[2109]	train-error:0.019368	train-auc:0.996829	valid-error:0.018906	valid-auc:0.996402
[2110]	train-error:0.019362	train-auc:0.99683	valid-error:0.018906	valid-auc:0.996403
[2111]	train-error:0.019356	train-auc:0.99683	valid-error:0.018906	valid-auc:0.996404
[2112]	train-error:0.019356	train-auc:0.996831	valid-error:0.018906	valid-auc:0.9964
[2113]	train-error:0.019356	train-auc:0.996831	valid-error:0.018906	valid-auc:0.9964
[2114]	train-error:0.019362	train-auc:0.996831	valid-

[2198]	train-error:0.017923	train-auc:0.996888	valid-error:0.017304	valid-auc:0.996452
[2199]	train-error:0.017923	train-auc:0.996888	valid-error:0.017304	valid-auc:0.996453
[2200]	train-error:0.017917	train-auc:0.996888	valid-error:0.017304	valid-auc:0.996453
[2201]	train-error:0.017928	train-auc:0.996889	valid-error:0.017304	valid-auc:0.996453
[2202]	train-error:0.017928	train-auc:0.99689	valid-error:0.017304	valid-auc:0.996454
[2203]	train-error:0.017749	train-auc:0.99689	valid-error:0.016984	valid-auc:0.996454
[2204]	train-error:0.017754	train-auc:0.99689	valid-error:0.016984	valid-auc:0.996455
[2205]	train-error:0.017754	train-auc:0.996891	valid-error:0.016984	valid-auc:0.996455
[2206]	train-error:0.017754	train-auc:0.996891	valid-error:0.016984	valid-auc:0.996455
[2207]	train-error:0.017754	train-auc:0.996891	valid-error:0.016984	valid-auc:0.996455
[2208]	train-error:0.017743	train-auc:0.996891	valid-error:0.016984	valid-auc:0.996454
[2209]	train-error:0.017743	train-auc:0.996892

[2293]	train-error:0.016995	train-auc:0.996946	valid-error:0.016129	valid-auc:0.996502
[2294]	train-error:0.016995	train-auc:0.996946	valid-error:0.016129	valid-auc:0.996503
[2295]	train-error:0.016995	train-auc:0.996945	valid-error:0.016129	valid-auc:0.996503
[2296]	train-error:0.016995	train-auc:0.996946	valid-error:0.016129	valid-auc:0.996504
[2297]	train-error:0.016995	train-auc:0.996947	valid-error:0.016129	valid-auc:0.996504
[2298]	train-error:0.016995	train-auc:0.996948	valid-error:0.016129	valid-auc:0.996506
[2299]	train-error:0.016995	train-auc:0.996957	valid-error:0.016129	valid-auc:0.996504
[2300]	train-error:0.016995	train-auc:0.996957	valid-error:0.016129	valid-auc:0.996506
[2301]	train-error:0.016995	train-auc:0.996957	valid-error:0.016129	valid-auc:0.996507
[2302]	train-error:0.016995	train-auc:0.996958	valid-error:0.016129	valid-auc:0.996508
[2303]	train-error:0.016995	train-auc:0.996967	valid-error:0.016129	valid-auc:0.996519
[2304]	train-error:0.016995	train-auc:0.996

[2388]	train-error:0.01677	train-auc:0.997029	valid-error:0.015915	valid-auc:0.996612
[2389]	train-error:0.016782	train-auc:0.99703	valid-error:0.015915	valid-auc:0.996612
[2390]	train-error:0.016782	train-auc:0.99703	valid-error:0.015915	valid-auc:0.996612
[2391]	train-error:0.016782	train-auc:0.997033	valid-error:0.015915	valid-auc:0.996612
[2392]	train-error:0.01677	train-auc:0.997035	valid-error:0.015915	valid-auc:0.996612
[2393]	train-error:0.01677	train-auc:0.997036	valid-error:0.015915	valid-auc:0.996615
[2394]	train-error:0.01677	train-auc:0.997036	valid-error:0.015915	valid-auc:0.996615
[2395]	train-error:0.01677	train-auc:0.997037	valid-error:0.015915	valid-auc:0.996616
[2396]	train-error:0.01677	train-auc:0.997038	valid-error:0.015915	valid-auc:0.996616
[2397]	train-error:0.01677	train-auc:0.997038	valid-error:0.015915	valid-auc:0.996616
[2398]	train-error:0.01677	train-auc:0.997039	valid-error:0.015915	valid-auc:0.996617
[2399]	train-error:0.01677	train-auc:0.997039	valid-e

[2483]	train-error:0.016051	train-auc:0.997112	valid-error:0.015061	valid-auc:0.996653
[2484]	train-error:0.016051	train-auc:0.997112	valid-error:0.015061	valid-auc:0.996653
[2485]	train-error:0.016051	train-auc:0.997113	valid-error:0.015061	valid-auc:0.996654
[2486]	train-error:0.016051	train-auc:0.997115	valid-error:0.015061	valid-auc:0.996655
[2487]	train-error:0.016051	train-auc:0.997116	valid-error:0.015061	valid-auc:0.996656
[2488]	train-error:0.016051	train-auc:0.997117	valid-error:0.015061	valid-auc:0.996655
[2489]	train-error:0.016051	train-auc:0.997116	valid-error:0.015061	valid-auc:0.996657
[2490]	train-error:0.016051	train-auc:0.997116	valid-error:0.015061	valid-auc:0.996658
[2491]	train-error:0.016051	train-auc:0.997116	valid-error:0.015061	valid-auc:0.996658
[2492]	train-error:0.016051	train-auc:0.997117	valid-error:0.015061	valid-auc:0.996661
[2493]	train-error:0.016051	train-auc:0.997117	valid-error:0.015061	valid-auc:0.996661
[2494]	train-error:0.016051	train-auc:0.997

[2578]	train-error:0.01586	train-auc:0.997158	valid-error:0.014954	valid-auc:0.996685
[2579]	train-error:0.01586	train-auc:0.997159	valid-error:0.014954	valid-auc:0.996686
[2580]	train-error:0.01586	train-auc:0.997159	valid-error:0.014954	valid-auc:0.996687
[2581]	train-error:0.01586	train-auc:0.99716	valid-error:0.014954	valid-auc:0.996686
[2582]	train-error:0.015865	train-auc:0.99716	valid-error:0.014954	valid-auc:0.996686
[2583]	train-error:0.015865	train-auc:0.99716	valid-error:0.014954	valid-auc:0.996686
[2584]	train-error:0.015685	train-auc:0.997161	valid-error:0.01474	valid-auc:0.996687
[2585]	train-error:0.015685	train-auc:0.997161	valid-error:0.01474	valid-auc:0.996687
[2586]	train-error:0.015685	train-auc:0.997161	valid-error:0.01474	valid-auc:0.996688
[2587]	train-error:0.015685	train-auc:0.997162	valid-error:0.01474	valid-auc:0.996688
[2588]	train-error:0.015685	train-auc:0.997162	valid-error:0.01474	valid-auc:0.996688
[2589]	train-error:0.015685	train-auc:0.997162	valid-er

[2673]	train-error:0.01478	train-auc:0.997216	valid-error:0.013993	valid-auc:0.99675
[2674]	train-error:0.01478	train-auc:0.997216	valid-error:0.013993	valid-auc:0.99675
[2675]	train-error:0.01478	train-auc:0.997216	valid-error:0.013993	valid-auc:0.99675
[2676]	train-error:0.014775	train-auc:0.997216	valid-error:0.013993	valid-auc:0.996749
[2677]	train-error:0.014775	train-auc:0.997217	valid-error:0.013993	valid-auc:0.99675
[2678]	train-error:0.01478	train-auc:0.997217	valid-error:0.013993	valid-auc:0.996749
[2679]	train-error:0.014656	train-auc:0.997217	valid-error:0.013993	valid-auc:0.99675
[2680]	train-error:0.014656	train-auc:0.997217	valid-error:0.013993	valid-auc:0.99675
[2681]	train-error:0.014656	train-auc:0.997218	valid-error:0.013993	valid-auc:0.99675
[2682]	train-error:0.014656	train-auc:0.997218	valid-error:0.013993	valid-auc:0.996751
[2683]	train-error:0.014808	train-auc:0.997218	valid-error:0.0141	valid-auc:0.996751
[2684]	train-error:0.014808	train-auc:0.997219	valid-err

[2768]	train-error:0.01433	train-auc:0.997271	valid-error:0.013672	valid-auc:0.996784
[2769]	train-error:0.014325	train-auc:0.997272	valid-error:0.013672	valid-auc:0.996784
[2770]	train-error:0.014325	train-auc:0.997272	valid-error:0.013672	valid-auc:0.996784
[2771]	train-error:0.014325	train-auc:0.997272	valid-error:0.013672	valid-auc:0.996785
[2772]	train-error:0.014342	train-auc:0.997272	valid-error:0.013672	valid-auc:0.996784
[2773]	train-error:0.014342	train-auc:0.997272	valid-error:0.013672	valid-auc:0.996784
[2774]	train-error:0.014342	train-auc:0.997272	valid-error:0.013672	valid-auc:0.996784
[2775]	train-error:0.014342	train-auc:0.997273	valid-error:0.013672	valid-auc:0.996785
[2776]	train-error:0.014336	train-auc:0.997273	valid-error:0.013672	valid-auc:0.996785
[2777]	train-error:0.014336	train-auc:0.997274	valid-error:0.013672	valid-auc:0.996785
[2778]	train-error:0.014336	train-auc:0.997274	valid-error:0.013672	valid-auc:0.996786
[2779]	train-error:0.014342	train-auc:0.9972

[2863]	train-error:0.014257	train-auc:0.997308	valid-error:0.013779	valid-auc:0.996811
[2864]	train-error:0.01419	train-auc:0.997309	valid-error:0.013779	valid-auc:0.996812
[2865]	train-error:0.01419	train-auc:0.997309	valid-error:0.013779	valid-auc:0.996812
[2866]	train-error:0.014179	train-auc:0.997309	valid-error:0.013779	valid-auc:0.996812
[2867]	train-error:0.014179	train-auc:0.997309	valid-error:0.013779	valid-auc:0.996812
[2868]	train-error:0.014173	train-auc:0.99731	valid-error:0.013779	valid-auc:0.996813
[2869]	train-error:0.014173	train-auc:0.99731	valid-error:0.013779	valid-auc:0.996813
[2870]	train-error:0.014173	train-auc:0.997311	valid-error:0.013779	valid-auc:0.996813
[2871]	train-error:0.014173	train-auc:0.997311	valid-error:0.013779	valid-auc:0.996813
[2872]	train-error:0.014173	train-auc:0.997311	valid-error:0.013779	valid-auc:0.996813
[2873]	train-error:0.014173	train-auc:0.997312	valid-error:0.013779	valid-auc:0.996814
[2874]	train-error:0.014173	train-auc:0.997312	

[2958]	train-error:0.013796	train-auc:0.997365	valid-error:0.013352	valid-auc:0.996859
[2959]	train-error:0.013796	train-auc:0.997365	valid-error:0.013352	valid-auc:0.996859
[2960]	train-error:0.013796	train-auc:0.997366	valid-error:0.013352	valid-auc:0.996859
[2961]	train-error:0.013796	train-auc:0.997366	valid-error:0.013352	valid-auc:0.996858
[2962]	train-error:0.013791	train-auc:0.997365	valid-error:0.013352	valid-auc:0.996858
[2963]	train-error:0.013791	train-auc:0.997368	valid-error:0.013352	valid-auc:0.996858
[2964]	train-error:0.013791	train-auc:0.997368	valid-error:0.013352	valid-auc:0.996857
[2965]	train-error:0.013791	train-auc:0.997368	valid-error:0.013352	valid-auc:0.996857
[2966]	train-error:0.013791	train-auc:0.997368	valid-error:0.013352	valid-auc:0.996857
[2967]	train-error:0.013791	train-auc:0.997368	valid-error:0.013352	valid-auc:0.996857
[2968]	train-error:0.013791	train-auc:0.997368	valid-error:0.013352	valid-auc:0.996857
[2969]	train-error:0.013791	train-auc:0.997

[3053]	train-error:0.01365	train-auc:0.997406	valid-error:0.013138	valid-auc:0.99688
[3054]	train-error:0.01365	train-auc:0.997406	valid-error:0.013138	valid-auc:0.996879
[3055]	train-error:0.013628	train-auc:0.997406	valid-error:0.013138	valid-auc:0.996878
[3056]	train-error:0.01365	train-auc:0.997407	valid-error:0.013138	valid-auc:0.996879
[3057]	train-error:0.01365	train-auc:0.997407	valid-error:0.013138	valid-auc:0.996878
[3058]	train-error:0.013628	train-auc:0.997407	valid-error:0.013138	valid-auc:0.996878
[3059]	train-error:0.013678	train-auc:0.997407	valid-error:0.013138	valid-auc:0.996879
[3060]	train-error:0.013628	train-auc:0.997408	valid-error:0.013138	valid-auc:0.996879
[3061]	train-error:0.013678	train-auc:0.997409	valid-error:0.013138	valid-auc:0.996879
[3062]	train-error:0.013678	train-auc:0.997409	valid-error:0.013138	valid-auc:0.996879
[3063]	train-error:0.013678	train-auc:0.997409	valid-error:0.013138	valid-auc:0.996879
[3064]	train-error:0.013678	train-auc:0.997409	v

[3148]	train-error:0.012385	train-auc:0.997452	valid-error:0.012284	valid-auc:0.996926
[3149]	train-error:0.012385	train-auc:0.997452	valid-error:0.012284	valid-auc:0.996927
[3150]	train-error:0.012391	train-auc:0.997452	valid-error:0.012284	valid-auc:0.996927
[3151]	train-error:0.012391	train-auc:0.997452	valid-error:0.012284	valid-auc:0.996927
[3152]	train-error:0.012391	train-auc:0.997452	valid-error:0.012284	valid-auc:0.996927
[3153]	train-error:0.012413	train-auc:0.997453	valid-error:0.012284	valid-auc:0.996929
[3154]	train-error:0.012413	train-auc:0.997453	valid-error:0.012284	valid-auc:0.996929
[3155]	train-error:0.012413	train-auc:0.997454	valid-error:0.012284	valid-auc:0.996929
[3156]	train-error:0.012425	train-auc:0.997455	valid-error:0.012284	valid-auc:0.996935
[3157]	train-error:0.012419	train-auc:0.997455	valid-error:0.012284	valid-auc:0.996935
[3158]	train-error:0.012425	train-auc:0.997456	valid-error:0.012284	valid-auc:0.996936
[3159]	train-error:0.012425	train-auc:0.997

[3243]	train-error:0.012363	train-auc:0.997492	valid-error:0.012284	valid-auc:0.996957
[3244]	train-error:0.012363	train-auc:0.997493	valid-error:0.012284	valid-auc:0.996959
[3245]	train-error:0.012363	train-auc:0.997493	valid-error:0.012284	valid-auc:0.996959
[3246]	train-error:0.012368	train-auc:0.997493	valid-error:0.012284	valid-auc:0.996959
[3247]	train-error:0.012368	train-auc:0.997493	valid-error:0.012284	valid-auc:0.996959
[3248]	train-error:0.012368	train-auc:0.997495	valid-error:0.012284	valid-auc:0.996959
[3249]	train-error:0.012374	train-auc:0.997495	valid-error:0.012284	valid-auc:0.996958
[3250]	train-error:0.012374	train-auc:0.997495	valid-error:0.012284	valid-auc:0.996958
[3251]	train-error:0.012374	train-auc:0.997496	valid-error:0.012284	valid-auc:0.996957
[3252]	train-error:0.012374	train-auc:0.997496	valid-error:0.012284	valid-auc:0.996957
[3253]	train-error:0.012374	train-auc:0.997496	valid-error:0.012284	valid-auc:0.996957
[3254]	train-error:0.012374	train-auc:0.997

[3338]	train-error:0.012335	train-auc:0.997526	valid-error:0.012284	valid-auc:0.996979
[3339]	train-error:0.012335	train-auc:0.997526	valid-error:0.012284	valid-auc:0.996979
[3340]	train-error:0.012329	train-auc:0.997526	valid-error:0.012284	valid-auc:0.996978
[3341]	train-error:0.012329	train-auc:0.997529	valid-error:0.012284	valid-auc:0.996976
[3342]	train-error:0.012329	train-auc:0.997529	valid-error:0.012284	valid-auc:0.996976
[3343]	train-error:0.012329	train-auc:0.99753	valid-error:0.012284	valid-auc:0.996976
[3344]	train-error:0.012318	train-auc:0.99753	valid-error:0.012284	valid-auc:0.996976
[3345]	train-error:0.012318	train-auc:0.99753	valid-error:0.012284	valid-auc:0.996979
[3346]	train-error:0.012318	train-auc:0.997531	valid-error:0.012284	valid-auc:0.99698
[3347]	train-error:0.012318	train-auc:0.997531	valid-error:0.012284	valid-auc:0.99698
[3348]	train-error:0.012318	train-auc:0.997531	valid-error:0.012284	valid-auc:0.996979
[3349]	train-error:0.012318	train-auc:0.997532	v

[3434]	train-error:0.012065	train-auc:0.99756	valid-error:0.011963	valid-auc:0.997032
[3435]	train-error:0.01207	train-auc:0.997559	valid-error:0.011963	valid-auc:0.997034
[3436]	train-error:0.01207	train-auc:0.997559	valid-error:0.011963	valid-auc:0.997034
[3437]	train-error:0.012065	train-auc:0.997562	valid-error:0.011963	valid-auc:0.997038
[3438]	train-error:0.012059	train-auc:0.997564	valid-error:0.011963	valid-auc:0.99704
[3439]	train-error:0.012059	train-auc:0.997564	valid-error:0.011963	valid-auc:0.99704
[3440]	train-error:0.012059	train-auc:0.997564	valid-error:0.011963	valid-auc:0.99704
[3441]	train-error:0.011947	train-auc:0.997565	valid-error:0.011856	valid-auc:0.99704
[3442]	train-error:0.011947	train-auc:0.997565	valid-error:0.011856	valid-auc:0.99704
[3443]	train-error:0.011941	train-auc:0.997566	valid-error:0.011856	valid-auc:0.997042
[3444]	train-error:0.011941	train-auc:0.997565	valid-error:0.011856	valid-auc:0.997038
[3445]	train-error:0.011941	train-auc:0.997567	vali

[3529]	train-error:0.0118	train-auc:0.997605	valid-error:0.011536	valid-auc:0.997079
[3530]	train-error:0.011795	train-auc:0.997605	valid-error:0.011536	valid-auc:0.997079
[3531]	train-error:0.011795	train-auc:0.997605	valid-error:0.011536	valid-auc:0.997079
[3532]	train-error:0.011795	train-auc:0.997606	valid-error:0.011536	valid-auc:0.99708
[3533]	train-error:0.011795	train-auc:0.997607	valid-error:0.011536	valid-auc:0.997081
[3534]	train-error:0.011795	train-auc:0.997607	valid-error:0.011536	valid-auc:0.997081
[3535]	train-error:0.011795	train-auc:0.99761	valid-error:0.011536	valid-auc:0.997082
[3536]	train-error:0.011795	train-auc:0.99761	valid-error:0.011536	valid-auc:0.997081
[3537]	train-error:0.011795	train-auc:0.99761	valid-error:0.011536	valid-auc:0.997081
[3538]	train-error:0.0118	train-auc:0.997611	valid-error:0.011536	valid-auc:0.997081
[3539]	train-error:0.0118	train-auc:0.997611	valid-error:0.011536	valid-auc:0.997081
[3540]	train-error:0.0118	train-auc:0.997613	valid-er

[3626]	train-error:0.011548	train-auc:0.997641	valid-error:0.011216	valid-auc:0.997106
[3627]	train-error:0.011548	train-auc:0.997641	valid-error:0.011216	valid-auc:0.997107
[3628]	train-error:0.011542	train-auc:0.997641	valid-error:0.011216	valid-auc:0.997107
[3629]	train-error:0.011542	train-auc:0.997643	valid-error:0.011216	valid-auc:0.99711
[3630]	train-error:0.011508	train-auc:0.997643	valid-error:0.011109	valid-auc:0.997111
[3631]	train-error:0.011508	train-auc:0.997644	valid-error:0.011109	valid-auc:0.997111
[3632]	train-error:0.011508	train-auc:0.997644	valid-error:0.011109	valid-auc:0.997111
[3633]	train-error:0.011407	train-auc:0.997644	valid-error:0.011109	valid-auc:0.997111
[3634]	train-error:0.011418	train-auc:0.997646	valid-error:0.011109	valid-auc:0.99711
[3635]	train-error:0.011418	train-auc:0.997646	valid-error:0.011109	valid-auc:0.997111
[3636]	train-error:0.011418	train-auc:0.997646	valid-error:0.011109	valid-auc:0.997111
[3637]	train-error:0.011418	train-auc:0.99764

[3721]	train-error:0.01121	train-auc:0.997674	valid-error:0.011002	valid-auc:0.997127
[3722]	train-error:0.01121	train-auc:0.997674	valid-error:0.011002	valid-auc:0.997127
[3723]	train-error:0.01121	train-auc:0.997674	valid-error:0.011002	valid-auc:0.997127
[3724]	train-error:0.01121	train-auc:0.997674	valid-error:0.011002	valid-auc:0.997127
[3725]	train-error:0.01121	train-auc:0.997674	valid-error:0.011002	valid-auc:0.997127
[3726]	train-error:0.011182	train-auc:0.997677	valid-error:0.011002	valid-auc:0.997128
[3727]	train-error:0.011171	train-auc:0.997677	valid-error:0.011002	valid-auc:0.997127
[3728]	train-error:0.011171	train-auc:0.997677	valid-error:0.011002	valid-auc:0.997128
[3729]	train-error:0.011171	train-auc:0.997677	valid-error:0.011002	valid-auc:0.997128
[3730]	train-error:0.011171	train-auc:0.997677	valid-error:0.011002	valid-auc:0.997128
[3731]	train-error:0.011176	train-auc:0.997679	valid-error:0.011002	valid-auc:0.997129
[3732]	train-error:0.011176	train-auc:0.997682	v

[3816]	train-error:0.011013	train-auc:0.997705	valid-error:0.010895	valid-auc:0.997147
[3817]	train-error:0.011013	train-auc:0.997705	valid-error:0.010895	valid-auc:0.997147
[3818]	train-error:0.011013	train-auc:0.997705	valid-error:0.010895	valid-auc:0.997146
[3819]	train-error:0.011013	train-auc:0.997705	valid-error:0.010895	valid-auc:0.997146
[3820]	train-error:0.011013	train-auc:0.997705	valid-error:0.010895	valid-auc:0.997147
[3821]	train-error:0.011013	train-auc:0.997706	valid-error:0.010895	valid-auc:0.997147
[3822]	train-error:0.011013	train-auc:0.997706	valid-error:0.010895	valid-auc:0.997149
[3823]	train-error:0.011013	train-auc:0.997706	valid-error:0.010895	valid-auc:0.997149
[3824]	train-error:0.011008	train-auc:0.997708	valid-error:0.010895	valid-auc:0.997175
[3825]	train-error:0.011008	train-auc:0.997708	valid-error:0.010895	valid-auc:0.997176
[3826]	train-error:0.011008	train-auc:0.997708	valid-error:0.010895	valid-auc:0.997176
[3827]	train-error:0.01112	train-auc:0.9977

[3911]	train-error:0.010361	train-auc:0.997738	valid-error:0.010681	valid-auc:0.997196
[3912]	train-error:0.010367	train-auc:0.997737	valid-error:0.010681	valid-auc:0.997195
[3913]	train-error:0.010367	train-auc:0.997738	valid-error:0.010681	valid-auc:0.997198
[3914]	train-error:0.010344	train-auc:0.997739	valid-error:0.010681	valid-auc:0.997198
[3915]	train-error:0.010344	train-auc:0.997739	valid-error:0.010681	valid-auc:0.997199
[3916]	train-error:0.010344	train-auc:0.99774	valid-error:0.010681	valid-auc:0.9972
[3917]	train-error:0.010333	train-auc:0.99774	valid-error:0.010681	valid-auc:0.9972
[3918]	train-error:0.010333	train-auc:0.99774	valid-error:0.010681	valid-auc:0.9972
[3919]	train-error:0.010333	train-auc:0.99774	valid-error:0.010681	valid-auc:0.9972
[3920]	train-error:0.010333	train-auc:0.99774	valid-error:0.010681	valid-auc:0.9972
[3921]	train-error:0.010333	train-auc:0.99774	valid-error:0.010681	valid-auc:0.997201
[3922]	train-error:0.010333	train-auc:0.99774	valid-error:0

[4006]	train-error:0.010311	train-auc:0.99777	valid-error:0.010681	valid-auc:0.997216
[4007]	train-error:0.010311	train-auc:0.99777	valid-error:0.010681	valid-auc:0.997216
[4008]	train-error:0.010311	train-auc:0.99777	valid-error:0.010681	valid-auc:0.997216
[4009]	train-error:0.010311	train-auc:0.997771	valid-error:0.010681	valid-auc:0.997216
[4010]	train-error:0.010311	train-auc:0.997771	valid-error:0.010681	valid-auc:0.997215
[4011]	train-error:0.010311	train-auc:0.997771	valid-error:0.010681	valid-auc:0.997216
[4012]	train-error:0.010311	train-auc:0.997771	valid-error:0.010681	valid-auc:0.997216
[4013]	train-error:0.010311	train-auc:0.997771	valid-error:0.010681	valid-auc:0.997216
[4014]	train-error:0.010311	train-auc:0.997774	valid-error:0.010681	valid-auc:0.997217
[4015]	train-error:0.010311	train-auc:0.997774	valid-error:0.010681	valid-auc:0.997217
[4016]	train-error:0.010311	train-auc:0.997774	valid-error:0.010681	valid-auc:0.99722
[4017]	train-error:0.010311	train-auc:0.997774	

[4101]	train-error:0.00972	train-auc:0.997799	valid-error:0.010147	valid-auc:0.997234
[4102]	train-error:0.00972	train-auc:0.997799	valid-error:0.010147	valid-auc:0.997234
[4103]	train-error:0.00972	train-auc:0.997799	valid-error:0.010147	valid-auc:0.997234
[4104]	train-error:0.00972	train-auc:0.997799	valid-error:0.010147	valid-auc:0.997234
[4105]	train-error:0.00972	train-auc:0.997799	valid-error:0.010147	valid-auc:0.997232
[4106]	train-error:0.00972	train-auc:0.9978	valid-error:0.010147	valid-auc:0.99723
[4107]	train-error:0.00972	train-auc:0.9978	valid-error:0.010147	valid-auc:0.99723
[4108]	train-error:0.00972	train-auc:0.9978	valid-error:0.010147	valid-auc:0.997231
[4109]	train-error:0.00972	train-auc:0.997801	valid-error:0.010147	valid-auc:0.997232
[4110]	train-error:0.00972	train-auc:0.9978	valid-error:0.010147	valid-auc:0.997232
[4111]	train-error:0.00972	train-auc:0.997802	valid-error:0.010147	valid-auc:0.997232
[4112]	train-error:0.00972	train-auc:0.997802	valid-error:0.0101

[4196]	train-error:0.009242	train-auc:0.997829	valid-error:0.009827	valid-auc:0.997263
[4197]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997262
[4198]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997263
[4199]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997264
[4200]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997265
[4201]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997262
[4202]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997263
[4203]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997262
[4204]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997262
[4205]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997262
[4206]	train-error:0.009242	train-auc:0.99783	valid-error:0.009827	valid-auc:0.997264
[4207]	train-error:0.009242	train-auc:0.997831	valid-

[4291]	train-error:0.009068	train-auc:0.997862	valid-error:0.0094	valid-auc:0.997278
[4292]	train-error:0.009068	train-auc:0.997863	valid-error:0.0094	valid-auc:0.997278
[4293]	train-error:0.009068	train-auc:0.997863	valid-error:0.0094	valid-auc:0.997278
[4294]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997286
[4295]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997286
[4296]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997283
[4297]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997283
[4298]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997283
[4299]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997285
[4300]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997285
[4301]	train-error:0.009068	train-auc:0.997864	valid-error:0.0094	valid-auc:0.997283
[4302]	train-error:0.009068	train-auc:0.997865	valid-error:0.0094

### Validate classification accuracy

In [22]:
dtest = xgb.DMatrix(data_ngrams, label=labels, feature_names=feature_names)

In [30]:
%time predictions = gbm.predict(dtest)

Wall time: 30min 4s


In [31]:
predictions

array([ 0.03599444,  0.00104367,  0.03005324, ...,  0.03163639,
        0.00250797,  0.09092531], dtype=float32)

In [32]:
from sklearn.metrics import accuracy_score

for i in range(10):
    predictions_current = predictions > i / 10.0
    print ("0.{}: {}".format(i, accuracy_score(labels, predictions_current)))

0.0: 0.12508240341553706
0.1: 0.9505958720889052
0.2: 0.9646726592583423
0.3: 0.9797295399565427
0.4: 0.990523051631467
0.5: 0.9917951683961257
0.6: 0.9912946246825264
0.7: 0.9910463860185932
0.8: 0.9911289431056841
0.9: 0.9889257185752784


In [41]:
import sys

p = 0.5
for n in range(1, 10000):
    left = n * p - (1 - p)
    right = n * p + p
    sys.stdout.write("\r [{}, {}]".format(left / n, right / n))
    sys.stdout.flush()

 [0.49994999499949994, 0.5000500050005]4]]