In [54]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import cross_val_score
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, fbeta_score
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
from imblearn.over_sampling import RandomOverSampler

In [107]:
df = pd.read_csv('train.csv', sep = '\t', encoding='latin')
df.head()

Unnamed: 0,pdb_chain,DSSR,xray,resol,chainlen,protein,alpham2,betam2,gammam2,deltam2,...,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3,mg
0,1b23.cif1_R,R.G.1.,1,2.6,74,1,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1b23.cif1_R,R.G.1.,1,2.6,74,1,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1b23.cif1_R,R.G.1.,1,2.6,74,1,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1b23.cif1_R,R.G.1.,1,2.6,74,1,,,,,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1b23.cif1_R,R.G.1.,1,2.6,74,1,,,,,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [108]:
df_test = pd.read_csv('test.csv', encoding='latin')
df_test.head()

Unnamed: 0,Id,index,pdb_chain,DSSR,xray,resol,chainlen,protein,alpham2,betam2,...,atomO2,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3
0,0,28,1feu.cif1_F,F.C.93.,1,2.3,21,1,-73.3,174.9,...,0,0,0,0,0,0,0,1,0,0
1,1,29,1feu.cif1_F,F.C.93.,1,2.3,21,1,-73.3,174.9,...,0,0,0,0,0,0,0,0,1,0
2,2,30,1feu.cif1_F,F.C.93.,1,2.3,21,1,-73.3,174.9,...,0,0,0,0,0,1,0,0,0,0
3,3,31,1feu.cif1_F,F.C.93.,1,2.3,21,1,-73.3,174.9,...,0,0,0,0,1,0,0,0,0,0
4,4,32,1feu.cif1_F,F.C.93.,1,2.3,21,1,-73.3,174.9,...,0,0,1,0,0,0,0,0,0,0


In [109]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5370 entries, 0 to 5369
Columns: 386 entries, pdb_chain to mg
dtypes: float64(381), int64(3), object(2)
memory usage: 15.8+ MB


Сначала посмотрим, есть ли у нас признаки, которые совпадают на всех объектах обучающей выборки.

In [110]:
columns = []
for i, column in enumerate(df.columns):
    if len(np.unique(np.array(df[column]))) == 1:
        columns.append(column)
        
print(columns)

['xray']


Удалим этот признак из обоих датасетов. (В тестовой выборке этот признак тоже такой же)

In [111]:
for column in columns:
    df.drop(column, axis=1, inplace=True)
    df_test.drop(column, axis=1, inplace=True)
df.head()

Unnamed: 0,pdb_chain,DSSR,resol,chainlen,protein,alpham2,betam2,gammam2,deltam2,epsilonm2,...,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3,mg
0,1b23.cif1_R,R.G.1.,2.6,74,1,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1b23.cif1_R,R.G.1.,2.6,74,1,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1b23.cif1_R,R.G.1.,2.6,74,1,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1b23.cif1_R,R.G.1.,2.6,74,1,,,,,,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1b23.cif1_R,R.G.1.,2.6,74,1,,,,,,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


Удалим первые 2 колонки в тестовом датасете, так как первая из них это индекс стоки, а второй нет в обучающем датасете.

In [112]:
df_test.drop(['Id', 'index'], axis=1, inplace=True)
df_test.head()

Unnamed: 0,pdb_chain,DSSR,resol,chainlen,protein,alpham2,betam2,gammam2,deltam2,epsilonm2,...,atomO2,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3
0,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,0,0,0,1,0,0
1,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,0,0,0,0,1,0
2,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,0,1,0,0,0,0
3,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,1,0,0,0,0,0
4,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,1,0,0,0,0,0,0,0


Также видим, что в наших данных много NaN. Заменим их на среднее по столбцу.

In [113]:
##удаление их
df.dropna(inplace=True)

In [77]:
#для обучающего
for column in df.columns:
    if df[column].isnull().values.any():
        df[column].fillna('to_replace', inplace=True)
        temp = np.array(df[column])
        mean = np.mean(temp[temp!='to_replace'])
        temp[temp=='to_replace'] = mean
        df[column] = temp
        
#для тестового
for column in df_test.columns:
    if df_test[column].isnull().values.any():
        df_test[column].fillna('to_replace', inplace=True)
        temp = np.array(df_test[column])
        mean = np.mean(temp[temp!='to_replace'])
        temp[temp=='to_replace'] = mean
        df_test[column] = temp

df.head()

Unnamed: 0,pdb_chain,DSSR,resol,chainlen,protein,alpham2,betam2,gammam2,deltam2,epsilonm2,...,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3,mg
0,1b23.cif1_R,R.G.1.,2.6,74,1,-34.2217,56.1031,48.2511,89.1473,-132.533,...,0,0,0,0,0,0,0,0,1,0
1,1b23.cif1_R,R.G.1.,2.6,74,1,-34.2217,56.1031,48.2511,89.1473,-132.533,...,0,0,0,0,0,0,1,0,0,0
2,1b23.cif1_R,R.G.1.,2.6,74,1,-34.2217,56.1031,48.2511,89.1473,-132.533,...,0,0,0,0,0,0,0,1,0,0
3,1b23.cif1_R,R.G.1.,2.6,74,1,-34.2217,56.1031,48.2511,89.1473,-132.533,...,0,0,0,0,1,0,0,0,0,0
4,1b23.cif1_R,R.G.1.,2.6,74,1,-34.2217,56.1031,48.2511,89.1473,-132.533,...,0,0,0,1,0,0,0,0,0,0


In [79]:
df_test.head()

Unnamed: 0,pdb_chain,DSSR,resol,chainlen,protein,alpham2,betam2,gammam2,deltam2,epsilonm2,...,atomO2,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3
0,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,0,0,0,1,0,0
1,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,0,0,0,0,1,0
2,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,0,1,0,0,0,0
3,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,0,0,1,0,0,0,0,0
4,1feu.cif1_F,F.C.93.,2.3,21,1,-73.3,174.9,53.7,83.5,-154.8,...,0,0,1,0,0,0,0,0,0,0


Удалим второй признак, так как он неинформативный

In [114]:
df.drop('DSSR', axis=1, inplace=True)
df_test.drop('DSSR', axis=1, inplace=True)
df.head()

Unnamed: 0,pdb_chain,resol,chainlen,protein,alpham2,betam2,gammam2,deltam2,epsilonm2,zetam2,...,atomO2p,atomO3p,atomO4,atomO4p,atomO5p,atomO6,atomOP1,atomOP2,atomOP3,mg
35,1b23.cif1_R,2.6,74,1,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
36,1b23.cif1_R,2.6,74,1,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
37,1b23.cif1_R,2.6,74,1,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
38,1b23.cif1_R,2.6,74,1,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
39,1b23.cif1_R,2.6,74,1,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Видим, что у нас есть 2 категориальных признака (первые 2 признака). Закодируем их используя OneHot encoder

In [115]:
#индексы категориальных признаков
indeces = np.array([0])

#применяем LabelEncoder
int_encoder = LabelEncoder()
for ind in indeces:
    values = np.hstack((np.array(df[df.columns[ind]]), np.array(df_test[df.columns[ind]])))
    int_encoder.fit(values)
    df[df.columns[ind]] = int_encoder.transform(df[df.columns[ind]])
    df_test[df.columns[ind]] = int_encoder.transform(df_test[df.columns[ind]])


X = np.array(df.iloc[:,:-1])
y = np.array(df.iloc[:,-1], dtype=int)
X_test = np.array(df_test)
pd.DataFrame(X).head()
y[y > 0.5] = 1
y[y < 0.5] = 0

#применяем OneHotEncoder
bin_encoder = OneHotEncoder()
values = np.vstack((X[:,[0]], X_test[:,[0]]))
bin_encoder.fit(values)

category_data = bin_encoder.transform(X[:,[0]]).toarray()
non_category_data = X[:, 1:]
X = np.hstack((category_data, non_category_data))

category_data_test = bin_encoder.transform(X_test[:,[0]]).toarray()
non_category_data_test = X_test[:, 1:]
X_test = np.hstack((category_data_test, non_category_data_test))

pd.DataFrame(X).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,373,374,375,376,377,378,379,380,381,382
0,0.0,2.6,74.0,1.0,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,2.6,74.0,1.0,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,2.6,74.0,1.0,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,2.6,74.0,1.0,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,2.6,74.0,1.0,-60.0,177.7,50.4,84.8,-150.6,-79.4,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [116]:
pd.DataFrame(X_test).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,373,374,375,376,377,378,379,380,381,382
0,12.0,2.3,21.0,1.0,-73.3,174.9,53.7,83.5,-154.8,-71.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,12.0,2.3,21.0,1.0,-73.3,174.9,53.7,83.5,-154.8,-71.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,12.0,2.3,21.0,1.0,-73.3,174.9,53.7,83.5,-154.8,-71.2,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,12.0,2.3,21.0,1.0,-73.3,174.9,53.7,83.5,-154.8,-71.2,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,12.0,2.3,21.0,1.0,-73.3,174.9,53.7,83.5,-154.8,-71.2,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Теперь заметим, что в обущающей выборке классы очень несбалансированы

In [131]:
len(y[y==0]), len(y[y==1])

(3845, 3845)

In [118]:
sampler = RandomOverSampler()
X, y = sampler.fit_sample(X, y)

**PCA**

Попробуем применить PCA, чтоб получить только 100 признаков

In [466]:
pca = PCA(n_components=150, random_state=0)
fit = pca.fit(X, y)
features = fit.transform(X)

Обучаем лес и посмотрим его score на кросс валидации

In [55]:
clf = RandomForestClassifier(n_estimators=1000)
score = cross_val_score(clf, X, y, scoring='f1', n_jobs=-1)
score.mean(), score.min(), score.max()

(0.9394185494137951, 0.9343898573692552, 0.9431416639897205)

CatBoostClassifier

In [23]:
from catboost import CatBoostClassifier

In [27]:
X_eval = X[-100:]
y_eval = y[-100:]
X_train = X[:-100]
y_train = y[:-100]
model = CatBoostClassifier(iterations=100, learning_rate=1, depth=9, loss_function='Logloss', od_type='Iter', od_wait=7, )
fit_model = model.fit(X_train, y_train, eval_set = (X_eval, y_eval), cat_features=[0])



0:	learn: 0.4617512	test: 0.3734848	best: 0.3734848 (0)	total: 198ms	remaining: 19.6s
1:	learn: 0.4223229	test: 0.3717471	best: 0.3717471 (1)	total: 383ms	remaining: 18.8s
2:	learn: 0.3703146	test: 0.2932695	best: 0.2932695 (2)	total: 574ms	remaining: 18.6s
3:	learn: 0.3292073	test: 0.2631491	best: 0.2631491 (3)	total: 754ms	remaining: 18.1s
4:	learn: 0.2879123	test: 0.2308965	best: 0.2308965 (4)	total: 943ms	remaining: 17.9s
5:	learn: 0.2605058	test: 0.1984256	best: 0.1984256 (5)	total: 1.13s	remaining: 17.7s
6:	learn: 0.2370358	test: 0.1825994	best: 0.1825994 (6)	total: 1.31s	remaining: 17.4s
7:	learn: 0.2064470	test: 0.1519830	best: 0.1519830 (7)	total: 1.49s	remaining: 17.1s
8:	learn: 0.1831178	test: 0.1345219	best: 0.1345219 (8)	total: 1.67s	remaining: 16.9s
9:	learn: 0.1743801	test: 0.1357739	best: 0.1345219 (8)	total: 1.86s	remaining: 16.7s
10:	learn: 0.1670916	test: 0.1294289	best: 0.1294289 (10)	total: 2.04s	remaining: 16.5s
11:	learn: 0.1579687	test: 0.1353933	best: 0.1294289

95:	learn: 0.0205123	test: 0.0197089	best: 0.0197089 (95)	total: 18.9s	remaining: 789ms
96:	learn: 0.0201475	test: 0.0191091	best: 0.0191091 (96)	total: 19.1s	remaining: 591ms
97:	learn: 0.0200135	test: 0.0193330	best: 0.0191091 (96)	total: 19.3s	remaining: 393ms
98:	learn: 0.0189090	test: 0.0185938	best: 0.0185938 (98)	total: 19.5s	remaining: 197ms
99:	learn: 0.0163942	test: 0.0160426	best: 0.0160426 (99)	total: 19.6s	remaining: 0us

bestTest = 0.0160425958
bestIteration = 99



**перебор по параметрам**

In [480]:
from catboost import CatBoostClassifier
lrs = [1, 0.9, 0.8,0.7, 0.6]
depths = np.arange(5, 15)
X_eval = features[-100:]
y_eval = y[-100:]
X_train = features[:-100]
y_train = y[:-100]
max_score = 0
for lr in lrs:
    for depth in depths:
        model = CatBoostClassifier(iterations=70, learning_rate=lr, depth=depth, loss_function='Logloss', od_type='Iter', od_wait=7)
        fit_model = model.fit(X_train, y_train, eval_set = (X_eval, y_eval))
        predictions_eval = fit_model.predict(X_eval)
        score = f1_score(y_eval, predictions_eval)
        if max_score < score:
            max_score = score
            params = (lr, depth)
        print('lr: {}, depth: {} --- score: {}'.format(lr, depth, score))

# fit_model = mod

0:	learn: 0.6002145	test: 0.6563076	best: 0.6563076 (0)	total: 32.7ms	remaining: 2.26s
1:	learn: 0.5431428	test: 0.5594729	best: 0.5594729 (1)	total: 64.7ms	remaining: 2.2s
2:	learn: 0.5032616	test: 0.5473425	best: 0.5473425 (2)	total: 91.4ms	remaining: 2.04s
3:	learn: 0.4703057	test: 0.5207616	best: 0.5207616 (3)	total: 124ms	remaining: 2.05s
4:	learn: 0.4464523	test: 0.4996891	best: 0.4996891 (4)	total: 155ms	remaining: 2.01s
5:	learn: 0.4195402	test: 0.4760883	best: 0.4760883 (5)	total: 186ms	remaining: 1.98s
6:	learn: 0.4019941	test: 0.4198383	best: 0.4198383 (6)	total: 212ms	remaining: 1.91s
7:	learn: 0.3785317	test: 0.4169869	best: 0.4169869 (7)	total: 258ms	remaining: 2s
8:	learn: 0.3653891	test: 0.3807553	best: 0.3807553 (8)	total: 374ms	remaining: 2.53s
9:	learn: 0.3478420	test: 0.3651221	best: 0.3651221 (9)	total: 405ms	remaining: 2.43s
10:	learn: 0.3272355	test: 0.3559541	best: 0.3559541 (10)	total: 436ms	remaining: 2.34s
11:	learn: 0.3103297	test: 0.3473984	best: 0.3473984 

58:	learn: 0.0905847	test: 0.2102547	best: 0.2015427 (50)	total: 3.15s	remaining: 587ms
Stopped by overfitting detector  (7 iterations wait)

bestTest = 0.2015426948
bestIteration = 50

lr: 1, depth: 6 --- score: 0.9444444444444444
0:	learn: 0.5800916	test: 0.5575692	best: 0.5575692 (0)	total: 66.6ms	remaining: 4.6s
1:	learn: 0.5065568	test: 0.4867389	best: 0.4867389 (1)	total: 146ms	remaining: 4.97s
2:	learn: 0.4561801	test: 0.4297197	best: 0.4297197 (2)	total: 223ms	remaining: 4.98s
3:	learn: 0.4146549	test: 0.4269459	best: 0.4269459 (3)	total: 299ms	remaining: 4.93s
4:	learn: 0.3759005	test: 0.4380443	best: 0.4269459 (3)	total: 377ms	remaining: 4.91s
5:	learn: 0.3519471	test: 0.4207145	best: 0.4207145 (5)	total: 445ms	remaining: 4.75s
6:	learn: 0.3130875	test: 0.3951154	best: 0.3951154 (6)	total: 529ms	remaining: 4.76s
7:	learn: 0.2872265	test: 0.3504494	best: 0.3504494 (7)	total: 596ms	remaining: 4.62s
8:	learn: 0.2698730	test: 0.3473315	best: 0.3473315 (8)	total: 702ms	remaining: 

27:	learn: 0.0802186	test: 0.2027487	best: 0.1918829 (21)	total: 5.48s	remaining: 8.22s
28:	learn: 0.0784233	test: 0.1906957	best: 0.1906957 (28)	total: 5.69s	remaining: 8.04s
29:	learn: 0.0784227	test: 0.1905617	best: 0.1905617 (29)	total: 5.7s	remaining: 7.6s
30:	learn: 0.0766070	test: 0.2025288	best: 0.1905617 (29)	total: 5.91s	remaining: 7.43s
31:	learn: 0.0761074	test: 0.2025199	best: 0.1905617 (29)	total: 6.11s	remaining: 7.25s
32:	learn: 0.0731310	test: 0.2043611	best: 0.1905617 (29)	total: 6.3s	remaining: 7.07s
33:	learn: 0.0731214	test: 0.2043733	best: 0.1905617 (29)	total: 6.32s	remaining: 6.69s
34:	learn: 0.0691612	test: 0.1895683	best: 0.1895683 (34)	total: 6.52s	remaining: 6.52s
35:	learn: 0.0682913	test: 0.1833923	best: 0.1833923 (35)	total: 6.75s	remaining: 6.38s
36:	learn: 0.0665853	test: 0.1743356	best: 0.1743356 (36)	total: 6.95s	remaining: 6.2s
37:	learn: 0.0664380	test: 0.1712960	best: 0.1712960 (37)	total: 7.17s	remaining: 6.04s
38:	learn: 0.0651110	test: 0.1687277

2:	learn: 0.3029944	test: 0.3980499	best: 0.3980499 (2)	total: 2.58s	remaining: 57.7s
3:	learn: 0.2727554	test: 0.3595515	best: 0.3595515 (3)	total: 3.42s	remaining: 56.4s
4:	learn: 0.2009523	test: 0.2677698	best: 0.2677698 (4)	total: 4.25s	remaining: 55.3s
5:	learn: 0.1850665	test: 0.2409581	best: 0.2409581 (5)	total: 5.11s	remaining: 54.5s
6:	learn: 0.1700630	test: 0.2252593	best: 0.2252593 (6)	total: 5.93s	remaining: 53.3s
7:	learn: 0.1550371	test: 0.2216334	best: 0.2216334 (7)	total: 6.75s	remaining: 52.4s
8:	learn: 0.1447338	test: 0.2052598	best: 0.2052598 (8)	total: 7.58s	remaining: 51.4s
9:	learn: 0.1419536	test: 0.2008897	best: 0.2008897 (9)	total: 8.42s	remaining: 50.5s
10:	learn: 0.1359871	test: 0.2015380	best: 0.2008897 (9)	total: 9.24s	remaining: 49.6s
11:	learn: 0.1298297	test: 0.1921921	best: 0.1921921 (11)	total: 10.1s	remaining: 48.8s
12:	learn: 0.1214024	test: 0.1945715	best: 0.1921921 (11)	total: 10.9s	remaining: 48s
13:	learn: 0.1070521	test: 0.1929447	best: 0.192192

15:	learn: 0.0735554	test: 0.2319947	best: 0.2319947 (15)	total: 57s	remaining: 3m 12s
16:	learn: 0.0538255	test: 0.2237833	best: 0.2237833 (16)	total: 1m	remaining: 3m 9s
17:	learn: 0.0519757	test: 0.2167018	best: 0.2167018 (17)	total: 1m 4s	remaining: 3m 6s
18:	learn: 0.0501387	test: 0.2160216	best: 0.2160216 (18)	total: 1m 8s	remaining: 3m 2s
19:	learn: 0.0479124	test: 0.2175127	best: 0.2160216 (18)	total: 1m 11s	remaining: 2m 59s
20:	learn: 0.0451588	test: 0.2183331	best: 0.2160216 (18)	total: 1m 15s	remaining: 2m 57s
21:	learn: 0.0434883	test: 0.2180892	best: 0.2160216 (18)	total: 1m 19s	remaining: 2m 53s
22:	learn: 0.0408296	test: 0.2089812	best: 0.2089812 (22)	total: 1m 23s	remaining: 2m 50s
23:	learn: 0.0408272	test: 0.2086878	best: 0.2086878 (23)	total: 1m 23s	remaining: 2m 39s
24:	learn: 0.0312771	test: 0.2098204	best: 0.2086878 (23)	total: 1m 26s	remaining: 2m 36s
25:	learn: 0.0306027	test: 0.2018833	best: 0.2018833 (25)	total: 1m 30s	remaining: 2m 33s
26:	learn: 0.0305967	t

39:	learn: 0.1691732	test: 0.2617502	best: 0.2594709 (35)	total: 1.34s	remaining: 1s
40:	learn: 0.1657137	test: 0.2584944	best: 0.2584944 (40)	total: 1.37s	remaining: 968ms
41:	learn: 0.1605359	test: 0.2629761	best: 0.2584944 (40)	total: 1.4s	remaining: 933ms
42:	learn: 0.1564459	test: 0.2621505	best: 0.2584944 (40)	total: 1.44s	remaining: 902ms
43:	learn: 0.1533902	test: 0.2622988	best: 0.2584944 (40)	total: 1.47s	remaining: 867ms
44:	learn: 0.1516079	test: 0.2632930	best: 0.2584944 (40)	total: 1.49s	remaining: 830ms
45:	learn: 0.1496574	test: 0.2627026	best: 0.2584944 (40)	total: 1.52s	remaining: 796ms
46:	learn: 0.1484077	test: 0.2622143	best: 0.2584944 (40)	total: 1.56s	remaining: 764ms
47:	learn: 0.1457903	test: 0.2689560	best: 0.2584944 (40)	total: 1.59s	remaining: 730ms
48:	learn: 0.1424775	test: 0.2780437	best: 0.2584944 (40)	total: 1.62s	remaining: 696ms
Stopped by overfitting detector  (7 iterations wait)

bestTest = 0.2584944035
bestIteration = 40

lr: 0.9, depth: 5 --- scor

26:	learn: 0.1538063	test: 0.3103119	best: 0.3041295 (25)	total: 1.91s	remaining: 3.04s
27:	learn: 0.1510075	test: 0.3011357	best: 0.3011357 (27)	total: 1.97s	remaining: 2.96s
28:	learn: 0.1458596	test: 0.2957012	best: 0.2957012 (28)	total: 2.04s	remaining: 2.88s
29:	learn: 0.1458515	test: 0.2956437	best: 0.2956437 (29)	total: 2.05s	remaining: 2.73s
30:	learn: 0.1388453	test: 0.3001787	best: 0.2956437 (29)	total: 2.14s	remaining: 2.69s
31:	learn: 0.1341060	test: 0.3048239	best: 0.2956437 (29)	total: 2.21s	remaining: 2.62s
32:	learn: 0.1275266	test: 0.2988624	best: 0.2956437 (29)	total: 2.27s	remaining: 2.54s
33:	learn: 0.1173262	test: 0.2866760	best: 0.2866760 (33)	total: 2.35s	remaining: 2.49s
34:	learn: 0.1161993	test: 0.2747534	best: 0.2747534 (34)	total: 2.43s	remaining: 2.43s
35:	learn: 0.1161991	test: 0.2747050	best: 0.2747050 (35)	total: 2.44s	remaining: 2.3s
36:	learn: 0.1161991	test: 0.2747002	best: 0.2747002 (36)	total: 2.45s	remaining: 2.19s
37:	learn: 0.1161991	test: 0.2746

66:	learn: 0.0518556	test: 0.1484355	best: 0.1484355 (66)	total: 6.56s	remaining: 294ms
67:	learn: 0.0518553	test: 0.1483742	best: 0.1483742 (67)	total: 6.58s	remaining: 193ms
68:	learn: 0.0502437	test: 0.1562671	best: 0.1483742 (67)	total: 6.7s	remaining: 97.1ms
69:	learn: 0.0500482	test: 0.1561095	best: 0.1483742 (67)	total: 6.81s	remaining: 0us

bestTest = 0.148374247
bestIteration = 67

lr: 0.9, depth: 8 --- score: 0.9532710280373832
0:	learn: 0.5499088	test: 0.5712027	best: 0.5712027 (0)	total: 197ms	remaining: 13.6s
1:	learn: 0.4671579	test: 0.5156096	best: 0.5156096 (1)	total: 417ms	remaining: 14.2s
2:	learn: 0.3932564	test: 0.4588597	best: 0.4588597 (2)	total: 612ms	remaining: 13.7s
3:	learn: 0.3609806	test: 0.4338906	best: 0.4338906 (3)	total: 813ms	remaining: 13.4s
4:	learn: 0.3195394	test: 0.4135970	best: 0.4135970 (4)	total: 1.03s	remaining: 13.4s
5:	learn: 0.2890986	test: 0.4351400	best: 0.4135970 (4)	total: 1.23s	remaining: 13.1s
6:	learn: 0.2634359	test: 0.3832221	best: 

16:	learn: 0.1015553	test: 0.1913029	best: 0.1913029 (16)	total: 14.2s	remaining: 44.4s
17:	learn: 0.0942658	test: 0.1910010	best: 0.1910010 (17)	total: 15.1s	remaining: 43.6s
18:	learn: 0.0903844	test: 0.1884462	best: 0.1884462 (18)	total: 15.9s	remaining: 42.6s
19:	learn: 0.0892805	test: 0.1902862	best: 0.1884462 (18)	total: 16.7s	remaining: 41.8s
20:	learn: 0.0892393	test: 0.1904167	best: 0.1884462 (18)	total: 17.6s	remaining: 41s
21:	learn: 0.0835690	test: 0.1974896	best: 0.1884462 (18)	total: 18.4s	remaining: 40.1s
22:	learn: 0.0748249	test: 0.1969692	best: 0.1884462 (18)	total: 19.2s	remaining: 39.3s
23:	learn: 0.0737507	test: 0.1896412	best: 0.1884462 (18)	total: 20s	remaining: 38.4s
24:	learn: 0.0718707	test: 0.1963958	best: 0.1884462 (18)	total: 20.9s	remaining: 37.6s
25:	learn: 0.0698051	test: 0.1888479	best: 0.1884462 (18)	total: 21.7s	remaining: 36.7s
26:	learn: 0.0697566	test: 0.1882977	best: 0.1882977 (26)	total: 22.5s	remaining: 35.8s
27:	learn: 0.0584765	test: 0.1828872

12:	learn: 0.0791149	test: 0.2201078	best: 0.2033466 (11)	total: 47.3s	remaining: 3m 27s
13:	learn: 0.0722221	test: 0.2118521	best: 0.2033466 (11)	total: 50.9s	remaining: 3m 23s
14:	learn: 0.0699449	test: 0.2095448	best: 0.2033466 (11)	total: 54.6s	remaining: 3m 20s
15:	learn: 0.0690675	test: 0.2120571	best: 0.2033466 (11)	total: 58.2s	remaining: 3m 16s
16:	learn: 0.0642620	test: 0.2009704	best: 0.2009704 (16)	total: 1m 1s	remaining: 3m 12s
17:	learn: 0.0637136	test: 0.1988587	best: 0.1988587 (17)	total: 1m 5s	remaining: 3m 8s
18:	learn: 0.0629127	test: 0.1967610	best: 0.1967610 (18)	total: 1m 8s	remaining: 3m 5s
19:	learn: 0.0572768	test: 0.1958044	best: 0.1958044 (19)	total: 1m 12s	remaining: 3m 1s
20:	learn: 0.0566433	test: 0.1995001	best: 0.1958044 (19)	total: 1m 16s	remaining: 2m 57s
21:	learn: 0.0502430	test: 0.1821069	best: 0.1821069 (21)	total: 1m 19s	remaining: 2m 53s
22:	learn: 0.0495275	test: 0.1829515	best: 0.1821069 (21)	total: 1m 23s	remaining: 2m 50s
23:	learn: 0.0490840

11:	learn: 0.3567652	test: 0.3932983	best: 0.3787180 (7)	total: 373ms	remaining: 1.8s
12:	learn: 0.3431571	test: 0.3791416	best: 0.3787180 (7)	total: 405ms	remaining: 1.78s
13:	learn: 0.3315281	test: 0.3720165	best: 0.3720165 (13)	total: 436ms	remaining: 1.74s
14:	learn: 0.3184978	test: 0.3514090	best: 0.3514090 (14)	total: 467ms	remaining: 1.71s
15:	learn: 0.3042743	test: 0.3289286	best: 0.3289286 (15)	total: 495ms	remaining: 1.67s
16:	learn: 0.2955989	test: 0.3175105	best: 0.3175105 (16)	total: 526ms	remaining: 1.64s
17:	learn: 0.2845469	test: 0.3146731	best: 0.3146731 (17)	total: 557ms	remaining: 1.61s
18:	learn: 0.2788635	test: 0.3154468	best: 0.3146731 (17)	total: 596ms	remaining: 1.6s
19:	learn: 0.2717394	test: 0.3175596	best: 0.3146731 (17)	total: 628ms	remaining: 1.57s
20:	learn: 0.2624013	test: 0.3325690	best: 0.3146731 (17)	total: 659ms	remaining: 1.54s
21:	learn: 0.2579013	test: 0.3266589	best: 0.3146731 (17)	total: 686ms	remaining: 1.5s
22:	learn: 0.2524246	test: 0.3296547	

8:	learn: 0.2945537	test: 0.3920315	best: 0.3920315 (8)	total: 626ms	remaining: 4.24s
9:	learn: 0.2771857	test: 0.3835175	best: 0.3835175 (9)	total: 694ms	remaining: 4.17s
10:	learn: 0.2617511	test: 0.3914920	best: 0.3835175 (9)	total: 756ms	remaining: 4.06s
11:	learn: 0.2530153	test: 0.3871335	best: 0.3835175 (9)	total: 817ms	remaining: 3.95s
12:	learn: 0.2391431	test: 0.3738077	best: 0.3738077 (12)	total: 907ms	remaining: 3.98s
13:	learn: 0.2314632	test: 0.3760937	best: 0.3738077 (12)	total: 980ms	remaining: 3.92s
14:	learn: 0.2153269	test: 0.3655857	best: 0.3655857 (14)	total: 1.07s	remaining: 3.91s
15:	learn: 0.2039783	test: 0.3456448	best: 0.3456448 (15)	total: 1.15s	remaining: 3.88s
16:	learn: 0.1975901	test: 0.3344297	best: 0.3344297 (16)	total: 1.21s	remaining: 3.79s
17:	learn: 0.1901794	test: 0.3182228	best: 0.3182228 (17)	total: 1.28s	remaining: 3.7s
18:	learn: 0.1840553	test: 0.3042866	best: 0.3042866 (18)	total: 1.34s	remaining: 3.61s
19:	learn: 0.1760905	test: 0.3008766	be

43:	learn: 0.0774613	test: 0.1686491	best: 0.1686491 (43)	total: 4.84s	remaining: 2.86s
44:	learn: 0.0749152	test: 0.1655003	best: 0.1655003 (44)	total: 4.95s	remaining: 2.75s
45:	learn: 0.0730247	test: 0.1624214	best: 0.1624214 (45)	total: 5.07s	remaining: 2.64s
46:	learn: 0.0708909	test: 0.1575088	best: 0.1575088 (46)	total: 5.18s	remaining: 2.53s
47:	learn: 0.0666885	test: 0.1473194	best: 0.1473194 (47)	total: 5.29s	remaining: 2.42s
48:	learn: 0.0665748	test: 0.1468942	best: 0.1468942 (48)	total: 5.39s	remaining: 2.31s
49:	learn: 0.0665746	test: 0.1468627	best: 0.1468627 (49)	total: 5.4s	remaining: 2.16s
50:	learn: 0.0650278	test: 0.1539872	best: 0.1468627 (49)	total: 5.52s	remaining: 2.06s
51:	learn: 0.0634777	test: 0.1536275	best: 0.1468627 (49)	total: 5.62s	remaining: 1.95s
52:	learn: 0.0627165	test: 0.1526535	best: 0.1468627 (49)	total: 5.74s	remaining: 1.84s
53:	learn: 0.0627133	test: 0.1525425	best: 0.1468627 (49)	total: 5.75s	remaining: 1.7s
54:	learn: 0.0589473	test: 0.14978

9:	learn: 0.1685426	test: 0.3091670	best: 0.2990658 (4)	total: 8.3s	remaining: 49.8s
10:	learn: 0.1580185	test: 0.2905427	best: 0.2905427 (10)	total: 9.13s	remaining: 49s
11:	learn: 0.1489188	test: 0.2964508	best: 0.2905427 (10)	total: 9.95s	remaining: 48.1s
12:	learn: 0.1353211	test: 0.2872796	best: 0.2872796 (12)	total: 10.8s	remaining: 47.3s
13:	learn: 0.1312412	test: 0.2792931	best: 0.2792931 (13)	total: 11.6s	remaining: 46.3s
14:	learn: 0.1237327	test: 0.2792707	best: 0.2792707 (14)	total: 12.4s	remaining: 45.4s
15:	learn: 0.1220786	test: 0.2783056	best: 0.2783056 (15)	total: 13.2s	remaining: 44.5s
16:	learn: 0.1166146	test: 0.2723747	best: 0.2723747 (16)	total: 14s	remaining: 43.6s
17:	learn: 0.1163606	test: 0.2688679	best: 0.2688679 (17)	total: 14.8s	remaining: 42.8s
18:	learn: 0.1155770	test: 0.2684290	best: 0.2684290 (18)	total: 15.6s	remaining: 41.9s
19:	learn: 0.1110121	test: 0.2702527	best: 0.2684290 (18)	total: 16.4s	remaining: 41s
20:	learn: 0.1042776	test: 0.2573430	best

13:	learn: 0.1144375	test: 0.1934396	best: 0.1887873 (12)	total: 51.2s	remaining: 3m 24s
14:	learn: 0.1144343	test: 0.1934074	best: 0.1887873 (12)	total: 51.2s	remaining: 3m 7s
15:	learn: 0.1144343	test: 0.1933998	best: 0.1887873 (12)	total: 51.2s	remaining: 2m 52s
16:	learn: 0.0800398	test: 0.1729616	best: 0.1729616 (16)	total: 55s	remaining: 2m 51s
17:	learn: 0.0781384	test: 0.1705386	best: 0.1705386 (17)	total: 58.6s	remaining: 2m 49s
18:	learn: 0.0774733	test: 0.1709884	best: 0.1705386 (17)	total: 1m 2s	remaining: 2m 46s
19:	learn: 0.0720960	test: 0.1709419	best: 0.1705386 (17)	total: 1m 5s	remaining: 2m 44s
20:	learn: 0.0718181	test: 0.1701086	best: 0.1701086 (20)	total: 1m 9s	remaining: 2m 42s
21:	learn: 0.0718153	test: 0.1700166	best: 0.1700166 (21)	total: 1m 9s	remaining: 2m 31s
22:	learn: 0.0650405	test: 0.1647871	best: 0.1647871 (22)	total: 1m 13s	remaining: 2m 29s
23:	learn: 0.0643076	test: 0.1689445	best: 0.1647871 (22)	total: 1m 16s	remaining: 2m 26s
24:	learn: 0.0643062	t

36:	learn: 0.0214358	test: 0.1244934	best: 0.1244934 (36)	total: 4m 28s	remaining: 3m 59s
37:	learn: 0.0209009	test: 0.1206004	best: 0.1206004 (37)	total: 4m 37s	remaining: 3m 53s
38:	learn: 0.0207433	test: 0.1242340	best: 0.1206004 (37)	total: 4m 45s	remaining: 3m 46s
39:	learn: 0.0207416	test: 0.1240925	best: 0.1206004 (37)	total: 4m 45s	remaining: 3m 33s
40:	learn: 0.0207416	test: 0.1240558	best: 0.1206004 (37)	total: 4m 45s	remaining: 3m 21s
41:	learn: 0.0184038	test: 0.1222406	best: 0.1206004 (37)	total: 4m 53s	remaining: 3m 15s
42:	learn: 0.0183989	test: 0.1215273	best: 0.1206004 (37)	total: 4m 53s	remaining: 3m 4s
43:	learn: 0.0183987	test: 0.1213821	best: 0.1206004 (37)	total: 4m 53s	remaining: 2m 53s
44:	learn: 0.0172503	test: 0.1247616	best: 0.1206004 (37)	total: 5m 1s	remaining: 2m 47s
45:	learn: 0.0172224	test: 0.1235422	best: 0.1206004 (37)	total: 5m 9s	remaining: 2m 41s
Stopped by overfitting detector  (7 iterations wait)

bestTest = 0.1206003608
bestIteration = 37

lr: 0

22:	learn: 0.1971580	test: 0.3037236	best: 0.2981747 (19)	total: 1.07s	remaining: 2.19s
23:	learn: 0.1882468	test: 0.3113065	best: 0.2981747 (19)	total: 1.12s	remaining: 2.15s
24:	learn: 0.1836721	test: 0.3021215	best: 0.2981747 (19)	total: 1.17s	remaining: 2.1s
25:	learn: 0.1820017	test: 0.2990292	best: 0.2981747 (19)	total: 1.21s	remaining: 2.05s
26:	learn: 0.1766230	test: 0.3062161	best: 0.2981747 (19)	total: 1.26s	remaining: 2.02s
27:	learn: 0.1724649	test: 0.3010029	best: 0.2981747 (19)	total: 1.31s	remaining: 1.97s
Stopped by overfitting detector  (7 iterations wait)

bestTest = 0.2981746533
bestIteration = 19

lr: 0.7, depth: 6 --- score: 0.897196261682243
0:	learn: 0.5842840	test: 0.6238038	best: 0.6238038 (0)	total: 62.9ms	remaining: 4.34s
1:	learn: 0.5141832	test: 0.5766734	best: 0.5766734 (1)	total: 130ms	remaining: 4.42s
2:	learn: 0.4648259	test: 0.5285675	best: 0.5285675 (2)	total: 201ms	remaining: 4.5s
3:	learn: 0.4292855	test: 0.5144802	best: 0.5144802 (3)	total: 267ms	r

17:	learn: 0.1719766	test: 0.2731008	best: 0.2731008 (17)	total: 2.06s	remaining: 5.96s
18:	learn: 0.1670083	test: 0.2761499	best: 0.2731008 (17)	total: 2.17s	remaining: 5.84s
19:	learn: 0.1620713	test: 0.2771034	best: 0.2731008 (17)	total: 2.29s	remaining: 5.73s
20:	learn: 0.1599831	test: 0.2796368	best: 0.2731008 (17)	total: 2.4s	remaining: 5.6s
21:	learn: 0.1533081	test: 0.2725326	best: 0.2725326 (21)	total: 2.52s	remaining: 5.49s
22:	learn: 0.1382926	test: 0.2309627	best: 0.2309627 (22)	total: 2.63s	remaining: 5.37s
23:	learn: 0.1299195	test: 0.2277306	best: 0.2277306 (23)	total: 2.74s	remaining: 5.25s
24:	learn: 0.1241876	test: 0.2161761	best: 0.2161761 (24)	total: 2.84s	remaining: 5.11s
25:	learn: 0.1218762	test: 0.2052873	best: 0.2052873 (25)	total: 2.95s	remaining: 5s
26:	learn: 0.1191549	test: 0.1999286	best: 0.1999286 (26)	total: 3.06s	remaining: 4.87s
27:	learn: 0.1155588	test: 0.1978222	best: 0.1978222 (27)	total: 3.17s	remaining: 4.75s
28:	learn: 0.1153176	test: 0.1980452	

16:	learn: 0.1158367	test: 0.2404567	best: 0.2305456 (15)	total: 6.8s	remaining: 21.2s
17:	learn: 0.1145874	test: 0.2360854	best: 0.2305456 (15)	total: 7.18s	remaining: 20.7s
18:	learn: 0.1088321	test: 0.2322410	best: 0.2305456 (15)	total: 7.57s	remaining: 20.3s
19:	learn: 0.1050192	test: 0.2286850	best: 0.2286850 (19)	total: 7.95s	remaining: 19.9s
20:	learn: 0.0995404	test: 0.2197308	best: 0.2197308 (20)	total: 8.33s	remaining: 19.4s
21:	learn: 0.0940339	test: 0.2157717	best: 0.2157717 (21)	total: 8.71s	remaining: 19s
22:	learn: 0.0914859	test: 0.2110618	best: 0.2110618 (22)	total: 9.09s	remaining: 18.6s
23:	learn: 0.0914759	test: 0.2105124	best: 0.2105124 (23)	total: 9.1s	remaining: 17.4s
24:	learn: 0.0914742	test: 0.2103991	best: 0.2103991 (24)	total: 9.11s	remaining: 16.4s
25:	learn: 0.0866134	test: 0.2064956	best: 0.2064956 (25)	total: 9.5s	remaining: 16.1s
26:	learn: 0.0866099	test: 0.2067068	best: 0.2064956 (25)	total: 9.52s	remaining: 15.2s
27:	learn: 0.0866096	test: 0.2067714	

17:	learn: 0.0908467	test: 0.1985452	best: 0.1985452 (17)	total: 28.8s	remaining: 1m 23s
18:	learn: 0.0809127	test: 0.1841496	best: 0.1841496 (18)	total: 30.5s	remaining: 1m 21s
19:	learn: 0.0783486	test: 0.1795765	best: 0.1795765 (19)	total: 32.2s	remaining: 1m 20s
20:	learn: 0.0755590	test: 0.1674930	best: 0.1674930 (20)	total: 33.9s	remaining: 1m 19s
21:	learn: 0.0746593	test: 0.1633276	best: 0.1633276 (21)	total: 35.6s	remaining: 1m 17s
22:	learn: 0.0746543	test: 0.1632238	best: 0.1632238 (22)	total: 35.6s	remaining: 1m 12s
23:	learn: 0.0746541	test: 0.1632428	best: 0.1632238 (22)	total: 35.6s	remaining: 1m 8s
24:	learn: 0.0686760	test: 0.1535421	best: 0.1535421 (24)	total: 37.3s	remaining: 1m 7s
25:	learn: 0.0680636	test: 0.1535356	best: 0.1535356 (25)	total: 39s	remaining: 1m 6s
26:	learn: 0.0673604	test: 0.1542142	best: 0.1535356 (25)	total: 40.7s	remaining: 1m 4s
27:	learn: 0.0649641	test: 0.1549854	best: 0.1535356 (25)	total: 42.4s	remaining: 1m 3s
28:	learn: 0.0626603	test: 0

11:	learn: 0.3601433	test: 0.4215866	best: 0.4215866 (11)	total: 456ms	remaining: 2.2s
12:	learn: 0.3492223	test: 0.4254937	best: 0.4215866 (11)	total: 491ms	remaining: 2.15s
13:	learn: 0.3381293	test: 0.4087798	best: 0.4087798 (13)	total: 522ms	remaining: 2.09s
14:	learn: 0.3279973	test: 0.4036381	best: 0.4036381 (14)	total: 555ms	remaining: 2.03s
15:	learn: 0.3176650	test: 0.3787192	best: 0.3787192 (15)	total: 591ms	remaining: 1.99s
16:	learn: 0.3056618	test: 0.3509648	best: 0.3509648 (16)	total: 625ms	remaining: 1.95s
17:	learn: 0.2970518	test: 0.3469354	best: 0.3469354 (17)	total: 656ms	remaining: 1.9s
18:	learn: 0.2894766	test: 0.3301704	best: 0.3301704 (18)	total: 690ms	remaining: 1.85s
19:	learn: 0.2851002	test: 0.3252170	best: 0.3252170 (19)	total: 716ms	remaining: 1.79s
20:	learn: 0.2751318	test: 0.3213950	best: 0.3213950 (20)	total: 747ms	remaining: 1.74s
21:	learn: 0.2696797	test: 0.3155635	best: 0.3155635 (21)	total: 774ms	remaining: 1.69s
22:	learn: 0.2635036	test: 0.31370

35:	learn: 0.1601425	test: 0.2353645	best: 0.2353645 (35)	total: 1.73s	remaining: 1.64s
36:	learn: 0.1565790	test: 0.2319363	best: 0.2319363 (36)	total: 1.78s	remaining: 1.59s
37:	learn: 0.1540247	test: 0.2238214	best: 0.2238214 (37)	total: 1.82s	remaining: 1.53s
38:	learn: 0.1491930	test: 0.2245449	best: 0.2238214 (37)	total: 1.87s	remaining: 1.49s
39:	learn: 0.1462905	test: 0.2230423	best: 0.2230423 (39)	total: 1.91s	remaining: 1.43s
40:	learn: 0.1431653	test: 0.2185602	best: 0.2185602 (40)	total: 1.97s	remaining: 1.4s
41:	learn: 0.1393118	test: 0.2066819	best: 0.2066819 (41)	total: 2.01s	remaining: 1.34s
42:	learn: 0.1377997	test: 0.2019448	best: 0.2019448 (42)	total: 2.06s	remaining: 1.29s
43:	learn: 0.1339640	test: 0.1992396	best: 0.1992396 (43)	total: 2.11s	remaining: 1.25s
44:	learn: 0.1313406	test: 0.1986946	best: 0.1986946 (44)	total: 2.15s	remaining: 1.2s
45:	learn: 0.1293128	test: 0.1969281	best: 0.1969281 (45)	total: 2.2s	remaining: 1.15s
46:	learn: 0.1271355	test: 0.194162

32:	learn: 0.1150823	test: 0.2066229	best: 0.2051810 (28)	total: 3.83s	remaining: 4.29s
33:	learn: 0.1102269	test: 0.2031527	best: 0.2031527 (33)	total: 3.95s	remaining: 4.18s
34:	learn: 0.1094981	test: 0.2012426	best: 0.2012426 (34)	total: 4.05s	remaining: 4.05s
35:	learn: 0.1057751	test: 0.2103632	best: 0.2012426 (34)	total: 4.17s	remaining: 3.94s
36:	learn: 0.1035427	test: 0.2072302	best: 0.2012426 (34)	total: 4.28s	remaining: 3.82s
37:	learn: 0.1008700	test: 0.2040970	best: 0.2012426 (34)	total: 4.39s	remaining: 3.7s
38:	learn: 0.0953275	test: 0.2053454	best: 0.2012426 (34)	total: 4.5s	remaining: 3.58s
39:	learn: 0.0925790	test: 0.2031803	best: 0.2012426 (34)	total: 4.61s	remaining: 3.46s
40:	learn: 0.0900601	test: 0.1996400	best: 0.1996400 (40)	total: 4.73s	remaining: 3.35s
41:	learn: 0.0872379	test: 0.1926802	best: 0.1926802 (41)	total: 4.84s	remaining: 3.23s
42:	learn: 0.0856322	test: 0.1936136	best: 0.1926802 (41)	total: 4.95s	remaining: 3.11s
43:	learn: 0.0854641	test: 0.19225

6:	learn: 0.2180132	test: 0.3011093	best: 0.3011093 (6)	total: 2.85s	remaining: 25.6s
7:	learn: 0.2088869	test: 0.2695911	best: 0.2695911 (7)	total: 3.24s	remaining: 25.1s
8:	learn: 0.2021238	test: 0.2537449	best: 0.2537449 (8)	total: 3.62s	remaining: 24.6s
9:	learn: 0.1878222	test: 0.2428875	best: 0.2428875 (9)	total: 4.02s	remaining: 24.1s
10:	learn: 0.1766171	test: 0.2216374	best: 0.2216374 (10)	total: 4.41s	remaining: 23.7s
11:	learn: 0.1685650	test: 0.2225352	best: 0.2216374 (10)	total: 4.8s	remaining: 23.2s
12:	learn: 0.1641513	test: 0.2180585	best: 0.2180585 (12)	total: 5.18s	remaining: 22.7s
13:	learn: 0.1567005	test: 0.2115951	best: 0.2115951 (13)	total: 5.56s	remaining: 22.2s
14:	learn: 0.1481519	test: 0.2033505	best: 0.2033505 (14)	total: 5.95s	remaining: 21.8s
15:	learn: 0.1369857	test: 0.2026031	best: 0.2026031 (15)	total: 6.34s	remaining: 21.4s
16:	learn: 0.1284059	test: 0.2083344	best: 0.2026031 (15)	total: 6.72s	remaining: 21s
17:	learn: 0.1164211	test: 0.2125221	best: 

3:	learn: 0.2649973	test: 0.4006037	best: 0.4006037 (3)	total: 7.36s	remaining: 2m 1s
4:	learn: 0.2419269	test: 0.3395699	best: 0.3395699 (4)	total: 9.15s	remaining: 1m 58s
5:	learn: 0.2067053	test: 0.2864184	best: 0.2864184 (5)	total: 11s	remaining: 1m 57s
6:	learn: 0.1840477	test: 0.2543821	best: 0.2543821 (6)	total: 12.7s	remaining: 1m 54s
7:	learn: 0.1746972	test: 0.2513998	best: 0.2513998 (7)	total: 14.5s	remaining: 1m 52s
8:	learn: 0.1654779	test: 0.2407831	best: 0.2407831 (8)	total: 16.2s	remaining: 1m 49s
9:	learn: 0.1599234	test: 0.2410175	best: 0.2407831 (8)	total: 17.9s	remaining: 1m 47s
10:	learn: 0.1528732	test: 0.2342284	best: 0.2342284 (10)	total: 19.6s	remaining: 1m 45s
11:	learn: 0.1449405	test: 0.2317416	best: 0.2317416 (11)	total: 21.3s	remaining: 1m 42s
12:	learn: 0.1449106	test: 0.2318161	best: 0.2317416 (11)	total: 21.3s	remaining: 1m 33s
13:	learn: 0.1428895	test: 0.2294619	best: 0.2294619 (13)	total: 23s	remaining: 1m 32s
14:	learn: 0.1344627	test: 0.2329726	bes

50:	learn: 0.0302235	test: 0.1449534	best: 0.1449534 (50)	total: 2m 33s	remaining: 57s
51:	learn: 0.0302197	test: 0.1446936	best: 0.1446936 (51)	total: 2m 33s	remaining: 53.1s
52:	learn: 0.0283172	test: 0.1407562	best: 0.1407562 (52)	total: 2m 37s	remaining: 50.5s
53:	learn: 0.0270477	test: 0.1438594	best: 0.1407562 (52)	total: 2m 41s	remaining: 47.9s
54:	learn: 0.0268810	test: 0.1437159	best: 0.1407562 (52)	total: 2m 45s	remaining: 45.2s
55:	learn: 0.0251739	test: 0.1400460	best: 0.1400460 (55)	total: 2m 49s	remaining: 42.4s
56:	learn: 0.0249365	test: 0.1410011	best: 0.1400460 (55)	total: 2m 54s	remaining: 39.7s
57:	learn: 0.0249349	test: 0.1407751	best: 0.1400460 (55)	total: 2m 54s	remaining: 36s
58:	learn: 0.0249334	test: 0.1407042	best: 0.1400460 (55)	total: 2m 54s	remaining: 32.5s
59:	learn: 0.0237758	test: 0.1426582	best: 0.1400460 (55)	total: 2m 58s	remaining: 29.8s
60:	learn: 0.0233619	test: 0.1422824	best: 0.1400460 (55)	total: 3m 3s	remaining: 27.1s
61:	learn: 0.0223971	test:

Смотрим на параметры

In [481]:
max_score, params

(0.9714285714285714, (1, 9))

In [447]:
print(fit_model.get_params())

{'od_type': 'Iter', 'od_wait': 7, 'loss_function': 'Logloss', 'depth': 7, 'learning_rate': 0.6, 'iterations': 70}


In [82]:

predictions_eval = fit_model.predict(X_eval)
f1_score(y_eval, predictions_eval)

0.962962962962963

Засылаем попытку

In [83]:
predictions = fit_model.predict(X_test)

In [84]:
answers1 = pd.read_csv('sample_submission.csv')
answers1['mg'] = np.array(predictions, dtype=int)
answers1.to_csv('attempt1.csv',  index=False)
answers1.head()


Unnamed: 0,Id,mg
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [85]:
predictions.sum()

712.0

random forest

In [86]:
clf = RandomForestClassifier(n_estimators=1000)
clf.fit(X, y)
pred_4 = clf.predict(X_test)


In [501]:
answers4 = pd.read_csv('sample_submission.csv')
answers4['mg'] = np.array(predictions, dtype=int)
answers4.to_csv('attempt4.csv',  index=False)
answers4.head()

Unnamed: 0,Id,mg
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


**XGBoost, подбор параметров**

In [43]:
import warnings
warnings.simplefilter("ignore")

In [121]:
lrs = [1, 0.9, 0.8,0.7, 0.6]
depths = np.arange(5, 15)
iters = np.arange(30, 150, 5)
X_eval = X[-100:]
y_eval = y[-100:]
X_train = X[:-100]
y_train = y[:-100]
max_score = 0
for lr in lrs:
    for depth in depths:
        for it in iters:
            model = XGBClassifier(max_depth=depth, learning_rate=lr, n_estimators=it)
            score = cross_val_score(model, X, y, scoring='f1')
#             model.fit(X_train, y_train)
#             pred_eval = model.predict(X_eval)
#             score = f1_score(pred_eval, y_eval)
            if max_score < score.mean():
                max_score = score.mean()
                params = (lr, depth, it)
            print('lr: {}, depth: {}, it: {} --- score: {}'.format(lr, depth, it, score.mean()))

lr: 1, depth: 5, it: 30 --- score: 0.9521307824615105
lr: 1, depth: 5, it: 35 --- score: 0.9543291633641836
lr: 1, depth: 5, it: 40 --- score: 0.9573090695652122
lr: 1, depth: 5, it: 45 --- score: 0.957103874859771
lr: 1, depth: 5, it: 50 --- score: 0.9588428782758106
lr: 1, depth: 5, it: 55 --- score: 0.9590715417414488
lr: 1, depth: 5, it: 60 --- score: 0.959905152800864
lr: 1, depth: 5, it: 65 --- score: 0.9592254742733207
lr: 1, depth: 5, it: 70 --- score: 0.959679998060227
lr: 1, depth: 5, it: 75 --- score: 0.9595639732818925
lr: 1, depth: 5, it: 80 --- score: 0.9594478490459556
lr: 1, depth: 5, it: 85 --- score: 0.9595632881653735
lr: 1, depth: 5, it: 90 --- score: 0.9601298721838588
lr: 1, depth: 5, it: 95 --- score: 0.9603621135836365
lr: 1, depth: 5, it: 100 --- score: 0.9603599885298667
lr: 1, depth: 5, it: 105 --- score: 0.9613903629968684
lr: 1, depth: 5, it: 110 --- score: 0.9612742637851759
lr: 1, depth: 5, it: 115 --- score: 0.9601285540111569
lr: 1, depth: 5, it: 120 --

lr: 1, depth: 11, it: 65 --- score: 0.9588870325937777
lr: 1, depth: 11, it: 70 --- score: 0.9593440691688676
lr: 1, depth: 11, it: 75 --- score: 0.9589996860559565
lr: 1, depth: 11, it: 80 --- score: 0.9593362613676022
lr: 1, depth: 11, it: 85 --- score: 0.9591094679512381
lr: 1, depth: 11, it: 90 --- score: 0.9595654102467565
lr: 1, depth: 11, it: 95 --- score: 0.9593381253094949
lr: 1, depth: 11, it: 100 --- score: 0.9596806345551211
lr: 1, depth: 11, it: 105 --- score: 0.9596806345551211
lr: 1, depth: 11, it: 110 --- score: 0.9599141677970185
lr: 1, depth: 11, it: 115 --- score: 0.959686106502485
lr: 1, depth: 11, it: 120 --- score: 0.9596806345551211
lr: 1, depth: 11, it: 125 --- score: 0.9602514372094092
lr: 1, depth: 11, it: 130 --- score: 0.9599118125391808
lr: 1, depth: 11, it: 135 --- score: 0.9601378386127953
lr: 1, depth: 11, it: 140 --- score: 0.9610468347755092
lr: 1, depth: 11, it: 145 --- score: 0.9608167264053943
lr: 1, depth: 12, it: 30 --- score: 0.9592132039739919
l

KeyboardInterrupt: 

In [117]:
print(max_score, params)

0.9904761904761905 (1, 5, 110)


## Adaboost c удалением nan

In [63]:

X_eval = X[-100:]
y_eval = y[-100:]
X_train = X[:-100]
y_train = y[:-100]
model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=7), n_estimators=40, 
                           random_state=0, learning_rate=0.8)
model.fit(X, y)
predictions7 = model.predict(X_test)

In [64]:
predictions7.sum()

352

## xgboost 

In [65]:
model = XGBClassifier(max_depth=9, learning_rate=0.8, n_estimators=40)
model.fit(X, y)
predictions_xg = model.predict(X_test)

  if diff:


In [66]:
predictions_xg.sum()

490

In [67]:
res_predictions_10 = np.zeros(len(X_test))
res_predictions_10[predictions7 == 1] = 1
res_predictions_10[predictions_xg == 1] = 1
res_predictions_10.sum()

733.0

In [68]:
answers10 = pd.read_csv('sample_submission.csv')
answers10['mg'] = np.array(res_predictions_10, dtype=int)
answers10.to_csv('attempt10.csv',  index=False)
answers10.head()

Unnamed: 0,Id,mg
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


## adaboost c предсказаниями вероятностей

In [47]:
X_eval = X[-100:]
y_eval = y[-100:]
X_train = X[:-100]
y_train = y[:-100]
model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=7), n_estimators=40, 
                           random_state=0, learning_rate=0.8)
model.fit(X, y)
predictions8 = model.predict_proba(X_test)
# score = f1_score(y_eval, model.predict(X_eval))


In [48]:
predictions8
res_pred_8 = predictions8[:,1]

In [49]:
res_pred_8[res_pred_8>=0.35] = 1
res_pred_8[res_pred_8<0.35] = 0
res_pred_8.sum()

587.0

In [51]:
answers8 = pd.read_csv('sample_submission.csv')
answers8['mg'] = np.array(res_pred_8, dtype=int)
answers8.to_csv('attempt8.csv',  index=False)
answers8.head()

Unnamed: 0,Id,mg
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


**посылка 5 **- на 0.29437
удаляем nan
+xgboost XGBClassifier(max_depth=9, learning_rate=0.8, n_estimators=40)


## Catboost c удалением nan и только с labelencoding c catfeatures

In [39]:
model = CatBoostClassifier(iterations=180, learning_rate=0.9, depth=9, loss_function='Logloss', od_type='Iter', od_wait=7, )
fit_model = model.fit(X, y, cat_features=[0])
predictions_9 = model.predict(X_test)

0:	learn: 0.4665867	total: 199ms	remaining: 35.6s
1:	learn: 0.3545668	total: 392ms	remaining: 34.9s
2:	learn: 0.3108495	total: 571ms	remaining: 33.7s
3:	learn: 0.2860330	total: 740ms	remaining: 32.6s
4:	learn: 0.2565041	total: 929ms	remaining: 32.5s
5:	learn: 0.2240362	total: 1.11s	remaining: 32.2s
6:	learn: 0.2203625	total: 1.29s	remaining: 32s
7:	learn: 0.1877264	total: 1.49s	remaining: 32s
8:	learn: 0.1816740	total: 1.67s	remaining: 31.6s
9:	learn: 0.1660743	total: 1.85s	remaining: 31.4s
10:	learn: 0.1497205	total: 2.04s	remaining: 31.4s
11:	learn: 0.1465941	total: 2.22s	remaining: 31.1s
12:	learn: 0.1443160	total: 2.41s	remaining: 31s
13:	learn: 0.1312149	total: 2.6s	remaining: 30.8s
14:	learn: 0.1187630	total: 2.79s	remaining: 30.7s
15:	learn: 0.1164855	total: 2.97s	remaining: 30.5s
16:	learn: 0.1129910	total: 3.16s	remaining: 30.3s
17:	learn: 0.1099436	total: 3.35s	remaining: 30.1s
18:	learn: 0.1095976	total: 3.53s	remaining: 29.9s
19:	learn: 0.1080785	total: 3.7s	remaining: 29.6

161:	learn: 0.0054283	total: 30.2s	remaining: 3.35s
162:	learn: 0.0053266	total: 30.4s	remaining: 3.17s
163:	learn: 0.0053017	total: 30.6s	remaining: 2.98s
164:	learn: 0.0051749	total: 30.7s	remaining: 2.79s
165:	learn: 0.0051270	total: 30.9s	remaining: 2.61s
166:	learn: 0.0051061	total: 31.1s	remaining: 2.42s
167:	learn: 0.0050540	total: 31.3s	remaining: 2.23s
168:	learn: 0.0049668	total: 31.5s	remaining: 2.05s
169:	learn: 0.0049608	total: 31.7s	remaining: 1.86s
170:	learn: 0.0049474	total: 31.8s	remaining: 1.68s
171:	learn: 0.0048953	total: 32s	remaining: 1.49s
172:	learn: 0.0047680	total: 32.2s	remaining: 1.3s
173:	learn: 0.0047066	total: 32.4s	remaining: 1.12s
174:	learn: 0.0046927	total: 32.6s	remaining: 931ms
175:	learn: 0.0046766	total: 32.8s	remaining: 745ms
176:	learn: 0.0046582	total: 32.9s	remaining: 558ms
177:	learn: 0.0046512	total: 33.1s	remaining: 372ms
178:	learn: 0.0045295	total: 33.3s	remaining: 186ms
179:	learn: 0.0044483	total: 33.5s	remaining: 0us


In [40]:
predictions_9.sum()

849.0

In [41]:
answers9 = pd.read_csv('sample_submission.csv')
answers9['mg'] = np.array(predictions_9, dtype=int)
answers9.to_csv('attempt9.csv',  index=False)
answers9.head()

Unnamed: 0,Id,mg
0,0,0
1,1,1
2,2,0
3,3,0
4,4,0


## xgboost с удалением nan и xgboost со средним по столбцам

In [69]:
model = XGBClassifier(max_depth=9, learning_rate=0.8, n_estimators=40)
model.fit(X, y)
predictions_nan = model.predict(X_test)

  if diff:


In [70]:
predictions_nan.sum()

490

In [85]:
model = XGBClassifier(max_depth=9, learning_rate=0.8, n_estimators=40)
model.fit(X, y)
predictions_mean = model.predict(X_test)

  if diff:


In [86]:
predictions_mean.sum()

398

In [87]:
res_predictions_11 = np.zeros(len(X_test))
res_predictions_11[predictions_nan == 1] = 1
res_predictions_11[predictions_mean == 1] = 1
res_predictions_11.sum()

726.0

In [89]:
answers11 = pd.read_csv('sample_submission.csv')
answers11['mg'] = np.array(res_predictions_11, dtype=int)
answers11.to_csv('attempt11.csv',  index=False)
answers11.head()

Unnamed: 0,Id,mg
0,0,0
1,1,1
2,2,0
3,3,0
4,4,0


## xgboost с удалением nan и xgboost со nan c  разными параметрами

In [119]:
model = XGBClassifier(max_depth=9, learning_rate=0.8, n_estimators=40)
model.fit(X, y)
predictions_nan1 = model.predict(X_test)

  if diff:


In [120]:
predictions_nan1.sum()

489

In [127]:
model = XGBClassifier(max_depth=10, learning_rate=0.8, n_estimators=30)
model.fit(X, y)
predictions_nan2 = model.predict(X_test)

  if diff:


In [128]:
predictions_nan2.sum()

459

In [129]:
res_predictions_12 = np.zeros(len(X_test))
res_predictions_12[predictions_nan1 == 1] = 1
res_predictions_12[predictions_nan2 == 1] = 1
res_predictions_12.sum()

721.0

In [130]:
answers12 = pd.read_csv('sample_submission.csv')
answers12['mg'] = np.array(res_predictions_12, dtype=int)
answers12.to_csv('attempt12.csv',  index=False)
answers12.head()

Unnamed: 0,Id,mg
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
