In [91]:
import numpy as np
import pandas as pd
import pickle

In [92]:
PATH = "/nmnt/x01-hdd/HCP/data/"

with open(PATH + "not_normed_connectomes", 'rb') as f:
    matrices = pickle.load(f)
    
with open(PATH + "new_roi_thick", 'rb') as f:
    roi_thick = pickle.load(f)
with open(PATH + "new_roi_area", 'rb') as f:
    roi_area = pickle.load(f)
with open(PATH + "new_roi_vol", 'rb') as f:
    roi_vol = pickle.load(f)
    
whole_areas = roi_area.sum(axis=1).reshape(-1,1)
#roi_area /= whole_areas

whole_volumes = roi_vol.sum(axis=1).reshape(-1,1)
#roi_vol /= whole_volumes

In [93]:
def iterate_rows(matrices, ):
    
    '''
    Iterates over whole matrices (n_subjects x n_nodes x n_nodes) and yield:
    
    k - overall idexes 0..n_subjects * n_nodes * (n_nodes-1)/2
    subject_id - 0..n_subjects
    i,j - rows, columns idexes of a single matrix from matrices
    '''
    
    k=-1
    for subject_id in range(matrices.shape[0]):
        u, v = np.triu_indices(matrices.shape[1], k=1)
        for i,j in zip(u,v):
            k+=1
            yield k, subject_id, i, j
            
            
def gen_data(matrices, volume, thickness, area, return_df=True):
    
    '''
    Generates DataFrame for basic training 
    '''
    
    n_subjects, n_regions, _ = matrices.shape
    length = int(n_subjects * n_regions * (n_regions-1) / 2)
    data = np.zeros((length, 9)) # <--- 9 initial columns of the dataframe
    
    for idx, subject_id, i, j in iterate_rows(matrices):
        
        vol_1, thick_1, area_1 = volume[subject_id][i], thickness[subject_id][i], area[subject_id][i]
        vol_2, thick_2, area_2 = volume[subject_id][j], thickness[subject_id][j], area[subject_id][j]
        
        data[idx] = [subject_id, i, j,
                     vol_1, thick_1, area_1,
                     vol_2, thick_2, area_2]
        
    if return_df is True:
        df = pd.DataFrame(data=data, columns = [['subject_id', 'i', 'j',
                                                 'vol1', 'thick1', 'area1',
                                                 'vol2', 'thick2', 'area2']])
        
        cols = [ 'vol1', 'thick1', 'area1', 'vol2', 'thick2', 'area2']
        
        #df[cols] =(df[cols] -  df[cols].mean()) / df[cols].std()
        return df
    
    return data

        
def gen_features(df, ):
    
    df['ar1_th1'] = df['area1'] * df['thick1']
    df['vol1_th1'] = df['vol1'] / df['thick1']
    df['vol1_area1'] = df['vol1'] / df['area1']
    df['vol1_bias'] = df['ar1_th1'] - df['vol1']
    df['area1_bias'] = df['vol1_th1'] - df['area1']   
    df['thick1_bias'] = df['vol1_area1'] - df['thick1']
    
    df['ar2_th2'] = df['area2'] * df['thick2']
    df['vol2_th2'] = df['vol2'] / df['thick2']
    df['vol2_area2'] = df['vol2'] / df['area2']
    df['vol2_bias'] = df['ar2_th2'] - df['vol2']
    df['area2_bias'] = df['vol2_th2'] - df['area2']   
    df['thick2_bias'] = df['vol2_area2'] - df['thick2']
    
def gen_target(df, matrices):
    
    y = []
    for idx, subject_id, i, j in iterate_rows(matrices):
        edge = matrices[subject_id, i, j]
        y.append(edge)
        
        
    y_bin = np.array(y)
    y_bin[y_bin > 0] = 1
    
    y_bin5 = np.array(y)
    y_bin5[y_bin5 < 5] = 0
    y_bin5[y_bin5 >= 5] = 1
    
    
    log_y = np.log1p(y)
    # add within subject binarization ?? it makes no sense
    
    df['edge_weight'] = y
    df['log_edge_weight'] = log_y
    df['edge'] = y_bin
    df['edge5'] = y_bin5
    
    
def subject_split(data, n_splits=3, test_size=.33, random_state=23):
    '''
    yielding indexes of data table
    same subject_id is either in train or test 
    '''
    
    subject_unique = np.arange(data.subject_id.unique().shape[0])
    split = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)
    
    for train,test in split.split(subject_unique):
        inds_train, inds_test = data.subject_id.isin(train), data.subject_id.isin(test)
        yield inds_train, inds_test

In [94]:
data = gen_data(matrices, roi_vol, roi_thick, roi_area)
gen_features(data)
gen_target(data, matrices)

In [95]:
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, accuracy_score
from catboost import CatBoostRegressor
from catboost import CatBoostClassifier

In [96]:
f_cols = ['i', 'j', 'vol1', 'thick1', 'area1', 'vol2', 'thick2',
          'area2', 'ar1_th1', 'vol1_th1', 'vol1_area1', 'vol1_bias', 'area1_bias',
          'thick1_bias', 'ar2_th2', 'vol2_th2', 'vol2_area2', 'vol2_bias',
          'area2_bias', 'thick2_bias']

f_cols2 = ['i', 'j', 'vol1', 'thick1', 'area1', 'vol2', 'thick2',
          'area2']

t_cols = ['edge_weight', 'edge', 'log_edge_weight']

In [38]:
regressor = SGDClassifier(loss='log', n_jobs=-1, l1_ratio=1.0, alpha=0.01)

for train, test in subject_split(data, n_splits=1):
    
    target = data['edge']
    X = data[f_cols2]
    X_train, X_test, y_train, y_test = X[train], X[test], target[train], target[test]
    
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict_proba(X_test)
    
    
    y_pred_train = regressor.predict_proba(X_train)
    
    
    print(roc_auc_score(y_train, y_pred_train[:, 1]))
    
    print(roc_auc_score(y_test, y_pred[:, 1]))



0.716039106909
0.712217162337


In [39]:
for n, w in zip(f_cols, regressor.coef_[0]):
    print(n, '   ', w)

i     0.0342383044427
j     -0.0321116728942
vol1     0.071619734537
thick1     -1.1343137848
area1     0.0663978500437
vol2     0.094052354638
thick2     -1.1960707322
area2     0.0884122616606


In [36]:
regressor.coef_

array([[ 0.03098655, -0.02919532]])

In [78]:
from sklearn.metrics import precision_score, recall_score,confusion_matrix

In [98]:
regressor = LogisticRegression(penalty='l2', )

for train, test in subject_split(data, n_splits=1):
    
    target = data['edge']
    X = data[f_cols]
    X_train, X_test, y_train, y_test = X[train], X[test], target[train], target[test]
    
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict_proba(X_test)
    
    
    y_pred_train = regressor.predict_proba(X_train)


t = 0.5
print(t)
print('AUC',roc_auc_score(y_train, y_pred_train[:, 1]), roc_auc_score(y_test, y_pred[:, 1]))
print('Accuracy',accuracy_score(y_train, y_pred_train[:, 1]>t), accuracy_score(y_test, y_pred[:, 1]>t))
print('Precision',precision_score(y_train, y_pred_train[:, 1]>t),
      precision_score(y_test, y_pred[:, 1]>t) )

print('Recall',recall_score(y_train, y_pred_train[:, 1]>t),
      recall_score(y_test, y_pred[:, 1]>t) )

0.5
AUC 0.793093045038 0.793391795351
Accuracy 0.832865252614 0.832118985868
Precision 0.847543972924 0.846452936577
Recall 0.970417782435 0.970794226723


In [88]:
t = 0.7
print(t)
print('AUC',roc_auc_score(y_train, y_pred_train[:, 1]), roc_auc_score(y_test, y_pred[:, 1]))
print('Accuracy',accuracy_score(y_train, y_pred_train[:, 1]>t), accuracy_score(y_test, y_pred[:, 1]>t))
print('Precision',precision_score(y_train, y_pred_train[:, 1]>t),
      precision_score(y_test, y_pred[:, 1]>t) )

print('Recall',recall_score(y_train, y_pred_train[:, 1]>t),
      recall_score(y_test, y_pred[:, 1]>t) )

0.7
AUC 0.813702147178 0.811334839054
Accuracy 0.811498157608 0.808424073009
Precision 0.894894225319 0.893889375606
Recall 0.872201692414 0.868852728772


In [86]:
confusion_matrix(y_test, y_pred[:, 1]>t) / 594558

array([[ 0.12981744,  0.052481  ],
       [ 0.20722789,  0.61047366]])

In [76]:
y_test.shape, y_train.shape

((594558,), (1202784,))

In [81]:
roc_auc_score(y_test, np.ones(594558))

0.5

In [82]:
y_test.mean()

0.8177015781402588

In [None]:
print('AUC',roc_auc_score(y_train, y_pred_train[:, 1]), roc_auc_score(y_test, y_pred[:, 1]))

In [73]:
print('AUC',roc_auc_score(y_train, y_pred_train[:, 1]), roc_auc_score(y_test, y_pred[:, 1]))
print('Accuracy',accuracy_score(y_train, y_pred_train[:, 1]>t), accuracy_score(y_test, y_pred[:, 1]>t))
print('Precision',precision_score(y_train, y_pred_train[:, 1]>t),
      precision_score(y_test, y_pred[:, 1]>t) )

print('Recall',recall_score(y_train, y_pred_train[:, 1]>t),
      recall_score(y_test, y_pred[:, 1]>t) )

IndexError: too many indices for array

In [61]:
f_cols = ['i', 'j', 'vol1', 'thick1', 'area1', 'vol2', 'thick2',
          'area2', 'ar1_th1', 'vol1_th1', 'vol1_area1', 'vol1_bias', 'area1_bias',
          'thick1_bias', 'ar2_th2', 'vol2_th2', 'vol2_area2', 'vol2_bias',
          'area2_bias', 'thick2_bias', 'edge_pred']


t_cols = ['edge_weight', 'edge', 'log_edge_weight']

In [57]:
data['edge_pred'] = 0
data['edge_pred'][test] = y_pred
data['edge_pred'][train] = y_pred_train

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [67]:
data['w_edge_pred'] = 0
data['w_edge_pred'][test] = y_pred
data['w_edge_pred'][train] = y_pred_train

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [71]:
mean_squared_error(data.edge_weight, data.edge_pred * data.w_edge_pred)

3034283.4966090727

In [66]:
regressor = CatBoostRegressor(thread_count=8)

for train, test in subject_split(data, n_splits=1):
    
    target = data['edge_weight']
    X = data[f_cols]
    X_train, X_test, y_train, y_test = X[train], X[test], target[train], target[test]

    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    
    
    y_pred_train = regressor.predict(X_train)
    print('TRAIN mse: {:.2f} TRAIN r2: {:.5f}'.format(mean_squared_error(y_train, y_pred_train), 
                                                  r2_score(y_train, y_pred_train)))
    
    print('TEST mse: {:.2f} TEST r2: {:.5f}'.format(mean_squared_error(y_test, y_pred), 
                                                  r2_score(y_test, y_pred)))

0:	learn: 2154.8387591	total: 186ms	remaining: 3m 5s
1:	learn: 2136.7216047	total: 348ms	remaining: 2m 53s
2:	learn: 2120.1823060	total: 487ms	remaining: 2m 41s
3:	learn: 2102.6346376	total: 612ms	remaining: 2m 32s
4:	learn: 2087.4183338	total: 748ms	remaining: 2m 28s
5:	learn: 2072.7742254	total: 884ms	remaining: 2m 26s
6:	learn: 2058.8940691	total: 1.02s	remaining: 2m 24s
7:	learn: 2045.9379602	total: 1.16s	remaining: 2m 23s
8:	learn: 2031.6263421	total: 1.29s	remaining: 2m 22s
9:	learn: 2018.6564972	total: 1.42s	remaining: 2m 20s
10:	learn: 2007.1487034	total: 1.55s	remaining: 2m 19s
11:	learn: 1994.9592296	total: 1.69s	remaining: 2m 18s
12:	learn: 1982.6605101	total: 1.82s	remaining: 2m 18s
13:	learn: 1972.1901142	total: 1.95s	remaining: 2m 17s
14:	learn: 1962.4679205	total: 2.08s	remaining: 2m 16s
15:	learn: 1951.4988227	total: 2.22s	remaining: 2m 16s
16:	learn: 1941.2839649	total: 2.35s	remaining: 2m 15s
17:	learn: 1932.7901372	total: 2.48s	remaining: 2m 15s
18:	learn: 1924.66341

148:	learn: 1618.7043794	total: 22s	remaining: 2m 6s
149:	learn: 1617.5487615	total: 22.1s	remaining: 2m 6s
150:	learn: 1616.5718950	total: 22.3s	remaining: 2m 6s
151:	learn: 1615.8205099	total: 22.4s	remaining: 2m 5s
152:	learn: 1615.2595175	total: 22.6s	remaining: 2m 5s
153:	learn: 1613.8064543	total: 22.7s	remaining: 2m 5s
154:	learn: 1612.8438833	total: 22.8s	remaining: 2m 5s
155:	learn: 1611.5518739	total: 22.9s	remaining: 2m 4s
156:	learn: 1610.3714508	total: 23.1s	remaining: 2m 4s
157:	learn: 1609.3798953	total: 23.2s	remaining: 2m 4s
158:	learn: 1608.9616142	total: 23.3s	remaining: 2m 4s
159:	learn: 1608.1681360	total: 23.5s	remaining: 2m 3s
160:	learn: 1607.2490862	total: 23.6s	remaining: 2m 3s
161:	learn: 1606.3008160	total: 23.7s	remaining: 2m 3s
162:	learn: 1605.3017242	total: 23.8s	remaining: 2m 3s
163:	learn: 1604.4536244	total: 24s	remaining: 2m 2s
164:	learn: 1603.7953023	total: 24.1s	remaining: 2m 2s
165:	learn: 1602.9402367	total: 24.2s	remaining: 2m 2s
166:	learn: 16

296:	learn: 1516.9375983	total: 41.4s	remaining: 1m 38s
297:	learn: 1516.5512564	total: 41.5s	remaining: 1m 38s
298:	learn: 1515.5778898	total: 41.7s	remaining: 1m 37s
299:	learn: 1514.9592787	total: 41.8s	remaining: 1m 37s
300:	learn: 1514.1085548	total: 41.9s	remaining: 1m 37s
301:	learn: 1513.7512044	total: 42s	remaining: 1m 37s
302:	learn: 1513.0588353	total: 42.2s	remaining: 1m 37s
303:	learn: 1512.1785470	total: 42.3s	remaining: 1m 37s
304:	learn: 1511.6917908	total: 42.4s	remaining: 1m 37s
305:	learn: 1511.0109498	total: 42.6s	remaining: 1m 36s
306:	learn: 1510.3742864	total: 42.7s	remaining: 1m 36s
307:	learn: 1510.0601405	total: 42.8s	remaining: 1m 36s
308:	learn: 1509.4615654	total: 43s	remaining: 1m 36s
309:	learn: 1509.0674627	total: 43.1s	remaining: 1m 36s
310:	learn: 1508.4919725	total: 43.3s	remaining: 1m 36s
311:	learn: 1507.7987577	total: 43.4s	remaining: 1m 35s
312:	learn: 1507.3123727	total: 43.5s	remaining: 1m 35s
313:	learn: 1507.0069136	total: 43.7s	remaining: 1m 

444:	learn: 1446.0981137	total: 1m 1s	remaining: 1m 16s
445:	learn: 1445.7793914	total: 1m 1s	remaining: 1m 16s
446:	learn: 1445.1220427	total: 1m 1s	remaining: 1m 16s
447:	learn: 1444.5587077	total: 1m 1s	remaining: 1m 16s
448:	learn: 1444.3041979	total: 1m 1s	remaining: 1m 16s
449:	learn: 1443.6843512	total: 1m 2s	remaining: 1m 15s
450:	learn: 1443.3862448	total: 1m 2s	remaining: 1m 15s
451:	learn: 1442.9096015	total: 1m 2s	remaining: 1m 15s
452:	learn: 1442.4515982	total: 1m 2s	remaining: 1m 15s
453:	learn: 1441.9091804	total: 1m 2s	remaining: 1m 15s
454:	learn: 1441.5544707	total: 1m 2s	remaining: 1m 15s
455:	learn: 1440.9901452	total: 1m 2s	remaining: 1m 15s
456:	learn: 1440.2781108	total: 1m 2s	remaining: 1m 14s
457:	learn: 1439.5232935	total: 1m 3s	remaining: 1m 14s
458:	learn: 1438.8508856	total: 1m 3s	remaining: 1m 14s
459:	learn: 1438.5232093	total: 1m 3s	remaining: 1m 14s
460:	learn: 1438.3445191	total: 1m 3s	remaining: 1m 14s
461:	learn: 1438.1146799	total: 1m 3s	remaining:

592:	learn: 1390.2602937	total: 1m 20s	remaining: 55.5s
593:	learn: 1390.0853198	total: 1m 20s	remaining: 55.4s
594:	learn: 1389.8171032	total: 1m 21s	remaining: 55.2s
595:	learn: 1389.4726465	total: 1m 21s	remaining: 55.1s
596:	learn: 1389.1472852	total: 1m 21s	remaining: 55s
597:	learn: 1388.6054354	total: 1m 21s	remaining: 54.8s
598:	learn: 1388.3946361	total: 1m 21s	remaining: 54.7s
599:	learn: 1388.1443732	total: 1m 21s	remaining: 54.5s
600:	learn: 1387.8029894	total: 1m 21s	remaining: 54.4s
601:	learn: 1387.2937872	total: 1m 21s	remaining: 54.3s
602:	learn: 1387.0128534	total: 1m 22s	remaining: 54.1s
603:	learn: 1386.7091329	total: 1m 22s	remaining: 54s
604:	learn: 1386.0132987	total: 1m 22s	remaining: 53.9s
605:	learn: 1385.6708625	total: 1m 22s	remaining: 53.7s
606:	learn: 1385.5723283	total: 1m 22s	remaining: 53.6s
607:	learn: 1385.3022026	total: 1m 22s	remaining: 53.4s
608:	learn: 1385.1010656	total: 1m 22s	remaining: 53.3s
609:	learn: 1384.7899556	total: 1m 23s	remaining: 53

740:	learn: 1344.8573038	total: 1m 40s	remaining: 35.2s
741:	learn: 1344.6464068	total: 1m 40s	remaining: 35.1s
742:	learn: 1344.1668969	total: 1m 40s	remaining: 34.9s
743:	learn: 1343.9681197	total: 1m 40s	remaining: 34.8s
744:	learn: 1343.6246668	total: 1m 41s	remaining: 34.7s
745:	learn: 1343.2814426	total: 1m 41s	remaining: 34.5s
746:	learn: 1343.1398767	total: 1m 41s	remaining: 34.4s
747:	learn: 1342.8217037	total: 1m 41s	remaining: 34.2s
748:	learn: 1342.7472907	total: 1m 41s	remaining: 34.1s
749:	learn: 1342.5508588	total: 1m 41s	remaining: 34s
750:	learn: 1342.4619290	total: 1m 41s	remaining: 33.8s
751:	learn: 1342.3497666	total: 1m 42s	remaining: 33.7s
752:	learn: 1341.7887674	total: 1m 42s	remaining: 33.6s
753:	learn: 1341.6203910	total: 1m 42s	remaining: 33.4s
754:	learn: 1341.3132643	total: 1m 42s	remaining: 33.3s
755:	learn: 1341.1605149	total: 1m 42s	remaining: 33.2s
756:	learn: 1340.8849281	total: 1m 42s	remaining: 33s
757:	learn: 1340.7033968	total: 1m 42s	remaining: 32

888:	learn: 1307.4369518	total: 2m	remaining: 15s
889:	learn: 1307.2783115	total: 2m	remaining: 14.9s
890:	learn: 1307.1544206	total: 2m	remaining: 14.8s
891:	learn: 1307.0375788	total: 2m	remaining: 14.6s
892:	learn: 1306.8987805	total: 2m	remaining: 14.5s
893:	learn: 1306.6170930	total: 2m	remaining: 14.4s
894:	learn: 1306.4634522	total: 2m 1s	remaining: 14.2s
895:	learn: 1306.2556265	total: 2m 1s	remaining: 14.1s
896:	learn: 1305.9462355	total: 2m 1s	remaining: 13.9s
897:	learn: 1305.8671271	total: 2m 1s	remaining: 13.8s
898:	learn: 1305.6496236	total: 2m 1s	remaining: 13.7s
899:	learn: 1305.4714513	total: 2m 1s	remaining: 13.5s
900:	learn: 1305.2570652	total: 2m 1s	remaining: 13.4s
901:	learn: 1305.0481732	total: 2m 1s	remaining: 13.3s
902:	learn: 1304.8687817	total: 2m 2s	remaining: 13.1s
903:	learn: 1304.7174907	total: 2m 2s	remaining: 13s
904:	learn: 1304.5119027	total: 2m 2s	remaining: 12.9s
905:	learn: 1304.3410428	total: 2m 2s	remaining: 12.7s
906:	learn: 1304.1526309	total: 

In [64]:
regressor = CatBoostClassifier(thread_count=8)

for train, test in subject_split(data, n_splits=1):
    
    target = data['edge']
    X = data[f_cols]
    X_train, X_test, y_train, y_test = X[train], X[test], target[train], target[test]

    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    
    
    y_pred_train = regressor.predict(X_train)
#     print('TRAIN mse: {:.2f} TRAIN r2: {:.5f}'.format(mean_squared_error(y_train, y_pred_train), 
#                                                   r2_score(y_train, y_pred_train)))
    
#     print('TEST mse: {:.2f} TEST r2: {:.5f}'.format(mean_squared_error(y_test, y_pred), 
#                                                   r2_score(y_test, y_pred)))

0:	learn: 0.6654261	total: 252ms	remaining: 4m 11s
1:	learn: 0.6412194	total: 464ms	remaining: 3m 51s
2:	learn: 0.6191850	total: 673ms	remaining: 3m 43s
3:	learn: 0.5975227	total: 902ms	remaining: 3m 44s
4:	learn: 0.5789328	total: 1.13s	remaining: 3m 45s
5:	learn: 0.5610770	total: 1.36s	remaining: 3m 44s
6:	learn: 0.5445232	total: 1.57s	remaining: 3m 43s
7:	learn: 0.5300333	total: 1.78s	remaining: 3m 40s
8:	learn: 0.5162610	total: 1.98s	remaining: 3m 38s
9:	learn: 0.5036572	total: 2.21s	remaining: 3m 38s
10:	learn: 0.4917173	total: 2.42s	remaining: 3m 38s
11:	learn: 0.4812093	total: 2.64s	remaining: 3m 37s
12:	learn: 0.4710573	total: 2.85s	remaining: 3m 36s
13:	learn: 0.4619284	total: 3.05s	remaining: 3m 35s
14:	learn: 0.4542476	total: 3.25s	remaining: 3m 33s
15:	learn: 0.4462466	total: 3.46s	remaining: 3m 32s
16:	learn: 0.4391626	total: 3.66s	remaining: 3m 31s
17:	learn: 0.4317651	total: 3.86s	remaining: 3m 30s
18:	learn: 0.4250425	total: 4.07s	remaining: 3m 30s
19:	learn: 0.4182759	t

159:	learn: 0.2905640	total: 32.6s	remaining: 2m 51s
160:	learn: 0.2903638	total: 32.9s	remaining: 2m 51s
161:	learn: 0.2901861	total: 33.1s	remaining: 2m 51s
162:	learn: 0.2898955	total: 33.3s	remaining: 2m 50s
163:	learn: 0.2896481	total: 33.5s	remaining: 2m 50s
164:	learn: 0.2893654	total: 33.7s	remaining: 2m 50s
165:	learn: 0.2892107	total: 33.9s	remaining: 2m 50s
166:	learn: 0.2890534	total: 34.1s	remaining: 2m 49s
167:	learn: 0.2888672	total: 34.3s	remaining: 2m 49s
168:	learn: 0.2885904	total: 34.5s	remaining: 2m 49s
169:	learn: 0.2883779	total: 34.7s	remaining: 2m 49s
170:	learn: 0.2881560	total: 34.9s	remaining: 2m 49s
171:	learn: 0.2880153	total: 35.1s	remaining: 2m 48s
172:	learn: 0.2877875	total: 35.3s	remaining: 2m 48s
173:	learn: 0.2875885	total: 35.5s	remaining: 2m 48s
174:	learn: 0.2872944	total: 35.7s	remaining: 2m 48s
175:	learn: 0.2871556	total: 35.9s	remaining: 2m 48s
176:	learn: 0.2870297	total: 36.1s	remaining: 2m 48s
177:	learn: 0.2868248	total: 36.3s	remaining: 

315:	learn: 0.2671099	total: 1m 4s	remaining: 2m 20s
316:	learn: 0.2670158	total: 1m 4s	remaining: 2m 19s
317:	learn: 0.2669185	total: 1m 5s	remaining: 2m 19s
318:	learn: 0.2668485	total: 1m 5s	remaining: 2m 19s
319:	learn: 0.2667598	total: 1m 5s	remaining: 2m 19s
320:	learn: 0.2666398	total: 1m 5s	remaining: 2m 19s
321:	learn: 0.2664824	total: 1m 5s	remaining: 2m 18s
322:	learn: 0.2663798	total: 1m 6s	remaining: 2m 18s
323:	learn: 0.2662806	total: 1m 6s	remaining: 2m 18s
324:	learn: 0.2660796	total: 1m 6s	remaining: 2m 18s
325:	learn: 0.2659993	total: 1m 6s	remaining: 2m 17s
326:	learn: 0.2659118	total: 1m 6s	remaining: 2m 17s
327:	learn: 0.2658153	total: 1m 7s	remaining: 2m 17s
328:	learn: 0.2656970	total: 1m 7s	remaining: 2m 17s
329:	learn: 0.2656205	total: 1m 7s	remaining: 2m 17s
330:	learn: 0.2655154	total: 1m 7s	remaining: 2m 16s
331:	learn: 0.2654026	total: 1m 7s	remaining: 2m 16s
332:	learn: 0.2652856	total: 1m 8s	remaining: 2m 16s
333:	learn: 0.2651775	total: 1m 8s	remaining: 

469:	learn: 0.2530034	total: 1m 35s	remaining: 1m 48s
470:	learn: 0.2529237	total: 1m 36s	remaining: 1m 47s
471:	learn: 0.2528615	total: 1m 36s	remaining: 1m 47s
472:	learn: 0.2527659	total: 1m 36s	remaining: 1m 47s
473:	learn: 0.2527177	total: 1m 36s	remaining: 1m 47s
474:	learn: 0.2526625	total: 1m 36s	remaining: 1m 47s
475:	learn: 0.2525802	total: 1m 37s	remaining: 1m 46s
476:	learn: 0.2525314	total: 1m 37s	remaining: 1m 46s
477:	learn: 0.2524849	total: 1m 37s	remaining: 1m 46s
478:	learn: 0.2524129	total: 1m 37s	remaining: 1m 46s
479:	learn: 0.2523146	total: 1m 37s	remaining: 1m 45s
480:	learn: 0.2522282	total: 1m 38s	remaining: 1m 45s
481:	learn: 0.2521814	total: 1m 38s	remaining: 1m 45s
482:	learn: 0.2521313	total: 1m 38s	remaining: 1m 45s
483:	learn: 0.2520582	total: 1m 38s	remaining: 1m 45s
484:	learn: 0.2520133	total: 1m 38s	remaining: 1m 44s
485:	learn: 0.2519713	total: 1m 39s	remaining: 1m 44s
486:	learn: 0.2519246	total: 1m 39s	remaining: 1m 44s
487:	learn: 0.2518635	total:

622:	learn: 0.2432456	total: 2m 7s	remaining: 1m 16s
623:	learn: 0.2431965	total: 2m 7s	remaining: 1m 16s
624:	learn: 0.2431477	total: 2m 7s	remaining: 1m 16s
625:	learn: 0.2431037	total: 2m 7s	remaining: 1m 16s
626:	learn: 0.2430430	total: 2m 8s	remaining: 1m 16s
627:	learn: 0.2430093	total: 2m 8s	remaining: 1m 15s
628:	learn: 0.2429452	total: 2m 8s	remaining: 1m 15s
629:	learn: 0.2428825	total: 2m 8s	remaining: 1m 15s
630:	learn: 0.2428231	total: 2m 8s	remaining: 1m 15s
631:	learn: 0.2427935	total: 2m 9s	remaining: 1m 15s
632:	learn: 0.2427540	total: 2m 9s	remaining: 1m 14s
633:	learn: 0.2427070	total: 2m 9s	remaining: 1m 14s
634:	learn: 0.2426745	total: 2m 9s	remaining: 1m 14s
635:	learn: 0.2426383	total: 2m 9s	remaining: 1m 14s
636:	learn: 0.2426017	total: 2m 10s	remaining: 1m 14s
637:	learn: 0.2425572	total: 2m 10s	remaining: 1m 13s
638:	learn: 0.2425248	total: 2m 10s	remaining: 1m 13s
639:	learn: 0.2424830	total: 2m 10s	remaining: 1m 13s
640:	learn: 0.2424529	total: 2m 10s	remain

777:	learn: 0.2365669	total: 2m 38s	remaining: 45.3s
778:	learn: 0.2365169	total: 2m 38s	remaining: 45.1s
779:	learn: 0.2364767	total: 2m 39s	remaining: 44.9s
780:	learn: 0.2364520	total: 2m 39s	remaining: 44.7s
781:	learn: 0.2364234	total: 2m 39s	remaining: 44.5s
782:	learn: 0.2363863	total: 2m 39s	remaining: 44.2s
783:	learn: 0.2363320	total: 2m 39s	remaining: 44s
784:	learn: 0.2362988	total: 2m 40s	remaining: 43.8s
785:	learn: 0.2362632	total: 2m 40s	remaining: 43.6s
786:	learn: 0.2362164	total: 2m 40s	remaining: 43.4s
787:	learn: 0.2361739	total: 2m 40s	remaining: 43.2s
788:	learn: 0.2361431	total: 2m 40s	remaining: 43s
789:	learn: 0.2360919	total: 2m 41s	remaining: 42.8s
790:	learn: 0.2360507	total: 2m 41s	remaining: 42.6s
791:	learn: 0.2360193	total: 2m 41s	remaining: 42.4s
792:	learn: 0.2359514	total: 2m 41s	remaining: 42.2s
793:	learn: 0.2358654	total: 2m 41s	remaining: 42s
794:	learn: 0.2358157	total: 2m 42s	remaining: 41.8s
795:	learn: 0.2357936	total: 2m 42s	remaining: 41.6s

935:	learn: 0.2310600	total: 3m 10s	remaining: 13s
936:	learn: 0.2310329	total: 3m 10s	remaining: 12.8s
937:	learn: 0.2309705	total: 3m 10s	remaining: 12.6s
938:	learn: 0.2309362	total: 3m 10s	remaining: 12.4s
939:	learn: 0.2309128	total: 3m 11s	remaining: 12.2s
940:	learn: 0.2308914	total: 3m 11s	remaining: 12s
941:	learn: 0.2308435	total: 3m 11s	remaining: 11.8s
942:	learn: 0.2308048	total: 3m 11s	remaining: 11.6s
943:	learn: 0.2307612	total: 3m 11s	remaining: 11.4s
944:	learn: 0.2307322	total: 3m 12s	remaining: 11.2s
945:	learn: 0.2307072	total: 3m 12s	remaining: 11s
946:	learn: 0.2306933	total: 3m 12s	remaining: 10.8s
947:	learn: 0.2306753	total: 3m 12s	remaining: 10.6s
948:	learn: 0.2306353	total: 3m 12s	remaining: 10.4s
949:	learn: 0.2306121	total: 3m 13s	remaining: 10.2s
950:	learn: 0.2305934	total: 3m 13s	remaining: 9.96s
951:	learn: 0.2305551	total: 3m 13s	remaining: 9.75s
952:	learn: 0.2305341	total: 3m 13s	remaining: 9.55s
953:	learn: 0.2305081	total: 3m 13s	remaining: 9.35s

In [65]:
print('TRAIN AUC: {:.2f}'.format(roc_auc_score(y_train, y_pred_train), ))

print('TEST AUC: {:.2f}'.format(roc_auc_score(y_test, y_pred), ))

TRAIN AUC: 0.79
TEST AUC: 0.79
