# Non-NN models

We should study these notebooks:

https://www.kaggle.com/code/jeroenvdd/tpsapr22-best-non-dl-model-tsflex-powershap?scriptVersionId=94240450

https://www.kaggle.com/code/ambrosm/tpsapr22-best-model-without-nn

In [1]:
# Directory the competition CSV files are read from (trailing slash included,
# because `load_label` builds its path by plain string concatenation).
input_path = "../input/tabular-playground-series-apr-2022/"
# Directory that `submit` writes `submission.csv` into.
output_path = "./"

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score

def load_raw_data(train_or_test='train'):
    """Read one raw CSV split from ``input_path`` into a DataFrame.

    Parameters
    ----------
    train_or_test : str, default 'train'
        File stem of the CSV to load; the file read is
        ``{input_path}/{train_or_test}.csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents, unmodified.
    """
    return pd.read_csv(f'{input_path}/{train_or_test}.csv')

def load_label(train_or_test='train'):
    """Return the ``state`` column of the label file for the requested split.

    Parameters
    ----------
    train_or_test : str, default 'train'
        ``'train'`` reads ``train_labels.csv``; any other value reads
        ``sample_submission.csv``.

    Returns
    -------
    numpy.ndarray
        Values of the ``state`` column, in file order.
    """
    if train_or_test == 'train':
        csv_name = 'train_labels.csv'
    else:
        csv_name = 'sample_submission.csv'
    labels = pd.read_csv(input_path + csv_name)
    return labels['state'].values

def competition_metric(y_true, y_score):
    """Score predictions with ROC AUC, the metric used throughout this notebook.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_score : array-like
        Predicted scores for the positive class.

    Returns
    -------
    float
        Result of ``sklearn.metrics.roc_auc_score(y_true, y_score)``.
    """
    auc = roc_auc_score(y_true, y_score)
    return auc

def evaluate(model, X, y):
    """Compute the competition metric of a fitted classifier on ``(X, y)``.

    Parameters
    ----------
    model : estimator exposing ``predict_proba``
        Fitted classifier to score.
    X : array-like
        Feature matrix passed to ``model.predict_proba``.
    y : array-like
        Ground-truth labels for the rows of ``X``.

    Returns
    -------
    float
        ``competition_metric`` of ``y`` against column 1 of the predicted
        probabilities.
    """
    # Column 1 of predict_proba is used as the positive-class score.
    positive_scores = model.predict_proba(X)[:, 1]
    return competition_metric(y, positive_scores)

def submit(arr):
    """Write ``arr`` as the ``state`` column of a submission file.

    The sample submission under ``input_path`` is used as the template, its
    ``state`` column is replaced by ``arr``, and the result is saved to
    ``{output_path}/submission.csv`` without the index.

    Parameters
    ----------
    arr : array-like
        Predictions, one per row of the sample submission.
    """
    template = pd.read_csv(f'{input_path}/sample_submission.csv')
    submission = template.assign(state=arr)
    submission.to_csv(f'{output_path}/submission.csv', index=False)

In [3]:
from sklearn.base import TransformerMixin, BaseEstimator
from scipy.stats import kurtosis
from tsfresh.feature_extraction.extraction import extract_features

def group_splitter(df, nfold=5, random_state=None, steps_per_sequence=60):
    """Yield subject-grouped validation masks for ``nfold`` cross-validation folds.

    Each distinct value of ``df['subject']`` is randomly assigned to exactly one
    fold, so all rows of a subject end up in the same validation set. Because
    the assignment is an independent uniform draw per subject, fold sizes are
    not balanced and a fold can be empty when no subject draws its number.

    Parameters
    ----------
    df : pandas.DataFrame
        Row-level data containing a ``subject`` column.
    nfold : int, default 5
        Number of folds to generate.
    random_state : int or None, default None
        Seed passed to ``numpy.random.default_rng`` for the subject-to-fold
        assignment.
    steps_per_sequence : int, default 60
        Number of consecutive rows of ``df`` that make up one sequence (and
        therefore share one label). The label-level mask is built by keeping
        every ``steps_per_sequence``-th row of the row-level mask.
        NOTE(review): this assumes ``df`` stores every sequence as a contiguous
        run of exactly this many rows, in the same order as the labels; the
        function does not verify it.

    Yields
    ------
    mask_df_val : pandas.Series of bool
        Aligned with the rows of ``df``; True for rows in the validation fold.
    mask_y_val : pandas.Series of bool
        One entry per sequence; True for sequences in the validation fold.
    """
    subject_nums = df['subject'].unique()
    rng = np.random.default_rng(random_state)
    # One fold number in [0, nfold) per subject, drawn once so that the folds
    # are disjoint and together cover every subject.
    subject_to_setnum = rng.integers(0, nfold, subject_nums.shape[0])
    for fold in range(nfold):
        val_subjects = subject_nums[subject_to_setnum == fold]
        mask_df_val = df['subject'].isin(val_subjects)
        # Down-sample the row mask to one flag per sequence (its first row).
        mask_y_val = mask_df_val.iloc[::steps_per_sequence]
        yield mask_df_val, mask_y_val

In [4]:
from sklearn.base import TransformerMixin, BaseEstimator

! pip install pyts

Collecting pyts
  Downloading pyts-0.12.0-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyts
Successfully installed pyts-0.12.0
[0m

In [5]:
from tbr_apr_mds import ElementaryExtractor ,  TsfreshExtractor, MBOP, PartialClassifier 

In [6]:
# Load the full training table and the matching label vector.
df = load_raw_data('train')
y = load_label('train')
# Optional subsample for quick experiments (currently disabled).
# df = df.loc[df.sequence<5000]
# y = y[:5000]
# `zf` is a copy of `df` whose columns from position 3 onward (the sensor
# columns in the printed head) are overwritten with the integer result of an
# element-wise `== None` comparison; the printed head shows these are all 0.
# The CV cell later pushes `zf` through each fitted transformer only to read
# the width of its output.
# NOTE(review): `== None` looks unintended. If the goal was to flag missing
# values it presumably should be `.isna()`; if the goal was simply a zero
# frame, assigning 0 directly would be clearer. Confirm intent before changing,
# since the resulting dtype feeds project-local transformers.
zf = df.copy()
zf.iloc[:,3:]=(df.iloc[:,3:]==None).astype(int)
print(zf.head())

   sequence  subject  step  sensor_00  sensor_01  sensor_02  sensor_03  \
0         0       47     0          0          0          0          0   
1         0       47     1          0          0          0          0   
2         0       47     2          0          0          0          0   
3         0       47     3          0          0          0          0   
4         0       47     4          0          0          0          0   

   sensor_04  sensor_05  sensor_06  sensor_07  sensor_08  sensor_09  \
0          0          0          0          0          0          0   
1          0          0          0          0          0          0   
2          0          0          0          0          0          0   
3          0          0          0          0          0          0   
4          0          0          0          0          0          0   

   sensor_10  sensor_11  sensor_12  
0          0          0          0  
1          0          0          0  
2          0     

In [7]:

from lightgbm import LGBMClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.pipeline import make_union
from sklearn.metrics import classification_report
# Validation AUC of every fold, in fold order.
cv_scores = []

# Three feature families; the union concatenates their outputs column-wise in
# this order, which is what the `partition` bookkeeping below relies on.
extractors = [ElementaryExtractor(), TsfreshExtractor(), MBOP(window_size=4,word_size=4,n_bins=10)]
extractor = make_union(*extractors)

for mask_df_val, mask_y_val in group_splitter(df, nfold=5, random_state=42):
    # Row-level masks select raw rows, label-level masks select one label per sequence.
    df_train, y_train = df[~mask_df_val], y[~mask_y_val]
    df_val, y_val = df[mask_df_val], y[mask_y_val]
    X_train = extractor.fit_transform(df_train)

    # Recover the inclusive (first, last) column range that each fitted
    # transformer occupies in the union output, by transforming the dummy
    # frame `zf` with each transformer and reading off the output width.
    # NOTE(review): this repeats the whole feature extraction on dummy data in
    # every fold just to obtain column counts; a cheaper probe may be possible
    # if the transformers' widths do not depend on the number of rows.
    partition = []
    first_col = 0
    for _, transformer in extractor.transformer_list:
        width = transformer.transform(zf[~mask_df_val]).shape[1]
        partition.append((first_col, first_col + width - 1))
        first_col += width
    X_val = extractor.transform(df_val)
    print(partition)
    print(X_train.shape, X_val.shape)

    # Base learners: for each feature family one depth-limited model and one
    # DART model (each PartialClassifier presumably trains only on the column
    # range given by `indices` - confirm in tbr_apr_mds), plus one plain
    # LightGBM model that sees every column.
    clfs = [
        PartialClassifier(n_estimators=200, random_state=42, max_depth=10, indices=tp)
        for tp in partition
    ]
    clfs += [
        PartialClassifier(n_estimators=200, random_state=42, max_depth=-1,
                          boosting_type="dart", indices=tp)
        for tp in partition
    ]
    clfs.append(LGBMClassifier(n_estimators=200, random_state=42, max_depth=-1))
    # StackingClassifier requires a unique name per estimator; use the position.
    estimators = [(str(i), base_clf) for i, base_clf in enumerate(clfs)]

    clf = StackingClassifier(
        estimators=estimators,
        final_estimator=LGBMClassifier(n_estimators=300, random_state=42, max_depth=-1))
    clf.fit(X_train, y_train)
    print(evaluate(clf, X_train, y_train))
    # Score the validation fold once and reuse the value; the original ran the
    # full stacked predict_proba twice to obtain the same number.
    val_score = evaluate(clf, X_val, y_val)
    print(val_score)
    # `predict` already returns class labels; the threshold is kept as written
    # and leaves 0/1 labels unchanged.
    print(classification_report(y_val, (clf.predict(X_val) >= 0.5).astype(int), digits=4 ))
    cv_scores.append(val_score)
    # Printed after every fold, so this is the running mean over the folds
    # finished so far; only the last print is the full 5-fold score.
    print(f'5-fold CV score: {np.mean(cv_scores):.4f}')

0
1


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6
7
8


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
0-th machine fitted
1-th machine fitted
2-th machine fitted
3-th machine fitted
4-th machine fitted
5-th machine fitted
6-th machine fitted
7-th machine fitted
8-th machine fitted
9-th machine fitted
10-th machine fitted
11-th machine fitted
12-th machine fitted
reducing
0-th channel finished
Amount of reduction for 0 is: 79 to 24
1-th channel finished
Amount of reduction for 1 is: 84 to 24
2-th channel finished
Amount of reduction for 2 is: 170 to 73
3-th channel finished
Amount of reduction for 3 is: 80 to 24
4-th channel finished
Amount of reduction for 4 is: 1988 to 24
5-th channel finished
Amount of reduction for 5 is: 435 to 24
6-th channel finished
Amount of reduction for 6 is: 85 to 24
7-th channel finished


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6
7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(20817, 411)
0
1
2


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


3
4
5
6


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(5151, 411)
[(0, 107), (108, 320), (321, 731)]
(20817, 732) (5151, 732)
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on fe

  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
0-th machine fitted
1-th machine fitted
2-th machine fitted
3-th machine fitted
4-th machine fitted
5-th machine fitted
6-th machine fitted
7-th machine fitted
8-th machine fitted
9-th machine fitted
10-th machine fitted
11-th machine fitted
12-th machine fitted
reducing
0-th channel finished
Amount of reduction for 0 is: 80 to 24
1-th channel finished
Amount of reduction for 1 is: 90 to 24
2-th channel finished
Amount of reduction for 2 is: 174 to 73
3-th channel finished
Amount of reduction for 3 is: 90 to 24
4-th channel finished
Amount of reduction for 4 is: 1254 to 24
5-th channel finished
Amount of reduction for 5 is: 245 to 24
6-th channel finished
Amount of reduction for 6 is: 83 to 24
7-th channel finished


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


1
2
3
4
5
6
7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(21369, 411)
0
1
2
3


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


4
5
6
7
8
9
10
11


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(4599, 411)
[(0, 107), (108, 320), (321, 731)]
(21369, 732) (4599, 732)
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on fe

  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
0-th machine fitted
1-th machine fitted
2-th machine fitted
3-th machine fitted
4-th machine fitted
5-th machine fitted
6-th machine fitted
7-th machine fitted
8-th machine fitted
9-th machine fitted
10-th machine fitted
11-th machine fitted
12-th machine fitted
reducing
0-th channel finished
Amount of reduction for 0 is: 78 to 24
1-th channel finished
Amount of reduction for 1 is: 90 to 24
2-th channel finished
Amount of reduction for 2 is: 172 to 73
3-th channel finished
Amount of reduction for 3 is: 89 to 24
4-th channel finished
Amount of reduction for 4 is: 2019 to 24
5-th channel finished
Amount of reduction for 5 is: 457 to 24
6-th channel finished
Amount of reduction for 6 is: 84 to 24
7-th channel finished


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


1
2
3
4
5
6
7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(19964, 411)
0
1
2


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


3
4
5
6
7
8


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(6004, 411)
[(0, 107), (108, 320), (321, 731)]
(19964, 732) (6004, 732)
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on fe

  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
0-th machine fitted
1-th machine fitted
2-th machine fitted
3-th machine fitted
4-th machine fitted
5-th machine fitted
6-th machine fitted
7-th machine fitted
8-th machine fitted
9-th machine fitted
10-th machine fitted
11-th machine fitted
12-th machine fitted
reducing
0-th channel finished
Amount of reduction for 0 is: 82 to 24
1-th channel finished
Amount of reduction for 1 is: 84 to 24
2-th channel finished
Amount of reduction for 2 is: 172 to 73
3-th channel finished
Amount of reduction for 3 is: 89 to 24
4-th channel finished
Amount of reduction for 4 is: 2205 to 24
5-th channel finished
Amount of reduction for 5 is: 469 to 24
6-th channel finished
Amount of reduction for 6 is: 85 to 24
7-th channel finished


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


1
2
3
4
5
6
7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(20714, 411)
0
1
2
3


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


4
5
6
7
8
9
10
11


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(5254, 411)
[(0, 107), (108, 320), (321, 731)]
(20714, 732) (5254, 732)
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on fe

  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
0-th machine fitted
1-th machine fitted
2-th machine fitted
3-th machine fitted
4-th machine fitted
5-th machine fitted
6-th machine fitted
7-th machine fitted
8-th machine fitted
9-th machine fitted
10-th machine fitted
11-th machine fitted
12-th machine fitted
reducing
0-th channel finished
Amount of reduction for 0 is: 80 to 24
1-th channel finished
Amount of reduction for 1 is: 90 to 24
2-th channel finished
Amount of reduction for 2 is: 173 to 73
3-th channel finished
Amount of reduction for 3 is: 91 to 24
4-th channel finished
Amount of reduction for 4 is: 2096 to 24
5-th channel finished
Amount of reduction for 5 is: 465 to 24
6-th channel finished
Amount of reduction for 6 is: 77 to 24
7-th channel finished


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


2
3
4
5
6
7
8
9
10
11
12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(21008, 411)
0
1
2
3


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


4
5
6
7
8
9
10
11


  features[f'sm_{i:0>2}'] = np.nan_to_num(features[f'std_{i:0>2}'] / np.abs(np.mean(channel, axis=1))).clip(-1e30, 1e30)


12
El. finished


  features[f'down_mean_02'] = np.nan_to_num(features[f'down_min_02'] / features[f'down_count_02'], neginf=-40)


0 feature extracted
1 feature extracted
2 feature extracted
3 feature extracted
4 feature extracted
5 feature extracted
6 feature extracted
7 feature extracted
8 feature extracted
9 feature extracted
10 feature extracted
11 feature extracted
12 feature extracted
ts finished
shape=(4960, 411)
[(0, 107), (108, 320), (321, 731)]
(21008, 732) (4960, 732)
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 108 to 320
model fitting on feature 321 to 731
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on feature 0 to 107
model fitting on fe

In [8]:
# clf = LGBMClassifier(num_leaves=31, max_depth=4, n_estimators=100)

# df_train_final = df
# y_train_final = y
# X_train_final = extractor.fit_transform(df_train_final)
# clf.fit(X_train_final, y_train_final)

# df_test_final = load_raw_data('test')
# X_test_final = extractor.transform(df_test_final)
# y_pred = clf.predict_proba(X_test_final)[:, 1]
# submit(y_pred)