In [1]:
import numpy as np 
import pandas as pd 
from catboost import CatBoostClassifier 
from sklearn.model_selection import train_test_split, StratifiedKFold
from tqdm import tqdm
import tensorflow as tf 
import random
from scipy.stats import kurtosis, skew
import time  

In [2]:
train = pd.read_csv('train_features.csv') 
train_labels = pd.read_csv('train_labels.csv') 
test = pd.read_csv('test_features.csv') 
submission = pd.read_csv('sample_submission.csv')

In [3]:
X = tf.reshape(np.array(train.iloc[:,2:]),[-1, 600, 6])
X = np.asarray(X)

In [4]:
y = train_labels['label'].values 

In [5]:
# we have 3125 labels for the original train set 
labels = np.arange(0,3125)

In [6]:
X.shape, y.shape, labels.shape

((3125, 600, 6), (3125,), (3125,))

In [10]:
k = 10 
models = [] 
kfold = StratifiedKFold(n_splits = k, shuffle = True, random_state = 20210808) 
for n_fold, (train_idx, val_idx) in enumerate(kfold.split(X, y)): 
    X_train, X_val = X[train_idx], X[val_idx] 
    y_train, y_val = y[train_idx], y[val_idx] 
    l_train, l_val = labels[train_idx], labels[val_idx]  
        
    ##### augment data #####
    print("... Augmenting Data ...")
    X_augmented = [] 
    y_augmented = []  
    l_augmented = [] 
    idx = 3125
    for i in tqdm(range(X_train.shape[0]), position = 0, leave = True): 
        for j in range(10): 
            shifted = np.roll(X_train[i], int(random.random() * 600)) 
            X_augmented.append(shifted) 
            y_augmented.append(y_train[i])     
            l_augmented.append(idx) 
            idx += 1 
    X_augmented = np.asarray(X_augmented) 
    y_augmented = np.asarray(y_augmented)
    l_augmented = np.asarray(l_augmented) 
    X_train = np.concatenate([X_train, X_augmented]) 
    y_train = np.concatenate([y_train, y_augmented])
    l_train = np.concatenate([l_train, l_augmented])  
    
    
    '''
    ##### feature engineering data ##### 
    print("... Feature Engineering ...")
    X_fourier_real = [] 
    X_fourier_imag = [] 
    for i in tqdm(range(X_train.shape[0]), position = 0, leave = True):  
        real_part = np.fft.fft(X_train[i]).real 
        imag_part = np.fft.fft(X_train[i]).imag 
        X_fourier_real.append(real_part)
        X_fourier_imag.append(imag_part) 
    
    X_fourier_real = np.asarray(X_fourier_real)  
    X_fourier_imag = np.asarray(X_fourier_imag)
    X_train = np.concatenate([X_train, X_fourier_real, X_fourier_imag], axis = 2)   
    
    
    X_val_fourier_real = [] 
    X_val_fourier_imag = [] 
    for i in tqdm(range(X_val.shape[0]), position = 0, leave = True):
        real_part = np.fft.fft(X_val[i]).real 
        imag_part = np.fft.fft(X_val[i]).imag 
        X_val_fourier_real.append(real_part) 
        X_val_fourier_imag.append(imag_part)
    
    X_val_fourier_real = np.asarray(X_val_fourier_real) 
    X_val_fourier_imag = np.asarray(X_val_fourier_imag)
    X_val = np.concatenate([X_val, X_val_fourier_real, X_val_fourier_imag], axis = 2)  
    ''' 
    
    
    ##### create dataframe for X_train and X_val ##### 
    print("... Creating Dataframe ...")
    l_train_flat = [] 
    for i in tqdm(range(len(l_train))):
        for j in range(600): 
            l_train_flat.append(l_train[i]) 
    l_train_flat = np.asarray(l_train_flat)
    
    l_val_flat = [] 
    for i in tqdm(range(len(l_val))): 
        for j in range(600): 
            l_val_flat.append(l_val[i]) 
    l_val_flat = np.asarray(l_val_flat)
    
    start = time.time() 
    print("... Aggregating Train DataFrame ...")
    train_df = pd.DataFrame(np.concatenate(X_train)) 
    train_df.insert(0,'id',l_train_flat) 
    # features = ['id', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
    features = ['id',0,1,2,3,4,5]
    train_df_processed = train_df[features].groupby('id').agg(['max', 'min', 'mean', 'std', 'skew']) 
    
    print("... Aggregating Val DataFrame ...") 
    val_df = pd.DataFrame(np.concatenate(X_val)) 
    val_df.insert(0, 'id', l_val_flat)  
    val_df_processed = val_df[features].groupby('id').agg(['max', 'min', 'mean', 'std', 'skew'])   
    
    print("total pre-processing time = {}".format(time.time() - start))  
    
    
    print("... Begin Training ...") 
    print("Validating on fold {}".format(n_fold))

    model = CatBoostClassifier(iterations = 20000, 
                               learning_rate = 0.01, 
                               l2_leaf_reg = 3.5,
                               depth = 8, 
                               loss_function = 'MultiClass',
                               eval_metric = 'MultiClass',
                               use_best_model = True, 
                               task_type = 'GPU',
                               verbose = 200,
                               random_seed = 88888,
                               classes_count = 61) 
    
    
    model.fit(train_df_processed, y_train, eval_set = (val_df_processed, y_val))
    
    model.save_model('augmented_catboost_' + str(n_fold))  # save model checkpoint just in case 
    models.append(model) 


  9%|▉         | 257/2812 [00:00<00:00, 2564.32it/s]

... Augmenting Data ...


100%|██████████| 2812/2812 [00:01<00:00, 2691.32it/s]
  2%|▏         | 753/30932 [00:00<00:04, 7522.24it/s]

... Creating Dataframe ...


100%|██████████| 30932/30932 [00:04<00:00, 7544.79it/s]
100%|██████████| 313/313 [00:00<00:00, 7623.35it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 43.76103615760803
... Begin Training ...
Validating on fold 0
0:	learn: 3.9230942	test: 3.9301867	best: 3.9301867 (0)	total: 48.7ms	remaining: 16m 14s




200:	learn: 1.8543712	test: 1.8255264	best: 1.8255264 (200)	total: 8.55s	remaining: 14m 2s
400:	learn: 1.4494270	test: 1.5577905	best: 1.5577905 (400)	total: 17.7s	remaining: 14m 26s
600:	learn: 1.1723527	test: 1.3830990	best: 1.3830990 (600)	total: 26.8s	remaining: 14m 24s
800:	learn: 0.9717152	test: 1.2708054	best: 1.2708054 (800)	total: 35.5s	remaining: 14m 9s
1000:	learn: 0.8236964	test: 1.1975164	best: 1.1975164 (1000)	total: 44.3s	remaining: 14m 1s
1200:	learn: 0.7071188	test: 1.1450898	best: 1.1450898 (1200)	total: 53.1s	remaining: 13m 51s
1400:	learn: 0.6132337	test: 1.1055810	best: 1.1055810 (1400)	total: 1m 1s	remaining: 13m 42s
1600:	learn: 0.5363238	test: 1.0748266	best: 1.0748266 (1600)	total: 1m 10s	remaining: 13m 31s
1800:	learn: 0.4734472	test: 1.0512019	best: 1.0512019 (1800)	total: 1m 19s	remaining: 13m 21s
2000:	learn: 0.4183920	test: 1.0304012	best: 1.0304012 (2000)	total: 1m 28s	remaining: 13m 12s
2200:	learn: 0.3733191	test: 1.0156154	best: 1.0156154 (2200)	total:

17600:	learn: 0.0186467	test: 0.9711881	best: 0.9352655 (6505)	total: 12m 55s	remaining: 1m 45s
17800:	learn: 0.0183422	test: 0.9720040	best: 0.9352655 (6505)	total: 13m 4s	remaining: 1m 36s
18000:	learn: 0.0180428	test: 0.9727031	best: 0.9352655 (6505)	total: 13m 12s	remaining: 1m 28s
18200:	learn: 0.0177531	test: 0.9730887	best: 0.9352655 (6505)	total: 13m 21s	remaining: 1m 19s
18400:	learn: 0.0174604	test: 0.9739314	best: 0.9352655 (6505)	total: 13m 29s	remaining: 1m 10s
18600:	learn: 0.0171828	test: 0.9748362	best: 0.9352655 (6505)	total: 13m 38s	remaining: 1m 1s
18800:	learn: 0.0168998	test: 0.9757236	best: 0.9352655 (6505)	total: 13m 47s	remaining: 52.7s
19000:	learn: 0.0166396	test: 0.9764342	best: 0.9352655 (6505)	total: 13m 55s	remaining: 43.9s
19200:	learn: 0.0163888	test: 0.9769689	best: 0.9352655 (6505)	total: 14m 4s	remaining: 35.1s
19400:	learn: 0.0161421	test: 0.9776088	best: 0.9352655 (6505)	total: 14m 13s	remaining: 26.3s
19600:	learn: 0.0158999	test: 0.9783961	best: 0

 11%|█         | 310/2812 [00:00<00:00, 3094.96it/s]

... Augmenting Data ...


100%|██████████| 2812/2812 [00:00<00:00, 3145.11it/s]
  3%|▎         | 800/30932 [00:00<00:03, 7995.78it/s]

... Creating Dataframe ...


100%|██████████| 30932/30932 [00:03<00:00, 8015.51it/s]
100%|██████████| 313/313 [00:00<00:00, 8511.08it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 43.94077968597412
... Begin Training ...
Validating on fold 1




0:	learn: 3.9221950	test: 3.9286400	best: 3.9286400 (0)	total: 49.5ms	remaining: 16m 30s
200:	learn: 1.8633969	test: 1.8523528	best: 1.8523528 (200)	total: 8.57s	remaining: 14m 4s
400:	learn: 1.4453965	test: 1.5687968	best: 1.5687968 (400)	total: 17.8s	remaining: 14m 30s
600:	learn: 1.1670604	test: 1.3884381	best: 1.3884381 (600)	total: 27s	remaining: 14m 32s
800:	learn: 0.9615926	test: 1.2665339	best: 1.2665339 (800)	total: 36.2s	remaining: 14m 26s
1000:	learn: 0.8097997	test: 1.1832285	best: 1.1832285 (1000)	total: 45.4s	remaining: 14m 20s
1200:	learn: 0.6966270	test: 1.1252176	best: 1.1252176 (1200)	total: 54.6s	remaining: 14m 14s
1400:	learn: 0.6049750	test: 1.0808531	best: 1.0808531 (1400)	total: 1m 3s	remaining: 14m 5s
1600:	learn: 0.5293781	test: 1.0429876	best: 1.0429876 (1600)	total: 1m 12s	remaining: 13m 56s
1800:	learn: 0.4665264	test: 1.0147794	best: 1.0147794 (1800)	total: 1m 22s	remaining: 13m 48s
2000:	learn: 0.4154814	test: 0.9890581	best: 0.9890581 (2000)	total: 1m 31s

17400:	learn: 0.0187917	test: 0.8554625	best: 0.8450433 (8815)	total: 13m	remaining: 1m 56s
17600:	learn: 0.0184745	test: 0.8558144	best: 0.8450433 (8815)	total: 13m 8s	remaining: 1m 47s
17800:	learn: 0.0181641	test: 0.8561745	best: 0.8450433 (8815)	total: 13m 17s	remaining: 1m 38s
18000:	learn: 0.0178616	test: 0.8567066	best: 0.8450433 (8815)	total: 13m 26s	remaining: 1m 29s
18200:	learn: 0.0175645	test: 0.8570122	best: 0.8450433 (8815)	total: 13m 35s	remaining: 1m 20s
18400:	learn: 0.0172843	test: 0.8573469	best: 0.8450433 (8815)	total: 13m 44s	remaining: 1m 11s
18600:	learn: 0.0170088	test: 0.8576236	best: 0.8450433 (8815)	total: 13m 52s	remaining: 1m 2s
18800:	learn: 0.0167444	test: 0.8578699	best: 0.8450433 (8815)	total: 14m 1s	remaining: 53.7s
19000:	learn: 0.0164883	test: 0.8581954	best: 0.8450433 (8815)	total: 14m 10s	remaining: 44.7s
19200:	learn: 0.0162485	test: 0.8584879	best: 0.8450433 (8815)	total: 14m 19s	remaining: 35.8s
19400:	learn: 0.0160115	test: 0.8588902	best: 0.84

  9%|▊         | 242/2812 [00:00<00:01, 2411.58it/s]

... Augmenting Data ...


100%|██████████| 2812/2812 [00:00<00:00, 2880.97it/s]
  3%|▎         | 817/30932 [00:00<00:03, 8168.98it/s]

... Creating Dataframe ...


100%|██████████| 30932/30932 [00:03<00:00, 7793.04it/s]
100%|██████████| 313/313 [00:00<00:00, 8440.71it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 40.281172037124634
... Begin Training ...
Validating on fold 2




0:	learn: 3.9279875	test: 3.9252918	best: 3.9252918 (0)	total: 49.1ms	remaining: 16m 22s
200:	learn: 1.8598702	test: 1.8576065	best: 1.8576065 (200)	total: 8.53s	remaining: 14m
400:	learn: 1.4345714	test: 1.5366556	best: 1.5366556 (400)	total: 17.7s	remaining: 14m 23s
600:	learn: 1.1497379	test: 1.3513750	best: 1.3513750 (600)	total: 26.6s	remaining: 14m 18s
800:	learn: 0.9484400	test: 1.2283362	best: 1.2283362 (800)	total: 35.6s	remaining: 14m 14s
1000:	learn: 0.8000902	test: 1.1426782	best: 1.1426782 (1000)	total: 44.8s	remaining: 14m 10s
1200:	learn: 0.6867023	test: 1.0818297	best: 1.0818297 (1200)	total: 54s	remaining: 14m 4s
1400:	learn: 0.5958914	test: 1.0387901	best: 1.0387901 (1400)	total: 1m 3s	remaining: 13m 57s
1600:	learn: 0.5235174	test: 1.0029167	best: 1.0029167 (1600)	total: 1m 12s	remaining: 13m 49s
1800:	learn: 0.4613134	test: 0.9758030	best: 0.9758030 (1799)	total: 1m 21s	remaining: 13m 40s
2000:	learn: 0.4107690	test: 0.9491476	best: 0.9491476 (2000)	total: 1m 30s	re

17400:	learn: 0.0186825	test: 0.8089578	best: 0.7969122 (8541)	total: 12m 52s	remaining: 1m 55s
17600:	learn: 0.0183622	test: 0.8094512	best: 0.7969122 (8541)	total: 13m	remaining: 1m 46s
17800:	learn: 0.0180603	test: 0.8099088	best: 0.7969122 (8541)	total: 13m 9s	remaining: 1m 37s
18000:	learn: 0.0177617	test: 0.8104637	best: 0.7969122 (8541)	total: 13m 18s	remaining: 1m 28s
18200:	learn: 0.0174680	test: 0.8106797	best: 0.7969122 (8541)	total: 13m 27s	remaining: 1m 19s
18400:	learn: 0.0171826	test: 0.8112666	best: 0.7969122 (8541)	total: 13m 35s	remaining: 1m 10s
18600:	learn: 0.0169050	test: 0.8117346	best: 0.7969122 (8541)	total: 13m 44s	remaining: 1m 2s
18800:	learn: 0.0166402	test: 0.8122782	best: 0.7969122 (8541)	total: 13m 53s	remaining: 53.2s
19000:	learn: 0.0163857	test: 0.8125332	best: 0.7969122 (8541)	total: 14m 2s	remaining: 44.3s
19200:	learn: 0.0161359	test: 0.8132457	best: 0.7969122 (8541)	total: 14m 11s	remaining: 35.4s
19400:	learn: 0.0158896	test: 0.8135913	best: 0.79

 12%|█▏        | 350/2812 [00:00<00:00, 3491.81it/s]

... Augmenting Data ...


100%|██████████| 2812/2812 [00:00<00:00, 3527.08it/s]
  3%|▎         | 814/30932 [00:00<00:03, 8134.81it/s]

... Creating Dataframe ...


100%|██████████| 30932/30932 [00:03<00:00, 7987.08it/s]
100%|██████████| 313/313 [00:00<00:00, 7742.04it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 39.623345375061035
... Begin Training ...
Validating on fold 3




0:	learn: 3.9210981	test: 3.9349502	best: 3.9349502 (0)	total: 47.8ms	remaining: 15m 55s
200:	learn: 1.8620704	test: 1.8546919	best: 1.8546919 (200)	total: 8.58s	remaining: 14m 5s
400:	learn: 1.4348466	test: 1.5281703	best: 1.5281703 (400)	total: 17.8s	remaining: 14m 29s
600:	learn: 1.1520880	test: 1.3359804	best: 1.3359804 (600)	total: 27s	remaining: 14m 30s
800:	learn: 0.9570864	test: 1.2250367	best: 1.2250367 (800)	total: 36.1s	remaining: 14m 24s
1000:	learn: 0.8075652	test: 1.1427632	best: 1.1427632 (1000)	total: 45.1s	remaining: 14m 16s
1200:	learn: 0.6914529	test: 1.0829690	best: 1.0829690 (1200)	total: 54.2s	remaining: 14m 7s
1400:	learn: 0.6011160	test: 1.0419052	best: 1.0419052 (1400)	total: 1m 3s	remaining: 13m 59s
1600:	learn: 0.5273128	test: 1.0101416	best: 1.0101416 (1600)	total: 1m 12s	remaining: 13m 51s
1800:	learn: 0.4668953	test: 0.9832112	best: 0.9832112 (1800)	total: 1m 21s	remaining: 13m 42s
2000:	learn: 0.4173242	test: 0.9627389	best: 0.9627389 (2000)	total: 1m 30s

17400:	learn: 0.0189089	test: 0.8712402	best: 0.8465885 (8477)	total: 12m 59s	remaining: 1m 56s
17600:	learn: 0.0185907	test: 0.8719398	best: 0.8465885 (8477)	total: 13m 8s	remaining: 1m 47s
17800:	learn: 0.0182764	test: 0.8725658	best: 0.8465885 (8477)	total: 13m 16s	remaining: 1m 38s
18000:	learn: 0.0179785	test: 0.8731447	best: 0.8465885 (8477)	total: 13m 25s	remaining: 1m 29s
18200:	learn: 0.0176939	test: 0.8738828	best: 0.8465885 (8477)	total: 13m 34s	remaining: 1m 20s
18400:	learn: 0.0174064	test: 0.8743778	best: 0.8465885 (8477)	total: 13m 43s	remaining: 1m 11s
18600:	learn: 0.0171289	test: 0.8748760	best: 0.8465885 (8477)	total: 13m 52s	remaining: 1m 2s
18800:	learn: 0.0168651	test: 0.8757781	best: 0.8465885 (8477)	total: 14m 1s	remaining: 53.6s
19000:	learn: 0.0166041	test: 0.8765578	best: 0.8465885 (8477)	total: 14m 9s	remaining: 44.7s
19200:	learn: 0.0163519	test: 0.8771406	best: 0.8465885 (8477)	total: 14m 19s	remaining: 35.8s
19400:	learn: 0.0161053	test: 0.8778875	best: 0

 10%|█         | 283/2812 [00:00<00:00, 2824.84it/s]

... Augmenting Data ...


100%|██████████| 2812/2812 [00:00<00:00, 3424.85it/s]
  3%|▎         | 813/30932 [00:00<00:03, 8126.99it/s]

... Creating Dataframe ...


100%|██████████| 30932/30932 [00:03<00:00, 7892.19it/s]
100%|██████████| 313/313 [00:00<00:00, 8249.86it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 41.55143189430237
... Begin Training ...
Validating on fold 4
0:	learn: 3.9218177	test: 3.9286719	best: 3.9286719 (0)	total: 50.5ms	remaining: 16m 49s




200:	learn: 1.8590340	test: 1.8710224	best: 1.8710224 (200)	total: 8.59s	remaining: 14m 6s
400:	learn: 1.4333643	test: 1.5685215	best: 1.5685215 (400)	total: 17.8s	remaining: 14m 31s
600:	learn: 1.1551173	test: 1.3781262	best: 1.3781262 (600)	total: 27s	remaining: 14m 31s
800:	learn: 0.9555665	test: 1.2595681	best: 1.2595681 (800)	total: 36.1s	remaining: 14m 25s
1000:	learn: 0.8061918	test: 1.1757536	best: 1.1757536 (1000)	total: 45.3s	remaining: 14m 20s
1200:	learn: 0.6936664	test: 1.1236626	best: 1.1236626 (1200)	total: 54.4s	remaining: 14m 12s
1400:	learn: 0.6006745	test: 1.0744488	best: 1.0744488 (1400)	total: 1m 3s	remaining: 14m 3s
1600:	learn: 0.5249842	test: 1.0405361	best: 1.0405361 (1600)	total: 1m 12s	remaining: 13m 55s
1800:	learn: 0.4616854	test: 1.0113014	best: 1.0113014 (1800)	total: 1m 21s	remaining: 13m 46s
2000:	learn: 0.4109254	test: 0.9894379	best: 0.9894278 (1998)	total: 1m 30s	remaining: 13m 36s
2200:	learn: 0.3678679	test: 0.9708947	best: 0.9708947 (2200)	total: 

17600:	learn: 0.0181221	test: 0.8914902	best: 0.8630020 (8205)	total: 13m 6s	remaining: 1m 47s
17800:	learn: 0.0178245	test: 0.8921023	best: 0.8630020 (8205)	total: 13m 15s	remaining: 1m 38s
18000:	learn: 0.0175276	test: 0.8927596	best: 0.8630020 (8205)	total: 13m 23s	remaining: 1m 29s
18200:	learn: 0.0172398	test: 0.8931755	best: 0.8630020 (8205)	total: 13m 32s	remaining: 1m 20s
18400:	learn: 0.0169611	test: 0.8939114	best: 0.8630020 (8205)	total: 13m 41s	remaining: 1m 11s
18600:	learn: 0.0166885	test: 0.8944226	best: 0.8630020 (8205)	total: 13m 50s	remaining: 1m 2s
18800:	learn: 0.0164359	test: 0.8949536	best: 0.8630020 (8205)	total: 13m 59s	remaining: 53.5s
19000:	learn: 0.0161846	test: 0.8956207	best: 0.8630020 (8205)	total: 14m 8s	remaining: 44.6s
19200:	learn: 0.0159320	test: 0.8962108	best: 0.8630020 (8205)	total: 14m 16s	remaining: 35.7s
19400:	learn: 0.0156901	test: 0.8967105	best: 0.8630020 (8205)	total: 14m 25s	remaining: 26.7s
19600:	learn: 0.0154662	test: 0.8973874	best: 0

 13%|█▎        | 354/2813 [00:00<00:00, 3534.10it/s]

... Augmenting Data ...


100%|██████████| 2813/2813 [00:00<00:00, 3545.29it/s]
  2%|▏         | 758/30943 [00:00<00:03, 7576.21it/s]

... Creating Dataframe ...


100%|██████████| 30943/30943 [00:03<00:00, 7967.44it/s]
100%|██████████| 312/312 [00:00<00:00, 8126.33it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 39.53170919418335
... Begin Training ...
Validating on fold 5




0:	learn: 3.9221719	test: 3.9224693	best: 3.9224693 (0)	total: 48.3ms	remaining: 16m 6s
200:	learn: 1.8503323	test: 1.8250439	best: 1.8250439 (200)	total: 8.56s	remaining: 14m 3s
400:	learn: 1.4304227	test: 1.5361528	best: 1.5361528 (400)	total: 17.7s	remaining: 14m 26s
600:	learn: 1.1580571	test: 1.3680157	best: 1.3680157 (600)	total: 26.9s	remaining: 14m 28s
800:	learn: 0.9618128	test: 1.2630961	best: 1.2630961 (800)	total: 36.1s	remaining: 14m 24s
1000:	learn: 0.8134488	test: 1.1800199	best: 1.1800199 (1000)	total: 45.2s	remaining: 14m 18s
1200:	learn: 0.6955166	test: 1.1203134	best: 1.1203134 (1200)	total: 54.4s	remaining: 14m 11s
1400:	learn: 0.6009672	test: 1.0791749	best: 1.0791749 (1400)	total: 1m 3s	remaining: 14m 3s
1600:	learn: 0.5256620	test: 1.0458856	best: 1.0458856 (1600)	total: 1m 12s	remaining: 13m 55s
1800:	learn: 0.4637374	test: 1.0188982	best: 1.0188982 (1800)	total: 1m 21s	remaining: 13m 47s
2000:	learn: 0.4105113	test: 0.9984505	best: 0.9984505 (2000)	total: 1m 30

17400:	learn: 0.0187112	test: 0.9270164	best: 0.8986850 (7241)	total: 12m 57s	remaining: 1m 56s
17600:	learn: 0.0183950	test: 0.9278950	best: 0.8986850 (7241)	total: 13m 6s	remaining: 1m 47s
17800:	learn: 0.0180886	test: 0.9284389	best: 0.8986850 (7241)	total: 13m 14s	remaining: 1m 38s
18000:	learn: 0.0177787	test: 0.9289710	best: 0.8986850 (7241)	total: 13m 23s	remaining: 1m 29s
18200:	learn: 0.0174853	test: 0.9299640	best: 0.8986850 (7241)	total: 13m 32s	remaining: 1m 20s
18400:	learn: 0.0172095	test: 0.9305528	best: 0.8986850 (7241)	total: 13m 41s	remaining: 1m 11s
18600:	learn: 0.0169375	test: 0.9311652	best: 0.8986850 (7241)	total: 13m 50s	remaining: 1m 2s
18800:	learn: 0.0166692	test: 0.9319638	best: 0.8986850 (7241)	total: 14m	remaining: 53.6s
19000:	learn: 0.0164052	test: 0.9327972	best: 0.8986850 (7241)	total: 14m 9s	remaining: 44.6s
19200:	learn: 0.0161534	test: 0.9334285	best: 0.8986850 (7241)	total: 14m 18s	remaining: 35.7s
19400:	learn: 0.0159128	test: 0.9341737	best: 0.89

 12%|█▏        | 341/2813 [00:00<00:00, 3408.07it/s]

... Augmenting Data ...


100%|██████████| 2813/2813 [00:00<00:00, 3613.01it/s]
  3%|▎         | 794/30943 [00:00<00:03, 7937.07it/s]

... Creating Dataframe ...


100%|██████████| 30943/30943 [00:03<00:00, 7758.22it/s]
100%|██████████| 312/312 [00:00<00:00, 8577.14it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 38.98221182823181
... Begin Training ...
Validating on fold 6




0:	learn: 3.9234548	test: 3.9166808	best: 3.9166808 (0)	total: 51.6ms	remaining: 17m 11s
200:	learn: 1.8748909	test: 1.8426113	best: 1.8426113 (200)	total: 8.68s	remaining: 14m 15s
400:	learn: 1.4556393	test: 1.5457681	best: 1.5457681 (400)	total: 18s	remaining: 14m 40s
600:	learn: 1.1719259	test: 1.3518140	best: 1.3518140 (600)	total: 27.2s	remaining: 14m 36s
800:	learn: 0.9676649	test: 1.2377692	best: 1.2377692 (800)	total: 36.3s	remaining: 14m 29s
1000:	learn: 0.8173232	test: 1.1587437	best: 1.1587437 (1000)	total: 45.4s	remaining: 14m 22s
1200:	learn: 0.7011111	test: 1.0955042	best: 1.0955042 (1200)	total: 54.7s	remaining: 14m 15s
1400:	learn: 0.6065189	test: 1.0490174	best: 1.0490174 (1400)	total: 1m 3s	remaining: 14m 7s
1600:	learn: 0.5319816	test: 1.0146381	best: 1.0146381 (1600)	total: 1m 13s	remaining: 13m 58s
1800:	learn: 0.4687705	test: 0.9849879	best: 0.9849879 (1800)	total: 1m 21s	remaining: 13m 46s
2000:	learn: 0.4171910	test: 0.9622332	best: 0.9622116 (1999)	total: 1m 30

17400:	learn: 0.0187812	test: 0.8741525	best: 0.8532351 (6631)	total: 13m 4s	remaining: 1m 57s
17600:	learn: 0.0184658	test: 0.8743983	best: 0.8532351 (6631)	total: 13m 12s	remaining: 1m 48s
17800:	learn: 0.0181550	test: 0.8749469	best: 0.8532351 (6631)	total: 13m 21s	remaining: 1m 39s
18000:	learn: 0.0178495	test: 0.8753575	best: 0.8532351 (6631)	total: 13m 30s	remaining: 1m 30s
18200:	learn: 0.0175593	test: 0.8756781	best: 0.8532351 (6631)	total: 13m 39s	remaining: 1m 21s
18400:	learn: 0.0172798	test: 0.8764409	best: 0.8532351 (6631)	total: 13m 48s	remaining: 1m 11s
18600:	learn: 0.0170038	test: 0.8769447	best: 0.8532351 (6631)	total: 13m 57s	remaining: 1m 2s
18800:	learn: 0.0167401	test: 0.8772736	best: 0.8532351 (6631)	total: 14m 6s	remaining: 54s
19000:	learn: 0.0164842	test: 0.8776830	best: 0.8532351 (6631)	total: 14m 15s	remaining: 45s
19200:	learn: 0.0162368	test: 0.8781502	best: 0.8532351 (6631)	total: 14m 24s	remaining: 36s
19400:	learn: 0.0159955	test: 0.8788022	best: 0.8532

 11%|█         | 305/2813 [00:00<00:00, 3041.51it/s]

... Augmenting Data ...


100%|██████████| 2813/2813 [00:00<00:00, 3083.22it/s]
  3%|▎         | 813/30943 [00:00<00:03, 8126.15it/s]

... Creating Dataframe ...


100%|██████████| 30943/30943 [00:04<00:00, 7664.76it/s]
100%|██████████| 312/312 [00:00<00:00, 8209.83it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 42.48988699913025
... Begin Training ...
Validating on fold 7




0:	learn: 3.9202983	test: 3.9286022	best: 3.9286022 (0)	total: 49.8ms	remaining: 16m 36s
200:	learn: 1.8614097	test: 1.8937341	best: 1.8937341 (200)	total: 8.49s	remaining: 13m 56s
400:	learn: 1.4416392	test: 1.5846137	best: 1.5846137 (400)	total: 17.7s	remaining: 14m 25s
600:	learn: 1.1631747	test: 1.4040032	best: 1.4040032 (600)	total: 26.8s	remaining: 14m 26s
800:	learn: 0.9635615	test: 1.2877009	best: 1.2877009 (800)	total: 36s	remaining: 14m 22s
1000:	learn: 0.8120293	test: 1.1942127	best: 1.1942127 (1000)	total: 45.1s	remaining: 14m 15s
1200:	learn: 0.6947216	test: 1.1295782	best: 1.1295782 (1200)	total: 54.3s	remaining: 14m 10s
1400:	learn: 0.6003121	test: 1.0805045	best: 1.0805045 (1400)	total: 1m 3s	remaining: 14m 2s
1600:	learn: 0.5241976	test: 1.0422490	best: 1.0422490 (1600)	total: 1m 12s	remaining: 13m 54s
1800:	learn: 0.4640188	test: 1.0110134	best: 1.0110134 (1800)	total: 1m 21s	remaining: 13m 45s
2000:	learn: 0.4121886	test: 0.9857343	best: 0.9857343 (2000)	total: 1m 30

17400:	learn: 0.0188606	test: 0.8216349	best: 0.8166096 (13207)	total: 12m 59s	remaining: 1m 56s
17600:	learn: 0.0185385	test: 0.8221758	best: 0.8166096 (13207)	total: 13m 8s	remaining: 1m 47s
17800:	learn: 0.0182327	test: 0.8222167	best: 0.8166096 (13207)	total: 13m 17s	remaining: 1m 38s
18000:	learn: 0.0179322	test: 0.8224276	best: 0.8166096 (13207)	total: 13m 26s	remaining: 1m 29s
18200:	learn: 0.0176283	test: 0.8225301	best: 0.8166096 (13207)	total: 13m 35s	remaining: 1m 20s
18400:	learn: 0.0173445	test: 0.8229665	best: 0.8166096 (13207)	total: 13m 44s	remaining: 1m 11s
18600:	learn: 0.0170707	test: 0.8231545	best: 0.8166096 (13207)	total: 13m 53s	remaining: 1m 2s
18800:	learn: 0.0167995	test: 0.8235746	best: 0.8166096 (13207)	total: 14m 2s	remaining: 53.7s
19000:	learn: 0.0165361	test: 0.8240138	best: 0.8166096 (13207)	total: 14m 11s	remaining: 44.8s
19200:	learn: 0.0162813	test: 0.8242594	best: 0.8166096 (13207)	total: 14m 20s	remaining: 35.8s
19400:	learn: 0.0160361	test: 0.8241

 12%|█▏        | 332/2813 [00:00<00:00, 3314.37it/s]

... Augmenting Data ...


100%|██████████| 2813/2813 [00:00<00:00, 3514.61it/s]
  3%|▎         | 826/30943 [00:00<00:03, 8255.70it/s]

... Creating Dataframe ...


100%|██████████| 30943/30943 [00:03<00:00, 7837.69it/s]
100%|██████████| 312/312 [00:00<00:00, 6559.84it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 40.067867279052734
... Begin Training ...
Validating on fold 8




0:	learn: 3.9279037	test: 3.9190404	best: 3.9190404 (0)	total: 83ms	remaining: 27m 39s
200:	learn: 1.8645645	test: 1.8423658	best: 1.8423658 (200)	total: 8.55s	remaining: 14m 1s
400:	learn: 1.4405729	test: 1.5205352	best: 1.5205352 (400)	total: 17.6s	remaining: 14m 19s
600:	learn: 1.1592049	test: 1.3231917	best: 1.3231917 (600)	total: 26.6s	remaining: 14m 17s
800:	learn: 0.9597359	test: 1.1967622	best: 1.1967622 (800)	total: 35.5s	remaining: 14m 11s
1000:	learn: 0.8118605	test: 1.1159689	best: 1.1159689 (1000)	total: 44.4s	remaining: 14m 2s
1200:	learn: 0.6970526	test: 1.0581273	best: 1.0581273 (1200)	total: 53.5s	remaining: 13m 56s
1400:	learn: 0.6038428	test: 1.0105258	best: 1.0105258 (1400)	total: 1m 2s	remaining: 13m 49s
1600:	learn: 0.5288530	test: 0.9757195	best: 0.9756938 (1599)	total: 1m 11s	remaining: 13m 42s
1800:	learn: 0.4670114	test: 0.9449829	best: 0.9449258 (1799)	total: 1m 20s	remaining: 13m 34s
2000:	learn: 0.4146177	test: 0.9223411	best: 0.9223411 (2000)	total: 1m 29s

17400:	learn: 0.0185803	test: 0.7673940	best: 0.7627078 (11373)	total: 12m 52s	remaining: 1m 55s
17600:	learn: 0.0182700	test: 0.7677841	best: 0.7627078 (11373)	total: 13m	remaining: 1m 46s
17800:	learn: 0.0179605	test: 0.7680740	best: 0.7627078 (11373)	total: 13m 9s	remaining: 1m 37s
18000:	learn: 0.0176795	test: 0.7681166	best: 0.7627078 (11373)	total: 13m 18s	remaining: 1m 28s
18200:	learn: 0.0173884	test: 0.7684252	best: 0.7627078 (11373)	total: 13m 27s	remaining: 1m 19s
18400:	learn: 0.0171065	test: 0.7682985	best: 0.7627078 (11373)	total: 13m 36s	remaining: 1m 10s
18600:	learn: 0.0168442	test: 0.7680740	best: 0.7627078 (11373)	total: 13m 44s	remaining: 1m 2s
18800:	learn: 0.0165788	test: 0.7683193	best: 0.7627078 (11373)	total: 13m 53s	remaining: 53.1s
19000:	learn: 0.0163228	test: 0.7683373	best: 0.7627078 (11373)	total: 14m 1s	remaining: 44.3s
19200:	learn: 0.0160768	test: 0.7684528	best: 0.7627078 (11373)	total: 14m 10s	remaining: 35.4s
19400:	learn: 0.0158314	test: 0.7685708	

 13%|█▎        | 363/2813 [00:00<00:00, 3622.46it/s]

... Augmenting Data ...


100%|██████████| 2813/2813 [00:00<00:00, 3707.97it/s]
  3%|▎         | 828/30943 [00:00<00:03, 8270.35it/s]

... Creating Dataframe ...


100%|██████████| 30943/30943 [00:03<00:00, 8061.89it/s]
100%|██████████| 312/312 [00:00<00:00, 8335.12it/s]


... Aggregating Train DataFrame ...
... Aggregating Val DataFrame ...
total pre-processing time = 38.260475397109985
... Begin Training ...
Validating on fold 9
0:	learn: 3.9288268	test: 3.9228895	best: 3.9228895 (0)	total: 49.6ms	remaining: 16m 32s




200:	learn: 1.8743456	test: 1.8305206	best: 1.8305206 (200)	total: 8.46s	remaining: 13m 53s
400:	learn: 1.4508003	test: 1.5137402	best: 1.5137402 (400)	total: 17.7s	remaining: 14m 24s
600:	learn: 1.1677007	test: 1.3345407	best: 1.3345407 (600)	total: 26.9s	remaining: 14m 29s
800:	learn: 0.9626508	test: 1.2153780	best: 1.2153780 (800)	total: 36.1s	remaining: 14m 24s
1000:	learn: 0.8131238	test: 1.1357413	best: 1.1357413 (1000)	total: 45.2s	remaining: 14m 17s
1200:	learn: 0.6953066	test: 1.0767178	best: 1.0767178 (1200)	total: 54.3s	remaining: 14m 10s
1400:	learn: 0.6023214	test: 1.0343982	best: 1.0343982 (1400)	total: 1m 3s	remaining: 14m 2s
1600:	learn: 0.5239461	test: 0.9984298	best: 0.9984298 (1600)	total: 1m 12s	remaining: 13m 53s
1800:	learn: 0.4610706	test: 0.9704959	best: 0.9704959 (1800)	total: 1m 21s	remaining: 13m 43s
2000:	learn: 0.4100306	test: 0.9492225	best: 0.9492225 (2000)	total: 1m 30s	remaining: 13m 34s
2200:	learn: 0.3687656	test: 0.9315281	best: 0.9315281 (2200)	tota

17600:	learn: 0.0185505	test: 0.8458478	best: 0.8260187 (8601)	total: 12m 57s	remaining: 1m 46s
17800:	learn: 0.0182430	test: 0.8462235	best: 0.8260187 (8601)	total: 13m 6s	remaining: 1m 37s
18000:	learn: 0.0179512	test: 0.8467082	best: 0.8260187 (8601)	total: 13m 15s	remaining: 1m 28s
18200:	learn: 0.0176692	test: 0.8473010	best: 0.8260187 (8601)	total: 13m 24s	remaining: 1m 19s
18400:	learn: 0.0173864	test: 0.8477124	best: 0.8260187 (8601)	total: 13m 33s	remaining: 1m 10s
18600:	learn: 0.0171090	test: 0.8484299	best: 0.8260187 (8601)	total: 13m 41s	remaining: 1m 1s
18800:	learn: 0.0168402	test: 0.8488717	best: 0.8260187 (8601)	total: 13m 50s	remaining: 53s
19000:	learn: 0.0165766	test: 0.8492725	best: 0.8260187 (8601)	total: 13m 59s	remaining: 44.1s
19200:	learn: 0.0163269	test: 0.8497719	best: 0.8260187 (8601)	total: 14m 8s	remaining: 35.3s
19400:	learn: 0.0160750	test: 0.8503688	best: 0.8260187 (8601)	total: 14m 17s	remaining: 26.5s
19600:	learn: 0.0158276	test: 0.8511877	best: 0.8

## Make prediction

In [13]:
features = ['id', 0, 1, 2, 3, 4, 5] 
test = test.rename(columns={'acc_x':0, 'acc_y':1, 'acc_z':2, 'gy_x':3, 'gy_y':4, 'gy_z':5})
X_test = test[features].groupby('id').agg(['max', 'min', 'mean', 'std', 'skew'])
X_test.head() 

Unnamed: 0_level_0,0,0,0,0,0,1,1,1,1,1,...,4,4,4,4,4,5,5,5,5,5
Unnamed: 0_level_1,max,min,mean,std,skew,max,min,mean,std,skew,...,max,min,mean,std,skew,max,min,mean,std,skew
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
3125,-0.275446,-1.564,-1.018731,0.236232,0.480681,0.22804,-0.470937,-0.019574,0.091641,-0.73823,...,96.185341,-81.607713,2.731872,31.993022,0.285366,49.981455,-35.446915,-2.000683,12.251648,0.28497
3126,0.627571,-1.929033,-0.522843,0.539688,0.783894,1.708743,-0.200678,0.612161,0.333015,0.141712,...,241.240196,-97.100707,6.974772,45.706311,1.240907,169.41765,-147.597574,-3.604579,61.604867,0.462374
3127,2.972063,-0.792916,0.506947,0.219934,2.601368,1.94182,0.219008,0.903819,0.191485,0.048348,...,74.530763,-98.420987,-2.251452,13.467885,-0.950816,97.21173,-154.477074,-0.393175,23.041463,-1.10352
3128,0.337281,-1.045889,-0.577603,0.431713,0.632767,-0.258476,-1.294482,-0.610557,0.233601,-0.709434,...,118.268797,-168.03108,-2.984969,45.069932,-0.736913,167.860762,-117.297766,-0.024318,37.967372,0.83222
3129,0.015642,-2.153047,-0.73864,0.305797,-0.334159,1.562602,-0.860883,0.182535,0.314294,0.244188,...,226.728939,-223.475411,1.442366,67.911174,-0.005484,138.130133,-125.5986,5.745498,43.353007,0.042361


In [14]:
pred1 = models[0].predict_proba(X_test) 
pred2 = models[1].predict_proba(X_test) 
pred3 = models[2].predict_proba(X_test) 
pred4 = models[3].predict_proba(X_test) 
pred5 = models[4].predict_proba(X_test)
pred6 = models[5].predict_proba(X_test) 
pred7 = models[6].predict_proba(X_test) 
pred8 = models[7].predict_proba(X_test) 
pred9 = models[8].predict_proba(X_test) 
pred10 = models[9].predict_proba(X_test)

In [15]:
pred_avg = (pred1 + pred2 + pred3 + pred4 + pred5 + pred6 + pred7 + pred8 + pred9 + pred10)/10.0 

In [16]:
pred_avg.shape

(782, 61)

In [17]:
submission.iloc[:,1:] = pred_avg
submission.to_csv('augmented_catboost_10_fold.csv',index=False)  

In [18]:
submission.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,51,52,53,54,55,56,57,58,59,60
0,3125,4.1e-05,7.9e-05,4.9e-05,0.000122,0.000176,6e-06,0.000175,0.000459,1.7e-05,...,0.005536,0.00098,0.000995,0.000443,8.3e-05,5.3e-05,0.000294,0.131575,2e-06,0.00038
1,3126,0.000524,5.1e-05,9e-06,0.000177,6.1e-05,6.8e-05,3e-06,0.000212,5.1e-05,...,0.000157,5e-06,5.5e-05,3.7e-05,4e-05,0.000152,0.000241,1.8e-05,1.4e-05,0.000209
2,3127,0.002592,0.186397,0.000201,0.002554,0.000451,0.000292,0.087915,0.019466,0.00165,...,8.8e-05,0.000112,0.000202,0.007033,0.00034,0.000941,0.000257,0.000221,0.000194,0.013071
3,3128,0.001859,0.00024,0.000316,0.00052,0.000107,0.000112,9e-06,0.000351,0.000106,...,0.000114,1.4e-05,7.8e-05,4.5e-05,0.000261,1.9e-05,0.001375,3.3e-05,1.7e-05,0.01576
4,3129,0.00052,0.000332,6e-05,0.00087,0.001271,0.000206,7e-06,0.000338,0.000304,...,0.000127,3e-06,4.5e-05,3.7e-05,0.000278,0.00023,0.000195,1.2e-05,9e-05,4.1e-05


## Average the results from GRU and catboost

In [22]:
## average dataframes     
gru_df = pd.read_csv('fourier_transform_10_fold_gru.csv') 
gru_df.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,51,52,53,54,55,56,57,58,59,60
0,3125,3.5e-05,0.00807,1.592733e-05,1.8e-05,1.3e-05,4.770708e-08,0.352726,0.00292,2.5e-05,...,0.011902,0.0005910614,0.000436,0.001321,1.436803e-06,1.3e-05,0.000198,0.12201,5e-06,2.3e-05
1,3126,0.000126,0.000812,5.921524e-06,0.02147,4e-06,6.673528e-05,1.8e-05,0.002538,1.7e-05,...,9e-06,6.659289e-07,4e-06,0.00025,3.787994e-05,0.000515,0.000336,6e-06,3.2e-05,0.000115
2,3127,0.000486,0.003486,6.21078e-06,3.3e-05,0.001839,0.0001618933,0.070613,0.000983,0.000256,...,0.000582,0.003588521,4.1e-05,0.001242,8.133279e-06,2.5e-05,0.000115,0.01907,0.000449,0.015425
3,3128,0.000218,1.1e-05,4.616908e-05,3.2e-05,3e-06,3.350694e-06,1.4e-05,1.7e-05,1e-05,...,0.000118,3.847815e-05,3.4e-05,6.1e-05,5.05448e-05,1.4e-05,0.000958,0.000188,6e-06,0.003009
4,3129,0.049436,0.004557,1.857505e-07,3.1e-05,0.031574,1.093202e-05,6.7e-05,6e-06,3.8e-05,...,1.3e-05,1.700347e-06,2e-06,1e-06,5.227133e-07,1.4e-05,0.000324,9.7e-05,3.7e-05,0.000634


In [24]:
avg_df = pd.read_csv('sample_submission.csv') 
avg_df.iloc[:,1:] = (gru_df.iloc[:,1:] + submission.iloc[:,1:]) / 2.0  

In [28]:
avg_df.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,51,52,53,54,55,56,57,58,59,60
0,3125,3.8e-05,0.004075,3.2e-05,7e-05,9.4e-05,3e-06,0.176451,0.00169,2.1e-05,...,0.008719,0.000786,0.000715,0.000882,4.2e-05,3.3e-05,0.000246,0.126792,3e-06,0.000201
1,3126,0.000325,0.000431,7e-06,0.010823,3.2e-05,6.7e-05,1.1e-05,0.001375,3.4e-05,...,8.3e-05,3e-06,3e-05,0.000144,3.9e-05,0.000333,0.000288,1.2e-05,2.3e-05,0.000162
2,3127,0.001539,0.094941,0.000103,0.001294,0.001145,0.000227,0.079264,0.010224,0.000953,...,0.000335,0.00185,0.000122,0.004138,0.000174,0.000483,0.000186,0.009646,0.000321,0.014248
3,3128,0.001038,0.000126,0.000181,0.000276,5.5e-05,5.7e-05,1.2e-05,0.000184,5.8e-05,...,0.000116,2.6e-05,5.6e-05,5.3e-05,0.000156,1.6e-05,0.001167,0.00011,1.1e-05,0.009385
4,3129,0.024978,0.002445,3e-05,0.000451,0.016423,0.000109,3.7e-05,0.000172,0.000171,...,7e-05,2e-06,2.4e-05,1.9e-05,0.000139,0.000122,0.00026,5.5e-05,6.3e-05,0.000337


In [29]:
avg_df.to_csv('gru_catboost_10_fold_augmented.csv',index=False)