In [57]:
import numpy as np
import pandas as pd
import tsfresh
import os
import pickle

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, StackingClassifier, GradientBoostingClassifier
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold, RandomizedSearchCV
from catboost import CatBoostClassifier
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
import tsfresh
import lightgbm as lgb
from pytorch_tabnet.tab_model import TabNetClassifier
import torch
import re

In [58]:
def make_submission(preds):
    assert len(preds) == 5000
    
    # Read labels
    with open('test_labels_sorted.npy', 'rb') as f:
        test_labels = np.load(f)
    len(test_labels)
    
    submission = pd.DataFrame(columns=['id', 'class'])
    for label, pred in zip(test_labels, preds):
        submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
    return submission

In [59]:
def plot_confusion_matrix(y_val_from_train, y_pred_from_train):
    fig, ax = plt.subplots(figsize=(8, 5))
    cmp = ConfusionMatrixDisplay(confusion_matrix(y_val_from_train, y_pred_from_train))
    cmp.plot(ax=ax)
    plt.show()

### Set train files

In [4]:
train_files = os.listdir('train/train/')
train_files.sort()
train_files[:5]

['10003.csv', '10005.csv', '10006.csv', '10007.csv', '10012.csv']

### Set test files

In [5]:
test_files = os.listdir('test/test/')
test_files.sort()
print(test_files[:5], len(test_files))

['10001.csv', '10002.csv', '10004.csv', '10008.csv', '10009.csv'] 5000


### Create Input DataFrame

In [24]:
columns = ['id', 'x', 'y', 'z', 'a']
N = 9000
df_train = pd.DataFrame(columns=columns)
labels = []

for i, file in enumerate(train_files[:N]):
    
    if i % 50 == 0:
        print(f"Iteration {i}...")
    
    # Take each training sample and store it as a temporary dataframe
    full_path = f'train/train/{file}'
    temp_df = pd.read_csv(full_path, names=['x', 'y', 'z'])
    
    # Assign each column as a numpy array
    x = temp_df['x'].to_numpy()
    y = temp_df['y'].to_numpy()
    z = temp_df['z'].to_numpy()
    
    # Compute acceleration magnitude
    acc_mag = np.sqrt(x ** 2 + y ** 2 + z ** 2)
    temp_df['a'] = acc_mag
    
    # Create id and time columns for TSFresh
    temp_df['id'] = i
    temp_df['time'] = range(len(x))
    
    df_train = pd.concat([df_train, temp_df], axis=0, ignore_index=True)
        
df_train['id'] = df_train['id'].astype(np.int64)
df_train['time'] = df_train['time'].astype(np.int64)

Iteration 0...
Iteration 50...
Iteration 100...
Iteration 150...
Iteration 200...
Iteration 250...
Iteration 300...
Iteration 350...
Iteration 400...
Iteration 450...
Iteration 500...
Iteration 550...
Iteration 600...
Iteration 650...
Iteration 700...
Iteration 750...
Iteration 800...
Iteration 850...
Iteration 900...
Iteration 950...
Iteration 1000...
Iteration 1050...
Iteration 1100...
Iteration 1150...
Iteration 1200...
Iteration 1250...
Iteration 1300...
Iteration 1350...
Iteration 1400...
Iteration 1450...
Iteration 1500...
Iteration 1550...
Iteration 1600...
Iteration 1650...
Iteration 1700...
Iteration 1750...
Iteration 1800...
Iteration 1850...
Iteration 1900...
Iteration 1950...
Iteration 2000...
Iteration 2050...
Iteration 2100...
Iteration 2150...
Iteration 2200...
Iteration 2250...
Iteration 2300...
Iteration 2350...
Iteration 2400...
Iteration 2450...
Iteration 2500...
Iteration 2550...
Iteration 2600...
Iteration 2650...
Iteration 2700...
Iteration 2750...
Iteration 2800.

In [25]:
df_train

Unnamed: 0,id,x,y,z,a,time
0,0,-0.767342,4.330513,7.996035,9.125718,0
1,0,-0.826000,4.371214,7.986458,9.141843,1
2,0,-0.819415,4.401740,8.008605,9.175212,2
3,0,-0.809240,4.446631,8.027160,9.212095,3
4,0,-0.798466,4.450222,8.033744,9.218626,4
...,...,...,...,...,...,...
1348950,8999,0.740407,5.185841,7.296928,8.982556,144
1348951,8999,0.808642,5.205593,7.309498,9.010042,145
1348952,8999,0.864905,5.319318,7.308899,9.080926,146
1348953,8999,0.807445,5.356428,7.321469,9.107535,147


In [26]:
df_train.to_csv('df_train_with_acc_tsfresh.csv', index=False)

### Create test DataFrame

In [28]:
columns = ['id', 'x', 'y', 'z', 'a']
N = 5000
df_test = pd.DataFrame(columns=columns)
labels = []

for i, file in enumerate(test_files[:N]):
    
    if i % 50 == 0:
        print(f"Iteration {i}...")
    
    # Take each training sample and store it as a temporary dataframe
    full_path = f'test/test/{file}'
    temp_df = pd.read_csv(full_path, names=['x', 'y', 'z'])
    
    # Assign each column as a numpy array
    x = temp_df['x'].to_numpy()
    y = temp_df['y'].to_numpy()
    z = temp_df['z'].to_numpy()
    
    # Compute acceleration magnitude
    acc_mag = np.sqrt(x ** 2 + y ** 2 + z ** 2)
    temp_df['a'] = acc_mag
    
    # Create id and time columns for TSFresh
    temp_df['id'] = i
    temp_df['time'] = range(len(x))
    
    df_test = pd.concat([df_test, temp_df], axis=0, ignore_index=True)
        
df_test['id'] = df_test['id'].astype(np.int64)
df_test['time'] = df_test['time'].astype(np.int64)

Iteration 0...
Iteration 50...
Iteration 100...
Iteration 150...
Iteration 200...
Iteration 250...
Iteration 300...
Iteration 350...
Iteration 400...
Iteration 450...
Iteration 500...
Iteration 550...
Iteration 600...
Iteration 650...
Iteration 700...
Iteration 750...
Iteration 800...
Iteration 850...
Iteration 900...
Iteration 950...
Iteration 1000...
Iteration 1050...
Iteration 1100...
Iteration 1150...
Iteration 1200...
Iteration 1250...
Iteration 1300...
Iteration 1350...
Iteration 1400...
Iteration 1450...
Iteration 1500...
Iteration 1550...
Iteration 1600...
Iteration 1650...
Iteration 1700...
Iteration 1750...
Iteration 1800...
Iteration 1850...
Iteration 1900...
Iteration 1950...
Iteration 2000...
Iteration 2050...
Iteration 2100...
Iteration 2150...
Iteration 2200...
Iteration 2250...
Iteration 2300...
Iteration 2350...
Iteration 2400...
Iteration 2450...
Iteration 2500...
Iteration 2550...
Iteration 2600...
Iteration 2650...
Iteration 2700...
Iteration 2750...
Iteration 2800.

In [29]:
df_test

Unnamed: 0,id,x,y,z,a,time
0,0,-0.320224,4.966173,7.307702,8.841263,0
1,0,-0.228646,4.947618,7.289746,8.813149,1
2,0,-0.156820,4.929063,7.320271,8.826473,2
3,0,-0.087388,5.010466,7.249643,8.813043,3
4,0,-0.032322,5.003882,7.349601,8.891373,4
...,...,...,...,...,...,...
749512,4999,-1.534683,4.169502,5.582680,7.134866,146
749513,4999,-1.263540,3.909133,6.930615,8.056754,147
749514,4999,-0.978630,3.858855,8.090008,9.016469,148
749515,4999,-1.177947,3.728371,8.290522,9.166300,149


In [30]:
df_test.to_csv('df_test_with_acc_tsfresh.csv', index=False)

### Preprocess y_train

In [33]:
# Load y_train
with open('y_train.npy', 'rb') as f:
    y_train = np.load(f)
print(y_train.shape)
y_train

y_train_series = pd.Series(y_train)
y_train_series

(9000,)


0        7
1       13
2        6
3        1
4        1
        ..
8995    20
8996     2
8997     3
8998     3
8999    18
Length: 9000, dtype: int64

### Extract relevant features train

In [34]:
relevant_features_with_acc = tsfresh.extract_relevant_features(df_train, y_train_series, column_id='id', column_sort='time')

Feature Extraction: 100%|██████████| 30/30 [04:36<00:00,  9.22s/it]


In [35]:
relevant_features_with_acc.shape

(9000, 2323)

In [37]:
relevant_features_with_acc.to_csv('relevant_features_train_with_acc_df_tsfresh.csv', index=False)

### Extract relevant features test

In [38]:
relevant_features_test_with_acc = tsfresh.extract_features(df_test, column_id='id', column_sort='time')

Feature Extraction: 100%|██████████| 30/30 [02:29<00:00,  4.98s/it]


In [39]:
relevant_features_test_with_acc.shape

(5000, 3156)

In [40]:
relevant_features_test_with_acc.to_csv('relevant_features_test_with_acc_df_tsfresh.csv', index=False)

### Train Catboost

In [41]:
cbc = CatBoostClassifier(loss_function='MultiClass', task_type="GPU")

In [42]:
%%time
cbc.fit(relevant_features_with_acc, y_train)

Learning rate set to 0.101613
0:	learn: 2.2382146	total: 74.1ms	remaining: 1m 13s
1:	learn: 1.9160048	total: 139ms	remaining: 1m 9s
2:	learn: 1.6843614	total: 200ms	remaining: 1m 6s
3:	learn: 1.5088086	total: 261ms	remaining: 1m 5s
4:	learn: 1.3676022	total: 328ms	remaining: 1m 5s
5:	learn: 1.2563792	total: 387ms	remaining: 1m 4s
6:	learn: 1.1606257	total: 458ms	remaining: 1m 4s
7:	learn: 1.0777635	total: 522ms	remaining: 1m 4s
8:	learn: 1.0103814	total: 577ms	remaining: 1m 3s
9:	learn: 0.9486890	total: 630ms	remaining: 1m 2s
10:	learn: 0.8846572	total: 701ms	remaining: 1m 3s
11:	learn: 0.8275425	total: 769ms	remaining: 1m 3s
12:	learn: 0.7802349	total: 840ms	remaining: 1m 3s
13:	learn: 0.7370273	total: 905ms	remaining: 1m 3s
14:	learn: 0.6945694	total: 966ms	remaining: 1m 3s
15:	learn: 0.6558121	total: 1.04s	remaining: 1m 4s
16:	learn: 0.6230279	total: 1.12s	remaining: 1m 4s
17:	learn: 0.5935146	total: 1.19s	remaining: 1m 4s
18:	learn: 0.5668456	total: 1.26s	remaining: 1m 5s
19:	learn

163:	learn: 0.0738247	total: 10s	remaining: 51s
164:	learn: 0.0735828	total: 10.1s	remaining: 50.9s
165:	learn: 0.0730574	total: 10.1s	remaining: 50.8s
166:	learn: 0.0727605	total: 10.2s	remaining: 50.7s
167:	learn: 0.0723861	total: 10.2s	remaining: 50.7s
168:	learn: 0.0720070	total: 10.3s	remaining: 50.6s
169:	learn: 0.0714840	total: 10.3s	remaining: 50.5s
170:	learn: 0.0709797	total: 10.4s	remaining: 50.4s
171:	learn: 0.0703714	total: 10.5s	remaining: 50.4s
172:	learn: 0.0696874	total: 10.5s	remaining: 50.3s
173:	learn: 0.0691814	total: 10.6s	remaining: 50.2s
174:	learn: 0.0688203	total: 10.6s	remaining: 50.1s
175:	learn: 0.0684667	total: 10.7s	remaining: 50.1s
176:	learn: 0.0682119	total: 10.8s	remaining: 50s
177:	learn: 0.0677321	total: 10.8s	remaining: 49.9s
178:	learn: 0.0675032	total: 10.9s	remaining: 49.9s
179:	learn: 0.0672099	total: 10.9s	remaining: 49.8s
180:	learn: 0.0668327	total: 11s	remaining: 49.7s
181:	learn: 0.0667472	total: 11s	remaining: 49.6s
182:	learn: 0.0665664	

323:	learn: 0.0366136	total: 19.3s	remaining: 40.2s
324:	learn: 0.0365880	total: 19.3s	remaining: 40.1s
325:	learn: 0.0364809	total: 19.4s	remaining: 40.1s
326:	learn: 0.0364108	total: 19.4s	remaining: 40s
327:	learn: 0.0362940	total: 19.5s	remaining: 39.9s
328:	learn: 0.0360239	total: 19.5s	remaining: 39.9s
329:	learn: 0.0359356	total: 19.6s	remaining: 39.8s
330:	learn: 0.0357037	total: 19.7s	remaining: 39.7s
331:	learn: 0.0356129	total: 19.7s	remaining: 39.7s
332:	learn: 0.0354982	total: 19.8s	remaining: 39.6s
333:	learn: 0.0354555	total: 19.8s	remaining: 39.5s
334:	learn: 0.0352866	total: 19.9s	remaining: 39.5s
335:	learn: 0.0351105	total: 20s	remaining: 39.4s
336:	learn: 0.0350283	total: 20s	remaining: 39.4s
337:	learn: 0.0349240	total: 20.1s	remaining: 39.3s
338:	learn: 0.0348723	total: 20.1s	remaining: 39.2s
339:	learn: 0.0347720	total: 20.2s	remaining: 39.2s
340:	learn: 0.0346836	total: 20.2s	remaining: 39.1s
341:	learn: 0.0344880	total: 20.3s	remaining: 39.1s
342:	learn: 0.0344

483:	learn: 0.0234224	total: 28.6s	remaining: 30.5s
484:	learn: 0.0234053	total: 28.6s	remaining: 30.4s
485:	learn: 0.0233942	total: 28.7s	remaining: 30.3s
486:	learn: 0.0233662	total: 28.7s	remaining: 30.3s
487:	learn: 0.0233408	total: 28.8s	remaining: 30.2s
488:	learn: 0.0232981	total: 28.9s	remaining: 30.2s
489:	learn: 0.0232406	total: 28.9s	remaining: 30.1s
490:	learn: 0.0232253	total: 29s	remaining: 30s
491:	learn: 0.0232137	total: 29s	remaining: 30s
492:	learn: 0.0231561	total: 29.1s	remaining: 29.9s
493:	learn: 0.0230952	total: 29.1s	remaining: 29.9s
494:	learn: 0.0230821	total: 29.2s	remaining: 29.8s
495:	learn: 0.0230577	total: 29.3s	remaining: 29.7s
496:	learn: 0.0229378	total: 29.3s	remaining: 29.7s
497:	learn: 0.0228800	total: 29.4s	remaining: 29.6s
498:	learn: 0.0228419	total: 29.4s	remaining: 29.5s
499:	learn: 0.0227753	total: 29.5s	remaining: 29.5s
500:	learn: 0.0226504	total: 29.5s	remaining: 29.4s
501:	learn: 0.0226344	total: 29.6s	remaining: 29.4s
502:	learn: 0.022623

643:	learn: 0.0162129	total: 37.8s	remaining: 20.9s
644:	learn: 0.0161371	total: 37.9s	remaining: 20.9s
645:	learn: 0.0161232	total: 38s	remaining: 20.8s
646:	learn: 0.0160714	total: 38s	remaining: 20.7s
647:	learn: 0.0159949	total: 38.1s	remaining: 20.7s
648:	learn: 0.0159862	total: 38.1s	remaining: 20.6s
649:	learn: 0.0159773	total: 38.2s	remaining: 20.6s
650:	learn: 0.0159468	total: 38.2s	remaining: 20.5s
651:	learn: 0.0159336	total: 38.3s	remaining: 20.4s
652:	learn: 0.0158802	total: 38.4s	remaining: 20.4s
653:	learn: 0.0158389	total: 38.4s	remaining: 20.3s
654:	learn: 0.0158129	total: 38.5s	remaining: 20.3s
655:	learn: 0.0157982	total: 38.6s	remaining: 20.2s
656:	learn: 0.0157707	total: 38.6s	remaining: 20.2s
657:	learn: 0.0157279	total: 38.7s	remaining: 20.1s
658:	learn: 0.0157147	total: 38.7s	remaining: 20s
659:	learn: 0.0157012	total: 38.8s	remaining: 20s
660:	learn: 0.0156677	total: 38.9s	remaining: 19.9s
661:	learn: 0.0156587	total: 38.9s	remaining: 19.9s
662:	learn: 0.015642

803:	learn: 0.0120505	total: 47.3s	remaining: 11.5s
804:	learn: 0.0120081	total: 47.4s	remaining: 11.5s
805:	learn: 0.0119811	total: 47.4s	remaining: 11.4s
806:	learn: 0.0119636	total: 47.5s	remaining: 11.4s
807:	learn: 0.0119372	total: 47.5s	remaining: 11.3s
808:	learn: 0.0119037	total: 47.6s	remaining: 11.2s
809:	learn: 0.0118610	total: 47.7s	remaining: 11.2s
810:	learn: 0.0118457	total: 47.7s	remaining: 11.1s
811:	learn: 0.0118296	total: 47.8s	remaining: 11.1s
812:	learn: 0.0118098	total: 47.9s	remaining: 11s
813:	learn: 0.0117874	total: 47.9s	remaining: 10.9s
814:	learn: 0.0117650	total: 48s	remaining: 10.9s
815:	learn: 0.0117345	total: 48s	remaining: 10.8s
816:	learn: 0.0117091	total: 48.1s	remaining: 10.8s
817:	learn: 0.0117010	total: 48.2s	remaining: 10.7s
818:	learn: 0.0116651	total: 48.2s	remaining: 10.7s
819:	learn: 0.0116611	total: 48.3s	remaining: 10.6s
820:	learn: 0.0116448	total: 48.3s	remaining: 10.5s
821:	learn: 0.0116305	total: 48.4s	remaining: 10.5s
822:	learn: 0.0116

963:	learn: 0.0093922	total: 56.6s	remaining: 2.11s
964:	learn: 0.0093712	total: 56.6s	remaining: 2.05s
965:	learn: 0.0093377	total: 56.7s	remaining: 2s
966:	learn: 0.0093297	total: 56.8s	remaining: 1.94s
967:	learn: 0.0093223	total: 56.8s	remaining: 1.88s
968:	learn: 0.0093074	total: 56.9s	remaining: 1.82s
969:	learn: 0.0092968	total: 56.9s	remaining: 1.76s
970:	learn: 0.0092926	total: 57s	remaining: 1.7s
971:	learn: 0.0092831	total: 57.1s	remaining: 1.64s
972:	learn: 0.0092640	total: 57.1s	remaining: 1.58s
973:	learn: 0.0092242	total: 57.2s	remaining: 1.53s
974:	learn: 0.0092033	total: 57.2s	remaining: 1.47s
975:	learn: 0.0091650	total: 57.3s	remaining: 1.41s
976:	learn: 0.0091407	total: 57.4s	remaining: 1.35s
977:	learn: 0.0091368	total: 57.4s	remaining: 1.29s
978:	learn: 0.0091330	total: 57.5s	remaining: 1.23s
979:	learn: 0.0091191	total: 57.5s	remaining: 1.17s
980:	learn: 0.0091145	total: 57.6s	remaining: 1.11s
981:	learn: 0.0091059	total: 57.7s	remaining: 1.06s
982:	learn: 0.0090

<catboost.core.CatBoostClassifier at 0x1b1d48f9dc0>

In [44]:
preds = cbc.predict(relevant_features_test_with_acc)
preds = [a[0] for a in preds]
preds

[3,
 4,
 5,
 20,
 13,
 13,
 13,
 4,
 5,
 3,
 16,
 9,
 2,
 8,
 17,
 6,
 19,
 5,
 2,
 18,
 17,
 18,
 12,
 7,
 16,
 18,
 8,
 15,
 16,
 3,
 16,
 6,
 10,
 9,
 4,
 3,
 6,
 20,
 5,
 1,
 5,
 10,
 3,
 3,
 1,
 3,
 18,
 18,
 10,
 20,
 17,
 3,
 7,
 4,
 3,
 12,
 16,
 16,
 8,
 17,
 15,
 4,
 17,
 13,
 9,
 13,
 20,
 8,
 4,
 15,
 14,
 20,
 16,
 9,
 12,
 8,
 8,
 6,
 18,
 9,
 13,
 8,
 17,
 8,
 11,
 12,
 19,
 12,
 14,
 17,
 17,
 16,
 20,
 5,
 11,
 1,
 10,
 5,
 13,
 11,
 8,
 16,
 17,
 20,
 14,
 17,
 3,
 17,
 16,
 3,
 17,
 5,
 2,
 8,
 5,
 16,
 12,
 3,
 4,
 10,
 9,
 14,
 20,
 15,
 9,
 13,
 16,
 18,
 17,
 17,
 20,
 1,
 12,
 11,
 2,
 4,
 2,
 5,
 11,
 18,
 8,
 15,
 4,
 19,
 9,
 12,
 20,
 4,
 19,
 4,
 10,
 13,
 16,
 10,
 14,
 14,
 10,
 5,
 7,
 8,
 4,
 20,
 12,
 13,
 19,
 1,
 5,
 2,
 19,
 6,
 3,
 18,
 12,
 2,
 12,
 5,
 10,
 15,
 20,
 14,
 1,
 19,
 9,
 4,
 14,
 15,
 10,
 17,
 4,
 17,
 6,
 15,
 14,
 14,
 18,
 11,
 13,
 9,
 18,
 7,
 17,
 5,
 8,
 10,
 20,
 14,
 4,
 14,
 20,
 17,
 19,
 18,
 5,
 19,
 18,
 20,
 20,
 9,


In [45]:
sub = make_submission(preds)
sub

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

Unnamed: 0,id,class
0,10001,3
1,10002,4
2,10004,5
3,10008,20
4,10009,13
...,...,...
4995,23986,9
4996,23991,12
4997,23992,4
4998,23998,5


In [46]:
sub.to_csv('submission_catboost_tsfresh_relevant_features_with_acc.csv', index=False)

### Train Ensemble

In [51]:
etc_clf = ExtraTreesClassifier(n_jobs=-1, n_estimators=933, max_depth=79, min_samples_split=2, min_samples_leaf=1, \
                               bootstrap = False, warm_start = False)

gb_clf = GradientBoostingClassifier(learning_rate=0.2, max_depth=5, min_samples_leaf=0.1, \
                                    min_samples_split=0.1363, subsample=1.0)

# lgb_clf = lgb.LGBMClassifier(objective='multiclass', num_class=20, n_jobs=-1, seed=42, boosting='dart', \
#                              min_child_samples=12, num_iterations=1936, num_leaves=66, min_data_in_leaf=50, \
#                              max_bin=20, max_depth=17, learning_rate=0.24, reg_alpha=0.0004127769671094072)

cbc_clf = CatBoostClassifier()

tabnet = TabNetClassifier(optimizer_params=dict(lr=2e-2), scheduler_params={"step_size":10, "gamma":0.9}, scheduler_fn=torch.optim.lr_scheduler.StepLR)
tabnet._estimator_type = "classifier"

estimators = [("etc", etc_clf), ("gb", gb_clf), ("cbc", cbc_clf), ('tabnet', tabnet)]
ensemble_estimators = StackingClassifier(estimators, final_estimator=LogisticRegression(), n_jobs=-1)

In [53]:
%%time
ensemble_estimators.fit(relevant_features_with_acc, y_train)



CatBoostError: C:/Program Files (x86)/Go Agent/pipelines/BuildMaster/catboost.git/catboost/libs/data/features_layout.cpp:109: All feature names should be different, but 'x__number_crossing_m__m_1' used more than once.

In [54]:
preds = ensemble_estimators.predict(relevant_features_test_with_acc)
preds

AttributeError: 'StackingClassifier' object has no attribute 'estimators_'

In [None]:
sub = make_submission(preds)
sub

In [None]:
sub.to_csv('submission_ensemble_tsfresh_relevant_features_with_acc.csv', index=False)

## Comprehensive Features

### Extract Comprehensive Features Train

In [60]:
comprehensive = tsfresh.feature_extraction.ComprehensiveFCParameters()

In [64]:
comprehensive_features_train = tsfresh.feature_extraction.extract_features(df_train, default_fc_parameters=comprehensive, column_id='id', column_sort='time')

Feature Extraction: 100%|██████████| 30/30 [04:17<00:00,  8.58s/it]


In [65]:
comprehensive_features_train.shape

(9000, 3156)

In [66]:
comprehensive_features_train.to_csv('comprehensive_features_train_with_acc_df_tsfresh.csv', index=False)

### Comprehensive Features Test

In [67]:
comprehensive_features_test = tsfresh.feature_extraction.extract_features(df_test, default_fc_parameters=comprehensive, column_id='id', column_sort='time')

Feature Extraction: 100%|██████████| 30/30 [02:22<00:00,  4.74s/it]


In [69]:
comprehensive_features_test.shape

(5000, 3156)

In [70]:
comprehensive_features_test.to_csv('comprehensive_features_test_with_acc_df_tsfresh.csv', index=False)

### Train Catboost

In [71]:
cbc = CatBoostClassifier(loss_function='MultiClass', task_type="GPU")

In [72]:
%%time
cbc.fit(comprehensive_features_train, y_train)

Learning rate set to 0.101613
0:	learn: 2.2382148	total: 83.6ms	remaining: 1m 23s
1:	learn: 1.9160050	total: 155ms	remaining: 1m 17s
2:	learn: 1.6843617	total: 224ms	remaining: 1m 14s
3:	learn: 1.5088085	total: 292ms	remaining: 1m 12s
4:	learn: 1.3676021	total: 366ms	remaining: 1m 12s
5:	learn: 1.2563790	total: 432ms	remaining: 1m 11s
6:	learn: 1.1606258	total: 510ms	remaining: 1m 12s
7:	learn: 1.0777635	total: 581ms	remaining: 1m 12s
8:	learn: 1.0103814	total: 640ms	remaining: 1m 10s
9:	learn: 0.9486888	total: 699ms	remaining: 1m 9s
10:	learn: 0.8846573	total: 778ms	remaining: 1m 9s
11:	learn: 0.8275426	total: 853ms	remaining: 1m 10s
12:	learn: 0.7802350	total: 933ms	remaining: 1m 10s
13:	learn: 0.7370274	total: 1s	remaining: 1m 10s
14:	learn: 0.6945693	total: 1.07s	remaining: 1m 10s
15:	learn: 0.6558121	total: 1.15s	remaining: 1m 10s
16:	learn: 0.6230279	total: 1.24s	remaining: 1m 11s
17:	learn: 0.5935146	total: 1.31s	remaining: 1m 11s
18:	learn: 0.5668456	total: 1.39s	remaining: 1m 

159:	learn: 0.0800799	total: 10.7s	remaining: 56s
160:	learn: 0.0798787	total: 10.7s	remaining: 55.9s
161:	learn: 0.0795499	total: 10.8s	remaining: 55.8s
162:	learn: 0.0791442	total: 10.9s	remaining: 55.7s
163:	learn: 0.0784722	total: 10.9s	remaining: 55.6s
164:	learn: 0.0781963	total: 11s	remaining: 55.5s
165:	learn: 0.0781546	total: 11s	remaining: 55.4s
166:	learn: 0.0780712	total: 11.1s	remaining: 55.3s
167:	learn: 0.0776916	total: 11.1s	remaining: 55.2s
168:	learn: 0.0775242	total: 11.2s	remaining: 55.1s
169:	learn: 0.0769770	total: 11.3s	remaining: 55s
170:	learn: 0.0761786	total: 11.3s	remaining: 54.9s
171:	learn: 0.0755152	total: 11.4s	remaining: 54.9s
172:	learn: 0.0749192	total: 11.5s	remaining: 54.8s
173:	learn: 0.0741531	total: 11.5s	remaining: 54.7s
174:	learn: 0.0736126	total: 11.6s	remaining: 54.7s
175:	learn: 0.0733400	total: 11.7s	remaining: 54.6s
176:	learn: 0.0726331	total: 11.7s	remaining: 54.5s
177:	learn: 0.0724028	total: 11.8s	remaining: 54.4s
178:	learn: 0.071950

318:	learn: 0.0402894	total: 20.7s	remaining: 44.2s
319:	learn: 0.0401970	total: 20.8s	remaining: 44.2s
320:	learn: 0.0400093	total: 20.8s	remaining: 44.1s
321:	learn: 0.0398750	total: 20.9s	remaining: 44s
322:	learn: 0.0398028	total: 21s	remaining: 44s
323:	learn: 0.0397281	total: 21s	remaining: 43.9s
324:	learn: 0.0395903	total: 21.1s	remaining: 43.8s
325:	learn: 0.0395241	total: 21.2s	remaining: 43.8s
326:	learn: 0.0394500	total: 21.2s	remaining: 43.7s
327:	learn: 0.0393740	total: 21.3s	remaining: 43.6s
328:	learn: 0.0393243	total: 21.4s	remaining: 43.6s
329:	learn: 0.0390968	total: 21.4s	remaining: 43.5s
330:	learn: 0.0389950	total: 21.5s	remaining: 43.4s
331:	learn: 0.0389493	total: 21.5s	remaining: 43.4s
332:	learn: 0.0389218	total: 21.6s	remaining: 43.3s
333:	learn: 0.0388848	total: 21.7s	remaining: 43.2s
334:	learn: 0.0386622	total: 21.7s	remaining: 43.2s
335:	learn: 0.0385178	total: 21.8s	remaining: 43.1s
336:	learn: 0.0384253	total: 21.9s	remaining: 43s
337:	learn: 0.0383047	

478:	learn: 0.0260395	total: 31.1s	remaining: 33.8s
479:	learn: 0.0259780	total: 31.1s	remaining: 33.7s
480:	learn: 0.0259296	total: 31.2s	remaining: 33.7s
481:	learn: 0.0258692	total: 31.3s	remaining: 33.6s
482:	learn: 0.0258498	total: 31.3s	remaining: 33.5s
483:	learn: 0.0257207	total: 31.4s	remaining: 33.5s
484:	learn: 0.0256994	total: 31.5s	remaining: 33.4s
485:	learn: 0.0255827	total: 31.5s	remaining: 33.4s
486:	learn: 0.0255154	total: 31.6s	remaining: 33.3s
487:	learn: 0.0253737	total: 31.7s	remaining: 33.2s
488:	learn: 0.0253356	total: 31.7s	remaining: 33.2s
489:	learn: 0.0252417	total: 31.8s	remaining: 33.1s
490:	learn: 0.0251805	total: 31.9s	remaining: 33s
491:	learn: 0.0250913	total: 31.9s	remaining: 33s
492:	learn: 0.0250668	total: 32s	remaining: 32.9s
493:	learn: 0.0249396	total: 32.1s	remaining: 32.8s
494:	learn: 0.0249071	total: 32.1s	remaining: 32.8s
495:	learn: 0.0247656	total: 32.2s	remaining: 32.7s
496:	learn: 0.0246971	total: 32.3s	remaining: 32.6s
497:	learn: 0.0246

637:	learn: 0.0181575	total: 41.3s	remaining: 23.4s
638:	learn: 0.0180460	total: 41.4s	remaining: 23.4s
639:	learn: 0.0179892	total: 41.4s	remaining: 23.3s
640:	learn: 0.0179783	total: 41.5s	remaining: 23.2s
641:	learn: 0.0179686	total: 41.6s	remaining: 23.2s
642:	learn: 0.0179126	total: 41.6s	remaining: 23.1s
643:	learn: 0.0179120	total: 41.7s	remaining: 23s
644:	learn: 0.0178666	total: 41.8s	remaining: 23s
645:	learn: 0.0178558	total: 41.8s	remaining: 22.9s
646:	learn: 0.0178173	total: 41.9s	remaining: 22.9s
647:	learn: 0.0178038	total: 42s	remaining: 22.8s
648:	learn: 0.0177943	total: 42s	remaining: 22.7s
649:	learn: 0.0177607	total: 42.1s	remaining: 22.7s
650:	learn: 0.0177036	total: 42.2s	remaining: 22.6s
651:	learn: 0.0176872	total: 42.2s	remaining: 22.5s
652:	learn: 0.0176352	total: 42.3s	remaining: 22.5s
653:	learn: 0.0175973	total: 42.4s	remaining: 22.4s
654:	learn: 0.0175714	total: 42.4s	remaining: 22.3s
655:	learn: 0.0175557	total: 42.5s	remaining: 22.3s
656:	learn: 0.017527

799:	learn: 0.0131916	total: 51.9s	remaining: 13s
800:	learn: 0.0131488	total: 51.9s	remaining: 12.9s
801:	learn: 0.0131362	total: 52s	remaining: 12.8s
802:	learn: 0.0131229	total: 52.1s	remaining: 12.8s
803:	learn: 0.0131060	total: 52.1s	remaining: 12.7s
804:	learn: 0.0130765	total: 52.2s	remaining: 12.6s
805:	learn: 0.0130239	total: 52.3s	remaining: 12.6s
806:	learn: 0.0129826	total: 52.3s	remaining: 12.5s
807:	learn: 0.0129624	total: 52.4s	remaining: 12.4s
808:	learn: 0.0129549	total: 52.5s	remaining: 12.4s
809:	learn: 0.0129290	total: 52.5s	remaining: 12.3s
810:	learn: 0.0128830	total: 52.6s	remaining: 12.3s
811:	learn: 0.0128644	total: 52.6s	remaining: 12.2s
812:	learn: 0.0128471	total: 52.7s	remaining: 12.1s
813:	learn: 0.0128074	total: 52.8s	remaining: 12.1s
814:	learn: 0.0127834	total: 52.9s	remaining: 12s
815:	learn: 0.0127470	total: 52.9s	remaining: 11.9s
816:	learn: 0.0127328	total: 53s	remaining: 11.9s
817:	learn: 0.0127223	total: 53.1s	remaining: 11.8s
818:	learn: 0.012693

960:	learn: 0.0101680	total: 1m 2s	remaining: 2.52s
961:	learn: 0.0101351	total: 1m 2s	remaining: 2.45s
962:	learn: 0.0101264	total: 1m 2s	remaining: 2.39s
963:	learn: 0.0101122	total: 1m 2s	remaining: 2.32s
964:	learn: 0.0100862	total: 1m 2s	remaining: 2.26s
965:	learn: 0.0100742	total: 1m 2s	remaining: 2.19s
966:	learn: 0.0100649	total: 1m 2s	remaining: 2.13s
967:	learn: 0.0100583	total: 1m 2s	remaining: 2.07s
968:	learn: 0.0100389	total: 1m 2s	remaining: 2s
969:	learn: 0.0100259	total: 1m 2s	remaining: 1.94s
970:	learn: 0.0100147	total: 1m 2s	remaining: 1.87s
971:	learn: 0.0099953	total: 1m 2s	remaining: 1.81s
972:	learn: 0.0099908	total: 1m 2s	remaining: 1.74s
973:	learn: 0.0099798	total: 1m 2s	remaining: 1.68s
974:	learn: 0.0099767	total: 1m 2s	remaining: 1.61s
975:	learn: 0.0099514	total: 1m 3s	remaining: 1.55s
976:	learn: 0.0099373	total: 1m 3s	remaining: 1.49s
977:	learn: 0.0099286	total: 1m 3s	remaining: 1.42s
978:	learn: 0.0099230	total: 1m 3s	remaining: 1.36s
979:	learn: 0.0

<catboost.core.CatBoostClassifier at 0x1b1d590f520>

In [73]:
preds = cbc.predict(comprehensive_features_test)
preds = [a[0] for a in preds]
preds

[3,
 4,
 5,
 20,
 13,
 13,
 13,
 4,
 5,
 3,
 16,
 9,
 2,
 8,
 17,
 2,
 19,
 5,
 2,
 18,
 17,
 18,
 12,
 7,
 16,
 18,
 8,
 15,
 16,
 3,
 16,
 6,
 10,
 9,
 4,
 3,
 6,
 20,
 5,
 1,
 5,
 10,
 3,
 3,
 1,
 3,
 18,
 18,
 10,
 20,
 17,
 3,
 7,
 4,
 3,
 12,
 16,
 16,
 8,
 17,
 15,
 4,
 17,
 13,
 9,
 13,
 20,
 8,
 4,
 15,
 14,
 20,
 16,
 9,
 12,
 8,
 8,
 6,
 18,
 9,
 13,
 8,
 17,
 8,
 11,
 12,
 19,
 12,
 14,
 17,
 17,
 16,
 20,
 5,
 11,
 1,
 10,
 5,
 13,
 11,
 8,
 16,
 17,
 20,
 14,
 17,
 3,
 17,
 16,
 3,
 17,
 5,
 2,
 8,
 5,
 16,
 12,
 3,
 4,
 10,
 9,
 14,
 20,
 15,
 9,
 13,
 16,
 18,
 17,
 17,
 20,
 1,
 12,
 11,
 2,
 4,
 2,
 5,
 11,
 18,
 14,
 15,
 4,
 19,
 9,
 12,
 20,
 4,
 19,
 4,
 10,
 13,
 16,
 10,
 14,
 14,
 10,
 5,
 7,
 8,
 4,
 20,
 12,
 13,
 19,
 1,
 5,
 2,
 19,
 6,
 3,
 18,
 12,
 2,
 12,
 5,
 10,
 15,
 20,
 14,
 1,
 19,
 9,
 4,
 14,
 15,
 10,
 17,
 4,
 17,
 6,
 15,
 14,
 14,
 18,
 11,
 13,
 9,
 18,
 7,
 17,
 5,
 8,
 10,
 20,
 14,
 4,
 14,
 20,
 17,
 19,
 18,
 5,
 19,
 18,
 20,
 20,
 9,

In [74]:
sub = make_submission(preds)
sub

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

Unnamed: 0,id,class
0,10001,3
1,10002,4
2,10004,5
3,10008,20
4,10009,13
...,...,...
4995,23986,9
4996,23991,12
4997,23992,4
4998,23998,5


In [75]:
sub.to_csv('submission_catboost_tsfresh_comprehensive_features_with_acc.csv', index=False)

In [76]:
print(len(set(comprehensive_features_train.columns)))

3156


### Fill NaN for comprehensive_features train

In [82]:
comprehensive_features_train.fillna(0, inplace=True)

### Try Ensemble again, we seem to have unique column names

In [83]:
etc_clf = ExtraTreesClassifier(n_jobs=-1, n_estimators=933, max_depth=79, min_samples_split=2, min_samples_leaf=1, \
                               bootstrap = False, warm_start = False)

gb_clf = GradientBoostingClassifier(learning_rate=0.2, max_depth=5, min_samples_leaf=0.1, \
                                    min_samples_split=0.1363, subsample=1.0)

cbc_clf = CatBoostClassifier()

tabnet = TabNetClassifier(optimizer_params=dict(lr=2e-2), scheduler_params={"step_size":10, "gamma":0.9}, scheduler_fn=torch.optim.lr_scheduler.StepLR)
tabnet._estimator_type = "classifier"

estimators = [("etc", etc_clf), ("gb", gb_clf), ("cbc", cbc_clf), ('tabnet', tabnet)]
ensemble_estimators = StackingClassifier(estimators, final_estimator=LogisticRegression(), n_jobs=-1)

In [87]:
%%time
ensemble_estimators.fit(comprehensive_features_train.to_numpy(), y_train)



CPU times: total: 3.19 s
Wall time: 8h 49min 49s


In [99]:
comprehensive_features_train.shape

(9000, 3156)

In [101]:
comprehensive_features_test = comprehensive_features_test[comprehensive_features_train.columns]

### Fill NaN for comprehensive features test

In [102]:
comprehensive_features_test.fillna(0, inplace=True)

### Create Predictions using Ensemble

In [103]:
test_array = comprehensive_features_test.to_numpy()

In [104]:
test_array.shape

(5000, 3156)

In [105]:
preds = ensemble_estimators.predict(test_array)
preds

array([3, 4, 5, ..., 4, 5, 1], dtype=int64)

In [106]:
sub = make_submission(preds)
sub

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

Unnamed: 0,id,class
0,10001,3
1,10002,4
2,10004,5
3,10008,20
4,10009,13
...,...,...
4995,23986,9
4996,23991,12
4997,23992,4
4998,23998,5


In [107]:
sub.to_csv('submission_ensemble_tsfresh_comprehensive_features_with_acc.csv', index=False)