In [1]:
# !pip install -U tsfresh

In [50]:
import numpy as np
import pandas as pd
import tsfresh
import os
import pickle

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, StackingClassifier, GradientBoostingClassifier
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold, RandomizedSearchCV
from catboost import CatBoostClassifier
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
import tsfresh
import lightgbm as lgb
from pytorch_tabnet.tab_model import TabNetClassifier
import torch
import re

In [29]:
def make_submission(preds):
    assert len(preds) == 5000
    
    # Read labels
    with open('test_labels_sorted.npy', 'rb') as f:
        test_labels = np.load(f)
    len(test_labels)
    
    submission = pd.DataFrame(columns=['id', 'class'])
    for label, pred in zip(test_labels, preds):
        submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
    return submission

In [30]:
def plot_confusion_matrix(y_val_from_train, y_pred_from_train):
    fig, ax = plt.subplots(figsize=(8, 5))
    cmp = ConfusionMatrixDisplay(confusion_matrix(y_val_from_train, y_pred_from_train))
    cmp.plot(ax=ax)
    plt.show()

### Set train files

In [4]:
train_files = os.listdir('train/train/')
train_files.sort()
train_files[:5]

['10003.csv', '10005.csv', '10006.csv', '10007.csv', '10012.csv']

### Set test files

In [5]:
test_files = os.listdir('test/test/')
test_files.sort()
print(test_files[:5], len(test_files))

['10001.csv', '10002.csv', '10004.csv', '10008.csv', '10009.csv'] 5000


### Create input DF

In [6]:
columns = ['id', 'x', 'y', 'z']
N = 9000
df_train = pd.DataFrame(columns=columns)
labels = []

for i, file in enumerate(train_files[:N]):
    
    if i % 50 == 0:
        print(f"Iteration {i}...")
    
    # Take each training sample and store it as a temporary dataframe
    full_path = f'train/train/{file}'
    temp_df = pd.read_csv(full_path, names=['x', 'y', 'z'])
    
    # Assign each column as a numpy array
    x = temp_df['x'].to_numpy()
    y = temp_df['y'].to_numpy()
    z = temp_df['z'].to_numpy()
    
    time = 0
    for xx, yy, zz in zip(x, y, z):
        row = {'id': int(i), 'time': time, 'x': xx, 'y': yy, 'z': zz}
        time += 1
        row_df = pd.DataFrame([row])
        df_train = pd.concat([df_train, row_df], axis=0, ignore_index=True)
        
df_train['id'] = df_train['id'].astype(np.int64)
df_train['time'] = df_train['time'].astype(np.int64)

In [7]:
# df_train.shape

In [8]:
# df_train.head()

In [9]:
# df_train.to_csv('df_train_tsfresh.csv', index=False)

### Create test DF

In [26]:
columns = ['id', 'x', 'y', 'z']
N = 5000
df_test = pd.DataFrame(columns=columns)
labels = []

for i, file in enumerate(test_files[:N]):
    
    if i % 50 == 0:
        print(f"Iteration {i}...")
    
    # Take each training sample and store it as a temporary dataframe
    full_path = f'test/test/{file}'
    temp_df = pd.read_csv(full_path, names=['x', 'y', 'z'])
    
    # Assign each column as a numpy array
    x = temp_df['x'].to_numpy()
    y = temp_df['y'].to_numpy()
    z = temp_df['z'].to_numpy()
    
    time = 0
    for xx, yy, zz in zip(x, y, z):
        row = {'id': int(i), 'time': time, 'x': xx, 'y': yy, 'z': zz}
        time += 1
        row_df = pd.DataFrame([row])
        df_test = pd.concat([df_test, row_df], axis=0, ignore_index=True)
        
df_test['id'] = df_test['id'].astype(np.int64)
df_test['time'] = df_test['time'].astype(np.int64)

Iteration 0...
Iteration 50...
Iteration 100...
Iteration 150...
Iteration 200...
Iteration 250...
Iteration 300...
Iteration 350...
Iteration 400...
Iteration 450...
Iteration 500...
Iteration 550...
Iteration 600...
Iteration 650...
Iteration 700...
Iteration 750...
Iteration 800...
Iteration 850...
Iteration 900...
Iteration 950...
Iteration 1000...
Iteration 1050...
Iteration 1100...
Iteration 1150...
Iteration 1200...
Iteration 1250...
Iteration 1300...
Iteration 1350...
Iteration 1400...
Iteration 1450...
Iteration 1500...
Iteration 1550...
Iteration 1600...
Iteration 1650...
Iteration 1700...
Iteration 1750...
Iteration 1800...
Iteration 1850...
Iteration 1900...
Iteration 1950...
Iteration 2000...
Iteration 2050...
Iteration 2100...
Iteration 2150...
Iteration 2200...
Iteration 2250...
Iteration 2300...
Iteration 2350...
Iteration 2400...
Iteration 2450...
Iteration 2500...
Iteration 2550...
Iteration 2600...
Iteration 2650...
Iteration 2700...
Iteration 2750...
Iteration 2800.

In [27]:
df_test.shape

(749517, 5)

In [28]:
df_test.head()

Unnamed: 0,id,x,y,z,time
0,0,-0.320224,4.966173,7.307702,0
1,0,-0.228646,4.947618,7.289746,1
2,0,-0.15682,4.929063,7.320271,2
3,0,-0.087388,5.010466,7.249643,3
4,0,-0.032322,5.003882,7.349601,4


In [31]:
df_test.to_csv('df_test_tsfresh.csv', index=False)

### Read y_train

In [10]:
# Load y_train
with open('y_train.npy', 'rb') as f:
    y_train = np.load(f)
print(y_train.shape)
y_train

(9000,)


array([ 7, 13,  6, ...,  3,  3, 18], dtype=int64)

### Train test split

In [25]:
# Create validation data
# Train/test sets
# X_train_from_train, X_val_from_train, y_train_from_train, y_val_from_train = train_test_split(
#     df_train, y_train, test_size=0.15, random_state=42
# )

### Extract Relevant Features Train

In [12]:
df_train = pd.read_csv('df_train_tsfresh.csv')
df_train.shape

(1348955, 5)

In [16]:
df_train.id

0             0
1             0
2             0
3             0
4             0
           ... 
1348950    8999
1348951    8999
1348952    8999
1348953    8999
1348954    8999
Name: id, Length: 1348955, dtype: int64

In [14]:
y_train_series = pd.Series(y_train)
y_train_series

0        7
1       13
2        6
3        1
4        1
        ..
8995    20
8996     2
8997     3
8998     3
8999    18
Length: 9000, dtype: int64

In [18]:
relevant_features = extract_relevant_features(df_train, y_train_series, column_id='id', column_sort='time')

Feature Extraction: 100%|██████████| 30/30 [03:36<00:00,  7.22s/it]


In [19]:
relevant_features_train.shape

(9000, 1801)

In [20]:
relevant_features_train.head()

Unnamed: 0,z__autocorrelation__lag_2,z__autocorrelation__lag_3,z__cid_ce__normalize_True,z__autocorrelation__lag_1,z__partial_autocorrelation__lag_1,z__binned_entropy__max_bins_10,z__autocorrelation__lag_4,z__large_standard_deviation__r_0.15000000000000002,z__lempel_ziv_complexity__bins_100,z__kurtosis,...,z__index_mass_quantile__q_0.4,"z__fft_coefficient__attr_""real""__coeff_63","z__fft_coefficient__attr_""imag""__coeff_44","y__fft_coefficient__attr_""real""__coeff_1",z__time_reversal_asymmetry_statistic__lag_3,"y__fft_coefficient__attr_""imag""__coeff_37",x__skewness,z__count_above_mean,x__has_duplicate_max,x__large_standard_deviation__r_0.35000000000000003
0,0.383785,0.071512,7.927186,0.790512,0.790512,1.612019,-0.038573,0.0,0.613333,2.897404,...,0.4,0.020175,1.610347,18.880332,2.786798,-0.139789,-0.396157,61.0,0.0,0.0
1,0.467027,0.072127,7.218087,0.829486,0.829486,1.923455,-0.210522,1.0,0.644295,0.763159,...,0.409396,-0.193747,0.304703,17.646115,1.155473,0.837684,-0.311413,71.0,0.0,0.0
2,0.836092,0.684936,3.85489,0.954838,0.954838,1.780139,0.542536,1.0,0.626667,4.174026,...,0.386667,0.250836,-0.455345,16.39727,-1.295116,0.398187,0.656742,66.0,0.0,0.0
3,0.345219,-0.116666,7.942487,0.79353,0.79353,1.698915,-0.443249,0.0,0.633333,3.35024,...,0.406667,0.623098,-1.401261,-6.580115,-3.361814,-0.440244,0.23451,85.0,0.0,0.0
4,0.162649,-0.269728,9.410718,0.708973,0.708973,1.468597,-0.445556,1.0,0.613333,5.158069,...,0.4,0.644833,-1.0391,-22.167133,-0.182558,-0.639239,-0.287995,81.0,0.0,0.0


In [21]:
relevant_features_train.to_csv('relevant_features_train_df_tsfresh.csv', index=False)

### Extract Relevant Features Test

In [32]:
relevant_features_test = tsfresh.extract_features(df_test, column_id='id', column_sort='time')

Feature Extraction: 100%|██████████| 30/30 [01:46<00:00,  3.53s/it]


In [34]:
relevant_features_test.shape

(5000, 2367)

In [35]:
relevant_features_test.head()

Unnamed: 0,x__variance_larger_than_standard_deviation,x__has_duplicate_max,x__has_duplicate_min,x__has_duplicate,x__sum_values,x__abs_energy,x__mean_abs_change,x__mean_change,x__mean_second_derivative_central,x__median,...,z__permutation_entropy__dimension_6__tau_1,z__permutation_entropy__dimension_7__tau_1,z__query_similarity_count__query_None__threshold_0.0,"z__matrix_profile__feature_""min""__threshold_0.98","z__matrix_profile__feature_""max""__threshold_0.98","z__matrix_profile__feature_""mean""__threshold_0.98","z__matrix_profile__feature_""median""__threshold_0.98","z__matrix_profile__feature_""25""__threshold_0.98","z__matrix_profile__feature_""75""__threshold_0.98",z__mean_n_absolute_max__number_of_maxima_7
0,0.0,0.0,0.0,1.0,-45.811852,96.766993,0.114231,0.005029,-0.000162,-0.232536,...,3.762578,4.245347,,0.99041,4.449196,2.495787,2.456786,1.75123,3.254645,10.401438
1,0.0,0.0,0.0,0.0,-52.769405,112.703054,0.104212,0.011292,-0.001014,-0.475848,...,3.242708,3.794044,,1.637313,5.109867,3.309542,3.247018,2.978184,3.741292,8.443238
2,0.0,0.0,0.0,1.0,-127.081834,129.84536,0.055581,-0.00605,0.000319,-0.731728,...,3.821317,4.331212,,0.829056,3.442428,1.765044,1.704453,1.287588,2.201886,9.493352
3,0.0,0.0,0.0,1.0,-24.30953,68.464475,0.151678,0.005254,-0.000623,-0.31783,...,3.799215,4.322378,,0.802354,2.982442,1.809753,1.74906,1.501245,2.156459,8.465555
4,0.0,0.0,0.0,1.0,27.24063,30.78687,0.102927,0.005658,9.6e-05,0.159813,...,4.088793,4.610421,,0.792163,3.579949,1.613036,1.468788,1.195924,1.842903,9.265475


In [37]:
relevant_features_test = relevant_features_test[relevant_features_train.columns]

In [38]:
relevant_features_test.shape

(5000, 1801)

In [39]:
relevant_features_test.to_csv('relevant_features_test_df_tsfresh.csv', index=False)

### Train Catboost on Relevant Features

In [40]:
cbc = CatBoostClassifier(loss_function='MultiClass', task_type="GPU")

In [41]:
%%time
cbc.fit(relevant_features_train, y_train)

Learning rate set to 0.101613
0:	learn: 2.4376376	total: 63.3ms	remaining: 1m 3s
1:	learn: 2.1561513	total: 112ms	remaining: 55.9s
2:	learn: 1.8919807	total: 171ms	remaining: 56.9s
3:	learn: 1.6938308	total: 225ms	remaining: 56s
4:	learn: 1.5356283	total: 278ms	remaining: 55.3s
5:	learn: 1.4366620	total: 332ms	remaining: 55.1s
6:	learn: 1.3346274	total: 391ms	remaining: 55.5s
7:	learn: 1.2462224	total: 444ms	remaining: 55s
8:	learn: 1.1655085	total: 498ms	remaining: 54.8s
9:	learn: 1.1117673	total: 551ms	remaining: 54.5s
10:	learn: 1.0408433	total: 608ms	remaining: 54.7s
11:	learn: 0.9803968	total: 658ms	remaining: 54.1s
12:	learn: 0.9291310	total: 712ms	remaining: 54.1s
13:	learn: 0.8781223	total: 763ms	remaining: 53.8s
14:	learn: 0.8357736	total: 821ms	remaining: 53.9s
15:	learn: 0.7973394	total: 875ms	remaining: 53.8s
16:	learn: 0.7655645	total: 933ms	remaining: 54s
17:	learn: 0.7280308	total: 996ms	remaining: 54.3s
18:	learn: 0.6929767	total: 1.06s	remaining: 54.6s
19:	learn: 0.663

161:	learn: 0.0959425	total: 8.09s	remaining: 41.9s
162:	learn: 0.0951243	total: 8.14s	remaining: 41.8s
163:	learn: 0.0944923	total: 8.19s	remaining: 41.7s
164:	learn: 0.0943692	total: 8.23s	remaining: 41.6s
165:	learn: 0.0940156	total: 8.27s	remaining: 41.6s
166:	learn: 0.0934865	total: 8.31s	remaining: 41.5s
167:	learn: 0.0926172	total: 8.36s	remaining: 41.4s
168:	learn: 0.0919025	total: 8.41s	remaining: 41.4s
169:	learn: 0.0913502	total: 8.45s	remaining: 41.3s
170:	learn: 0.0906857	total: 8.5s	remaining: 41.2s
171:	learn: 0.0903690	total: 8.55s	remaining: 41.1s
172:	learn: 0.0897515	total: 8.59s	remaining: 41.1s
173:	learn: 0.0892476	total: 8.64s	remaining: 41s
174:	learn: 0.0889208	total: 8.68s	remaining: 40.9s
175:	learn: 0.0883820	total: 8.73s	remaining: 40.9s
176:	learn: 0.0879308	total: 8.77s	remaining: 40.8s
177:	learn: 0.0872959	total: 8.82s	remaining: 40.7s
178:	learn: 0.0868285	total: 8.87s	remaining: 40.7s
179:	learn: 0.0862433	total: 8.91s	remaining: 40.6s
180:	learn: 0.0

321:	learn: 0.0512827	total: 15.5s	remaining: 32.5s
322:	learn: 0.0511106	total: 15.5s	remaining: 32.5s
323:	learn: 0.0509185	total: 15.6s	remaining: 32.4s
324:	learn: 0.0506070	total: 15.6s	remaining: 32.4s
325:	learn: 0.0502814	total: 15.6s	remaining: 32.3s
326:	learn: 0.0500393	total: 15.7s	remaining: 32.3s
327:	learn: 0.0499401	total: 15.7s	remaining: 32.2s
328:	learn: 0.0497490	total: 15.8s	remaining: 32.2s
329:	learn: 0.0496095	total: 15.8s	remaining: 32.1s
330:	learn: 0.0494219	total: 15.9s	remaining: 32.1s
331:	learn: 0.0493264	total: 15.9s	remaining: 32s
332:	learn: 0.0491802	total: 16s	remaining: 32s
333:	learn: 0.0489388	total: 16s	remaining: 31.9s
334:	learn: 0.0487963	total: 16.1s	remaining: 31.9s
335:	learn: 0.0486196	total: 16.1s	remaining: 31.8s
336:	learn: 0.0484823	total: 16.2s	remaining: 31.8s
337:	learn: 0.0482940	total: 16.2s	remaining: 31.7s
338:	learn: 0.0481062	total: 16.3s	remaining: 31.7s
339:	learn: 0.0479010	total: 16.3s	remaining: 31.6s
340:	learn: 0.047725

481:	learn: 0.0327367	total: 22.9s	remaining: 24.7s
482:	learn: 0.0325717	total: 23s	remaining: 24.6s
483:	learn: 0.0324884	total: 23s	remaining: 24.6s
484:	learn: 0.0324579	total: 23.1s	remaining: 24.5s
485:	learn: 0.0323746	total: 23.1s	remaining: 24.5s
486:	learn: 0.0323272	total: 23.2s	remaining: 24.4s
487:	learn: 0.0322649	total: 23.2s	remaining: 24.4s
488:	learn: 0.0321846	total: 23.3s	remaining: 24.3s
489:	learn: 0.0320762	total: 23.3s	remaining: 24.3s
490:	learn: 0.0319622	total: 23.4s	remaining: 24.2s
491:	learn: 0.0318863	total: 23.4s	remaining: 24.2s
492:	learn: 0.0318035	total: 23.5s	remaining: 24.1s
493:	learn: 0.0317125	total: 23.5s	remaining: 24.1s
494:	learn: 0.0316585	total: 23.5s	remaining: 24s
495:	learn: 0.0316207	total: 23.6s	remaining: 24s
496:	learn: 0.0315107	total: 23.6s	remaining: 23.9s
497:	learn: 0.0314866	total: 23.7s	remaining: 23.9s
498:	learn: 0.0313849	total: 23.7s	remaining: 23.8s
499:	learn: 0.0313564	total: 23.8s	remaining: 23.8s
500:	learn: 0.031267

641:	learn: 0.0227549	total: 30.3s	remaining: 16.9s
642:	learn: 0.0227116	total: 30.4s	remaining: 16.9s
643:	learn: 0.0226672	total: 30.4s	remaining: 16.8s
644:	learn: 0.0225511	total: 30.5s	remaining: 16.8s
645:	learn: 0.0225312	total: 30.5s	remaining: 16.7s
646:	learn: 0.0225060	total: 30.6s	remaining: 16.7s
647:	learn: 0.0224158	total: 30.6s	remaining: 16.6s
648:	learn: 0.0223737	total: 30.7s	remaining: 16.6s
649:	learn: 0.0223184	total: 30.7s	remaining: 16.5s
650:	learn: 0.0222816	total: 30.8s	remaining: 16.5s
651:	learn: 0.0222590	total: 30.8s	remaining: 16.4s
652:	learn: 0.0222049	total: 30.9s	remaining: 16.4s
653:	learn: 0.0221886	total: 30.9s	remaining: 16.3s
654:	learn: 0.0221400	total: 30.9s	remaining: 16.3s
655:	learn: 0.0221246	total: 31s	remaining: 16.2s
656:	learn: 0.0221017	total: 31s	remaining: 16.2s
657:	learn: 0.0220795	total: 31.1s	remaining: 16.2s
658:	learn: 0.0220656	total: 31.1s	remaining: 16.1s
659:	learn: 0.0220190	total: 31.2s	remaining: 16.1s
660:	learn: 0.02

800:	learn: 0.0166981	total: 37.7s	remaining: 9.36s
801:	learn: 0.0166806	total: 37.7s	remaining: 9.31s
802:	learn: 0.0166162	total: 37.8s	remaining: 9.26s
803:	learn: 0.0166000	total: 37.8s	remaining: 9.21s
804:	learn: 0.0165764	total: 37.8s	remaining: 9.17s
805:	learn: 0.0165329	total: 37.9s	remaining: 9.12s
806:	learn: 0.0165238	total: 37.9s	remaining: 9.07s
807:	learn: 0.0164924	total: 38s	remaining: 9.03s
808:	learn: 0.0164639	total: 38s	remaining: 8.98s
809:	learn: 0.0164407	total: 38.1s	remaining: 8.93s
810:	learn: 0.0164346	total: 38.1s	remaining: 8.88s
811:	learn: 0.0164213	total: 38.2s	remaining: 8.84s
812:	learn: 0.0163950	total: 38.2s	remaining: 8.79s
813:	learn: 0.0163848	total: 38.3s	remaining: 8.74s
814:	learn: 0.0163567	total: 38.3s	remaining: 8.69s
815:	learn: 0.0163156	total: 38.3s	remaining: 8.65s
816:	learn: 0.0162840	total: 38.4s	remaining: 8.6s
817:	learn: 0.0162682	total: 38.4s	remaining: 8.55s
818:	learn: 0.0162313	total: 38.5s	remaining: 8.51s
819:	learn: 0.016

960:	learn: 0.0128398	total: 44.9s	remaining: 1.82s
961:	learn: 0.0128222	total: 45s	remaining: 1.78s
962:	learn: 0.0128189	total: 45s	remaining: 1.73s
963:	learn: 0.0127933	total: 45.1s	remaining: 1.68s
964:	learn: 0.0127765	total: 45.1s	remaining: 1.64s
965:	learn: 0.0127390	total: 45.2s	remaining: 1.59s
966:	learn: 0.0127293	total: 45.2s	remaining: 1.54s
967:	learn: 0.0127139	total: 45.2s	remaining: 1.5s
968:	learn: 0.0126963	total: 45.3s	remaining: 1.45s
969:	learn: 0.0126835	total: 45.3s	remaining: 1.4s
970:	learn: 0.0126716	total: 45.4s	remaining: 1.35s
971:	learn: 0.0126499	total: 45.4s	remaining: 1.31s
972:	learn: 0.0126215	total: 45.5s	remaining: 1.26s
973:	learn: 0.0125961	total: 45.5s	remaining: 1.22s
974:	learn: 0.0125716	total: 45.6s	remaining: 1.17s
975:	learn: 0.0125682	total: 45.6s	remaining: 1.12s
976:	learn: 0.0125377	total: 45.7s	remaining: 1.07s
977:	learn: 0.0125319	total: 45.7s	remaining: 1.03s
978:	learn: 0.0125205	total: 45.8s	remaining: 982ms
979:	learn: 0.0124

<catboost.core.CatBoostClassifier at 0x1d8a02baf10>

### Predict on Relevant Features Test

In [42]:
preds = cbc.predict(relevant_features_test)
preds = [a[0] for a in preds]
preds

[3,
 4,
 5,
 20,
 13,
 13,
 13,
 4,
 5,
 3,
 16,
 9,
 2,
 8,
 17,
 15,
 19,
 5,
 2,
 18,
 17,
 18,
 12,
 7,
 16,
 18,
 8,
 15,
 16,
 3,
 16,
 6,
 10,
 9,
 4,
 3,
 6,
 20,
 5,
 1,
 5,
 10,
 3,
 3,
 1,
 3,
 18,
 18,
 10,
 20,
 17,
 3,
 7,
 4,
 3,
 12,
 16,
 16,
 11,
 17,
 15,
 4,
 17,
 13,
 9,
 13,
 20,
 11,
 4,
 15,
 14,
 20,
 16,
 9,
 12,
 8,
 5,
 11,
 18,
 9,
 13,
 6,
 17,
 8,
 11,
 12,
 19,
 12,
 14,
 17,
 17,
 16,
 11,
 5,
 11,
 1,
 10,
 5,
 13,
 11,
 5,
 16,
 17,
 20,
 14,
 17,
 3,
 17,
 16,
 3,
 17,
 5,
 2,
 8,
 5,
 16,
 12,
 3,
 20,
 10,
 9,
 14,
 20,
 15,
 9,
 13,
 16,
 18,
 17,
 17,
 20,
 1,
 12,
 11,
 2,
 4,
 2,
 5,
 11,
 18,
 8,
 15,
 4,
 19,
 9,
 12,
 20,
 4,
 19,
 4,
 10,
 18,
 16,
 10,
 14,
 14,
 10,
 5,
 7,
 8,
 4,
 20,
 12,
 13,
 19,
 1,
 5,
 2,
 19,
 6,
 3,
 18,
 12,
 2,
 12,
 5,
 10,
 15,
 20,
 14,
 1,
 19,
 9,
 4,
 14,
 15,
 10,
 17,
 4,
 17,
 6,
 15,
 14,
 14,
 18,
 4,
 13,
 9,
 18,
 7,
 17,
 5,
 8,
 10,
 20,
 14,
 4,
 14,
 20,
 17,
 19,
 18,
 5,
 19,
 18,
 20,
 20,


In [43]:
sub = make_submission(preds)
sub

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = submission.append({'id': label, 'class': pred}, ignore_index=True)
  submission = s

Unnamed: 0,id,class
0,10001,3
1,10002,4
2,10004,5
3,10008,20
4,10009,13
...,...,...
4995,23986,9
4996,23991,12
4997,23992,18
4998,23998,5


### Save Sub Catboost

In [44]:
sub.to_csv('submission_catboost_tsfresh_relevant_features.csv', index=False)

### TODO: Try Ensemble

In [51]:
etc_clf = ExtraTreesClassifier(n_jobs=-1, n_estimators=933, max_depth=79, min_samples_split=2, min_samples_leaf=1, \
                               bootstrap = False, warm_start = False)

gb_clf = GradientBoostingClassifier(learning_rate=0.2, max_depth=5, min_samples_leaf=0.1, \
                                    min_samples_split=0.1363, subsample=1.0)

lgb_clf = lgb.LGBMClassifier(objective='multiclass', num_class=20, n_jobs=-1, seed=42, boosting='dart', \
                             min_child_samples=12, num_iterations=1936, num_leaves=66, min_data_in_leaf=50, \
                             max_bin=20, max_depth=17, learning_rate=0.24, reg_alpha=0.0004127769671094072)

cbc_clf = CatBoostClassifier()

tabnet = TabNetClassifier(optimizer_params=dict(lr=2e-2), scheduler_params={"step_size":10, "gamma":0.9}, scheduler_fn=torch.optim.lr_scheduler.StepLR)
tabnet._estimator_type = "classifier"

estimators = [("etc", etc_clf), ("gb", gb_clf), ("lgb", lgb_clf), ("cbc", cbc_clf), ('tabnet', tabnet)]
ensemble_estimators = StackingClassifier(estimators, final_estimator=LogisticRegression(), n_jobs=-1)

### Quick pre-processing for DataFrames

In [56]:
features_filtered_direct = features_filtered_direct.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))
relevant_features_test = relevant_features_test.rename(columns = lambda x:re.sub('[^A-Za-z0-9_]+', '', x))

In [None]:
%%time
ensemble_estimators.fit(features_filtered_direct, y_train)



In [None]:
preds = ensemble_estimators.predict(relevant_features_test)
preds

In [None]:
sub = make_submission(preds)
sub

In [None]:
sub.to_csv('submission_ensemble_tsfresh_relevant_features.csv', index=False)