In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.utils import Sequence
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from sklearn.utils.class_weight import compute_class_weight
from lightgbm import LGBMClassifier
from sklearn.metrics import balanced_accuracy_score
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

In [2]:
train_features = pd.read_csv('features_train.csv')
train_lab = np.ravel(pd.read_csv('train_labels.csv').drop(columns='Id').to_numpy())-1

In [3]:
x_subj1, y_subj1 = train_features[0:21600], train_lab[0:21600]
x_subj2, y_subj2 = train_features[21600:2*21600], train_lab[21600:2*21600]
x_subj3, y_subj3 = train_features[2*21600:3*21600], train_lab[2*21600:3*21600]

In [4]:
shift_arr = [2,1,0,-1,-2]

In [5]:
x_subj1 = pd.concat([x_subj1.shift(i) for i in shift_arr], axis=1).fillna(0).to_numpy()
x_subj2 = pd.concat([x_subj2.shift(i) for i in shift_arr], axis=1).fillna(0).to_numpy()
x_subj3 = pd.concat([x_subj3.shift(i) for i in shift_arr], axis=1).fillna(0).to_numpy()

In [6]:
BAC = []
fold1_train = (np.concatenate((x_subj1, x_subj2), axis=0), np.concatenate((y_subj1, y_subj2), axis=0))
fold1_test = (x_subj3, y_subj3)

fold2_train = (np.concatenate((x_subj1, x_subj3), axis=0), np.concatenate((y_subj1, y_subj3), axis=0))
fold2_test = (x_subj2, y_subj2)

fold3_train = (np.concatenate((x_subj2, x_subj3), axis=0), np.concatenate((y_subj2, y_subj3), axis=0))
fold3_test = (x_subj1, y_subj1)

folds = [ (fold1_train, fold1_test)
        , (fold2_train, fold2_test)
        , (fold3_test, fold3_test)]

for ((X_cvtrain, y_cvtrain), (X_cvtest, y_cvtest)) in folds:
    model = LGBMClassifier(objective='multiclass',
                        n_estimators = 60,
                        learning_rate = 0.1,
                        random_state=42,
                        max_depth = 150,
                        min_child_samples = 20,
                        subsample = 0.75,
                        colsample_bytree = 0.8,
                        reg_alpha = 0,
                        class_weight="balanced")
    X_cvtrain = StandardScaler().fit_transform(X_cvtrain)
    X_cvtest = StandardScaler().fit_transform(X_cvtest)
    model.fit(X_cvtrain, y_cvtrain)

    predtrain = model.predict(X_cvtrain)
    pred = model.predict(X_cvtest)

    print("\nTrain BMAC:")
    print(np.round(balanced_accuracy_score(y_cvtrain, predtrain), 4))
    print("\nTest BMAC:")
    print(np.round(balanced_accuracy_score(y_cvtest, pred), 4))
    print("\n________________________")

    BAC.append(np.round(balanced_accuracy_score(y_cvtest, pred), 4))

print("\nAverage BMAC:", round(np.sum(BAC)/3, 4))
print("Std:", round(np.std(BAC), 4))


Train BMAC:
0.9923

Test BMAC:
0.957

________________________

Train BMAC:
0.9912

Test BMAC:
0.9621

________________________

Train BMAC:
0.9973

Test BMAC:
0.9973

________________________

Average BMAC: 0.9721
Std: 0.0179


In [6]:
test_features = pd.read_csv('features_test.csv')
test_subj1 = test_features[0:21600]
test_subj2 = test_features[21600:2*21600]
test_subj1 = pd.concat([test_subj1.shift(i) for i in shift_arr], axis=1).fillna(0).to_numpy()
test_subj2 = pd.concat([test_subj2.shift(i) for i in shift_arr], axis=1).fillna(0).to_numpy()

In [7]:
model = LGBMClassifier(objective='multiclass',
                    n_estimators = 60,
                    learning_rate = 0.1,
                    random_state=42,
                    max_depth = 150,
                    min_child_samples = 20,
                    subsample = 0.75,
                    colsample_bytree = 0.8,
                    reg_alpha = 0,
                    class_weight="balanced")

In [8]:
pred_train_x = np.concatenate([x_subj1, x_subj2, x_subj3], axis=0)
pred_train_x = StandardScaler().fit_transform(pred_train_x)
model.fit(pred_train_x, np.concatenate((y_subj1, y_subj2, y_subj3)))



LGBMClassifier(class_weight='balanced', colsample_bytree=0.8, max_depth=150,
               n_estimators=60, objective='multiclass', random_state=42,
               reg_alpha=0, subsample=0.75)

In [9]:
pred_test_x = np.concatenate([test_subj1, test_subj2], axis=0)
pred_test_x = StandardScaler().fit_transform(pred_test_x)
preds = model.predict(pred_test_x)+1

In [11]:
dfResults = pd.DataFrame({"Id": list(range(0, preds.shape[0], 1)), "y": preds})
dfResults.to_csv("Results.csv", sep=',', index=False)