# Train sktime-dl LSTM-FCN Model

In [None]:
import os
# TensorFlow runtime configuration. These variables are set before the first
# TensorFlow import (pulled in below via sktime_dl) so that they are in place
# when the library initialises.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Hide INFO and WARNING messages from the TF C++ backend; errors are still printed
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # Let TF grow GPU memory on demand (has no effect while the GPU is hidden below)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Disable GPU

import time
import numpy as np
import pandas as pd

from sktime_dl.classification import LSTMFCNClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, f1_score, make_scorer

In [None]:
# Path of the training CSV that read_data_sktime loads below.
DATA_TRAIN = "./dataset/SITS-train-phase1-subset-1000.csv"
# True  -> every sample is loaded as one single channel,
# False -> the feature columns of every sample are split into 3 channels.
# The same flag is reused for the test data so both sets share one layout.
use_univariate = False # also used in prediction later on

# Set univariate=True to load every sample as ONE channel (for classifiers
# without multivariate support); leave it False to split the feature columns
# of every sample into `n_channels` channels.
def read_data_sktime(DATA, univariate=False, n_channels=3):
    """Load a CSV dataset and return it as a 3D array plus integer labels.

    The CSV must contain an ``id`` column (used as the index). After the index
    is removed, the first remaining column is taken as the label and all
    following columns as features. ``?`` entries are read as NaN.

    Parameters
    ----------
    DATA : str or path-like
        Location of the CSV file.
    univariate : bool, default False
        If True, the features are returned with shape
        ``(n_samples, 1, n_features)``. If False, they are returned with shape
        ``(n_samples, n_channels, n_features // n_channels)``.
    n_channels : int, default 3
        Number of channels used when ``univariate`` is False. The reshape is
        row-major, i.e. the first ``n_features // n_channels`` columns form
        channel 0, the next ones channel 1, and so on.
        NOTE(review): this presumes the CSV stores the channels one after the
        other rather than interleaved -- confirm against the dataset.

    Returns
    -------
    X : numpy.ndarray
        3D float array as described above.
    y : pandas.Series
        Labels cast to ``int``, indexed by ``id``.

    Raises
    ------
    ValueError
        If ``n_channels`` is smaller than 1 or does not evenly divide the
        number of feature columns (only checked when ``univariate`` is False).
    """
    data = pd.read_csv(DATA, delimiter=",", na_values=['?'], dtype='float', index_col="id")

    # Extract Data and Labels
    X = data.iloc[:, 1:].values
    y = data.iloc[:, 0].astype(int)

    n_samples, n_features = X.shape
    if univariate:
        X = X.reshape(n_samples, 1, n_features)
    else:
        # Fail early with an actionable message instead of NumPy's generic
        # "cannot reshape array" error.
        if n_channels < 1 or n_features % n_channels != 0:
            raise ValueError(
                "Cannot split %d feature columns into %r channels of equal length"
                % (n_features, n_channels)
            )
        X = X.reshape(n_samples, n_channels, n_features // n_channels)

    print(X.shape)
    print(y.shape)
    return X, y


# Load the training samples and labels in the layout selected by use_univariate.
X_train, y_train = read_data_sktime(DATA_TRAIN, univariate=use_univariate)

In [None]:
class LSTMFCNClassifierCustom(LSTMFCNClassifier):
    """LSTMFCNClassifier whose constructor arguments work with scikit-learn tuning.

    scikit-learn's ``get_params`` / ``set_params`` / ``clone`` read and write
    every constructor argument through an attribute of the same name. This
    subclass stores each argument on the instance and bridges the two names
    that differ:

    * ``use_att`` (constructor argument) is kept in ``self.attention`` and
      exposed through a read/write ``use_att`` property.
    * ``lstm_size`` is additionally exposed as the read-only ``NUM_CELLS``
      property.
      NOTE(review): presumably the inherited model-building code reads
      ``self.NUM_CELLS`` and ``self.attention`` -- confirm against the
      installed sktime-dl version.

    All hyper-parameters are stored unchanged; how they are interpreted is
    defined by the inherited sktime-dl implementation.

    Parameters
    ----------
    nb_epochs, batch_size, dropout, kernel_sizes, filter_sizes, lstm_size :
        Hyper-parameters, stored under the same attribute names.
    use_att : bool, default False
        Stored as ``self.attention``.
    callbacks, random_state, verbose :
        Stored under the same attribute names.
    model_name, model_save_directory :
        Forwarded to the parent constructor.
    """

    # NOTE: the list defaults are kept (instead of the usual ``None``
    # sentinel) because scikit-learn expects ``__init__`` to store its
    # arguments exactly as passed. They are shared between instances and must
    # never be mutated in place.
    def __init__(
        self,
        nb_epochs=2000,
        batch_size=128,
        dropout=0.8,
        kernel_sizes=[8, 5, 3],
        filter_sizes=[128, 256, 128],
        lstm_size=8,
        use_att=False,
        callbacks=None,
        random_state=0,
        verbose=False,
        model_name="lstmfcn",
        model_save_directory=None,
    ):
        # Zero-argument super() keeps working when this notebook cell is
        # re-executed and the class name is rebound to a new class object.
        super().__init__(
            model_name=model_name, model_save_directory=model_save_directory
        )

        # calced in fit
        self.classes_ = None
        self.nb_classes = -1
        self.input_shape = None
        self.model = None
        self.history = None

        # predefined
        self.nb_epochs = nb_epochs
        self.batch_size = batch_size
        self.kernel_sizes = kernel_sizes
        self.filter_sizes = filter_sizes
        self.lstm_size = lstm_size
        self.dropout = dropout
        self.attention = use_att

        self.callbacks = callbacks
        self.random_state = random_state
        self.verbose = verbose

        self._is_fitted = False

    @property
    def NUM_CELLS(self):
        """Read-only alias for ``lstm_size``."""
        return self.lstm_size

    @property
    def use_att(self):
        """Value of the ``use_att`` constructor argument (stored in ``attention``)."""
        return self.attention

    @use_att.setter
    def use_att(self, value):
        # Needed so that set_params(use_att=...) -- and therefore a grid
        # search over ``use_att`` -- can assign the parameter.
        self.attention = value


In [None]:
# param_grid = {
#                 "kernel_size": [3,5,7,9,11],
#                 "n_conv_layers": [2,3,5,7],
#               }

# Candidate hyper-parameters; every combination is evaluated by the grid search.
param_grid = {"nb_epochs": [10, 20], "batch_size": [128]}

# Base estimator and the metric used to rank candidates (weighted F1).
clf = LSTMFCNClassifierCustom(nb_epochs=10)
scorer = make_scorer(f1_score, average="weighted")

# Run a 5-fold cross-validated grid search and measure its wall-clock time.
# NOTE(review): n_jobs=-1 runs the candidates in parallel worker processes;
# confirm that the Keras-backed estimator copes with that in this environment.
started_at = time.perf_counter()
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=scorer,
    cv=5,
    n_jobs=-1,
    refit=True,
    verbose=1,
)
grid.fit(X_train, y_train)
fit_time = np.round(time.perf_counter() - started_at, 5)

# Winner of the search: refit=True means best_clf was retrained on all of X_train.
best_clf, best_params, best_score = (
    grid.best_estimator_,
    grid.best_params_,
    grid.best_score_,
)

print("Best F1-Score:", best_score)
print("Time taken:", fit_time)

print("Detailed scores on train dataset:")
print()

# Mean and spread of the cross-validation score for every candidate.
cv_results = grid.cv_results_
means = cv_results["mean_test_score"]
stds = cv_results["std_test_score"]

for idx, params in enumerate(cv_results["params"]):
    print("%0.3f (+/-%0.03f) for \n\t %r" % (means[idx], stds[idx] * 2, params))
    print()


# Predict

In [None]:
# Read the data
# The test set is loaded with the same channel layout as the training set.
# NOTE(review): read_data_sktime always treats the first non-id column as the
# label and casts it to int -- confirm that the "nolabel" file still contains
# such a column with integer-convertible values. The returned labels are
# discarded here.
DATA_TEST = "./dataset/SITS-test-data-phase1-nolabel.csv"

X_test, _ = read_data_sktime(DATA_TEST, univariate=use_univariate)

In [None]:
# Make a prediction
# best_clf is the grid search's best estimator, refit on the full training set.
predictions = best_clf.predict(X_test)

In [None]:
# Build the Kaggle submission table: one PREDICTED value per test sample.
# NOTE(review): the "ID" index is the positional 0..n-1 range, not the "id"
# column of the test CSV -- confirm that this matches the expected format.
submission = pd.DataFrame({'PREDICTED': predictions}).rename_axis("ID")

# Write the table, including the ID index, to disk.
filename = 'baseline_tsf_submission_phase1.csv'
submission.to_csv(filename, index=True)
print('Saved file: ' + filename)

# Show the first 5 rows as the cell output.
submission.head()