# Train sktime-dl LSTM-FCN Model

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Only print warnings, ignore info and error 
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Disable GPU

# os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 
# import tensorflow as tf
# tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)

import time
import numpy as np
import pandas as pd

from tensorflow.keras import callbacks

from sktime.classification.deep_learning.lstmfcn import LSTMFCNClassifier

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, make_scorer



In [16]:
DATA_TRAIN = "./dataset/SITS-train-phase1-subset-10000.csv"
use_univariate = True # also used in prediction later on

# Set univariate=True, if you use a classifier with multivariate capabilities
def read_data_sktime(DATA, univariate=False):
    data = pd.read_csv(DATA, delimiter="," , na_values=['?'], dtype='float', index_col="id")

    # Extract Data and Labels
    X = data.iloc[:,1:].values
    y = data.iloc[:,0].astype(int)

    if univariate:
        X = X.reshape(X.shape[0], 1, X.shape[1])
    else:
        X = X.reshape(X.shape[0], 3, X.shape[1]//3)
        

    return X, y


X, y = read_data_sktime(DATA_TRAIN, univariate=use_univariate)

for (train_ix, test_ix)  in StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=42).split(X, y):
    X_train, y_train = X[train_ix], y[train_ix]
    X_val, y_val = X[test_ix], y[test_ix]

print(X.shape)
print(y.shape)
print()

print(X_train.shape)
print(y_train.shape)
print()

print(X_val.shape)
print(y_val.shape)
print()


(10000, 1, 138)
(10000,)

(8500, 1, 138)
(8500,)

(1500, 1, 138)
(1500,)



In [6]:
# Define callbacks

reduce_lr = callbacks.ReduceLROnPlateau(monitor="loss", factor=0.7, patience=10, min_lr=0.0001)
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
tensorboard = callbacks.TensorBoard(log_dir="./tensorboard/", histogram_freq=1)
model_save = callbacks.ModelCheckpoint('best_model_exploring.h5', monitor='val_loss', mode='min', save_best_only=True)

callbacks_ = [early_stopping, reduce_lr, tensorboard]

In [22]:
class LSTMFCNWithValidation(LSTMFCNClassifier):

    def fit(self, X, y, X_val=None, y_val=None, **kwargs):
        self.reset()

        start = int(round(time.time() * 1000))
        # convenience conversions to allow user flexibility:
        # if X is 2D array, convert to 3D, if y is Series, convert to numpy
        X, y = self._internal_convert(X, y)
        X_metadata = self._check_classifier_input(X, y)
        missing = X_metadata["has_nans"]
        multivariate = not X_metadata["is_univariate"]
        unequal = not X_metadata["is_equal_length"]
        self._X_metadata = X_metadata

        # Check this classifier can handle characteristics
        self._check_capabilities(missing, multivariate, unequal)

        # remember class labels
        self.classes_ = np.unique(y)
        self.n_classes_ = self.classes_.shape[0]
        self._class_dictionary = {}
        for index, class_val in enumerate(self.classes_):
            self._class_dictionary[class_val] = index

        # escape early and do not fit if only one class label has been seen
        #   in this case, we later predict the single class label seen
        if len(self.classes_) == 1:
            self.fit_time_ = int(round(time.time() * 1000)) - start
            self._is_fitted = True
            return self

        # Convert data as dictated by the classifier tags
        X = self._convert_X(X)
        multithread = self.get_tag("capability:multithreading")
        if multithread:
            try:
                self._threads_to_use = check_n_jobs(self.n_jobs)
            except NameError:
                raise AttributeError(
                    "self.n_jobs must be set if capability:multithreading is True"
                )

        # pass coerced and checked data to inner _fit
        self._fit(X, y, X_val, y_val, **kwargs)
        self.fit_time_ = int(round(time.time() * 1000)) - start

        # this should happen last
        self._is_fitted = True
        return self

    def _fit(self, X, y, X_val=None, y_val=None, **kwargs):
        from sklearn.utils.validation import check_random_state
        
        self.random_state = check_random_state(self.random_state)
        y_onehot = self.convert_y_to_keras(y)
        
        if y_val is not None:
            y_val = self.label_encoder.transform(y_val)
            y_val = y_val.reshape(-1, 1)
            y_val = self.onehot_encoder.transform(y_val)

        # Transpose to conform to Keras input style.
        X = X.transpose(0, 2, 1)
        
        if X_val is not None:
             X_val = X_val.transpose(0, 2, 1)
             
        validation_data = (X_val, y_val) if X_val is not None and y_val is not None else None

        # ignore the number of instances, X.shape[0],
        # just want the shape of each instance
        self.input_shape = X.shape[1:]

        self.model_ = self.build_model(self.input_shape, self.n_classes_)

        if self.verbose:
            self.model_.summary()

        self.history = self.model_.fit(
            X,
            y_onehot,
            batch_size=self.batch_size,
            epochs=self.n_epochs,
            verbose=self.verbose,
            callbacks=self.callbacks,
            validation_data=validation_data,
            **kwargs
        )

        self._is_fitted = True

        return self

In [15]:
y_val

id
765.0     3
482.0     1
861.0    21
500.0     1
663.0     0
         ..
219.0     8
217.0     0
392.0     2
998.0    21
363.0     1
Name: y, Length: 150, dtype: int64

In [None]:
clf = LSTMFCNWithValidation(
    n_epochs=100,
    batch_size=128,
    callbacks=callbacks_,
)

clf.fit(X_train, y_train, X_val, y_val)
# clf.save('test.mdl')

In [27]:
param_grid = {
    "kernel_sizes":[(8, 5, 3),(11, 8, 5)],
    "filter_sizes":[(128, 256, 128),(128, 128, 128),(64, 128, 64),(32, 64, 32)],
}

# choose a classifier
clf = LSTMFCNClassifier(
    # n_epochs=1,
    n_epochs=1000,
    batch_size=128,
    lstm_size=5,
    attention=False,
    callbacks=callbacks_,
    X_val=X_val, 
    y_val=y_val,
)

scorer = make_scorer(f1_score, average="macro")

# perform a grid-search
fit_time = time.perf_counter()
grid = GridSearchCV(
    clf, param_grid, 
    cv=3, 
    scoring=scorer, 
    refit=True, 
    n_jobs=1, 
    verbose=1
)
grid.fit(X_train, y_train)
# grid.fit(X_train, y_train)
fit_time = np.round(time.perf_counter() - fit_time, 5)

# get best model
best_clf = grid.best_estimator_
best_params = grid.best_params_
best_score = grid.best_score_

print("Best F1-Score:", best_score)
print("Time taken:", fit_time)
print("Best Params:", best_params)

del best_clf.__dict__['callbacks']
best_clf.save(f"best_model_grid_search_f1-{best_score:.4f}".replace('.', 'p'))

import json 
with open(f"best_params_f1-{best_score:.4f}.txt", 'w') as f:
    f.write(json.dumps(best_params))


Fitting 3 folds for each of 8 candidates, totalling 24 fits
Best F1-Score: 0.5648223482866533
Time taken: 9122.12894
Best Params: {'filter_sizes': (32, 64, 32), 'kernel_sizes': (8, 5, 3)}




INFO:tensorflow:Assets written to: best_model_grid_search_f1-0p5648/keras/assets


INFO:tensorflow:Assets written to: best_model_grid_search_f1-0p5648/keras/assets


# Predict

In [None]:
# Read the data
DATA_TEST = "./dataset/SITS-test-data-phase1-nolabel.csv"

X_test, _ = read_data_sktime(DATA_TEST, univariate=use_univariate)

In [None]:
# Make a prediction
predictions = best_clf.predict(X_test)

In [None]:
# Create a submission file for kaggle
submission = pd.DataFrame({'PREDICTED': predictions})
submission.index.name="ID"

filename = 'baseline_tsf_submission_phase1.csv'
submission.to_csv(filename,index=True)
print('Saved file: ' + filename)

#Visualize the first 5 rows
submission.head()