# Train sktime-dl MLSTM-FCN Model

In [1]:
import os
# Both variables are set before the TensorFlow import further down in this cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Hide TensorFlow INFO and WARNING logs; errors are still printed
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Disable GPU

# os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 
# import tensorflow as tf
# tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)

import time
import numpy as np
import pandas as pd

from tensorflow.keras import callbacks

from sktime.classification.deep_learning.mlstmfcn import MLSTMFCNClassifier

from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, make_scorer

In [2]:
# Training subsets of different sizes are available; point DATA_TRAIN at one of:
#   - SITS-train-phase2-subset-1000000.csv.gz
#   - SITS-train-phase2-subset-100000.csv.gz
#   - SITS-train-phase2-subset-10000.csv.gz
DATA_TRAIN = "./dataset/SITS-train-phase2-subset-1000000.csv.gz"

# Forwarded to read_data_sktime(univariate=...): False keeps one channel per band/index.
use_univariate = False


def vegetation_index(n, r, g):
    """Vegetation index computed from the NIR (n) and red (r) bands; g is unused."""
    return (n - r) / (n + r)


# Extra channels derived from the (nir, red, green) bands. Every entry is called
# as f(nir, red, green). Further candidates that are currently switched off:
#   water index:        lambda n,r,g: (n - g) / (n + g)
#   chlorophyll index:  lambda n,r,g: (n * r) / (g ** 2)
#   brightness:         lambda n,r,g: np.sqrt(n ** 2 + r ** 2 + g ** 2)
indices_f = [
    vegetation_index,
]

# Set univariate=False, if you use a classifier with multivariate capabilities
def read_data_sktime(DATA, univariate=False, indices_f=None):
    """Read a gzip-compressed SITS CSV file and shape it for sktime classifiers.

    The file must contain an ``id`` column (used as index). The first remaining
    column is taken as the label; all other columns are features, laid out as
    three equally long consecutive groups that are passed to the index
    functions as ``(nir, red, green)``.

    Parameters
    ----------
    DATA : str or path-like
        Location of the ``.csv.gz`` file. ``?`` entries are read as missing.
    univariate : bool, default=False
        If True, return a single channel holding all columns concatenated,
        shape ``(n_rows, 1, n_columns)``. If False, return one channel per
        band and per derived index, shape ``(n_rows, 3 + len(indices_f), s)``
        with ``s = n_feature_columns // 3``.
    indices_f : sequence of callables, optional
        Each callable receives ``(nir, red, green)`` as 2D arrays of shape
        ``(n_rows, s)`` and must return an array of the same shape, which is
        appended as an additional channel. ``None`` means no extra channels.

    Returns
    -------
    X : numpy.ndarray
        3D feature array as described above.
    y : numpy.ndarray
        1D integer label array.

    Raises
    ------
    ValueError
        In multivariate mode, if the columns cannot be split evenly into
        channels (e.g. the feature count is not a multiple of three).
    """
    # None instead of a mutable [] default, so no list is shared between calls.
    index_functions = [] if indices_f is None else list(indices_f)

    data = pd.read_csv(DATA, delimiter=",",
                       na_values=['?'], dtype='float',
                       index_col="id", compression='gzip')

    # Fill NaN values row-wise in the most basic way: take the next valid value
    # (bfill) and, for trailing gaps, carry the last valid value forward (ffill).
    # DataFrame.bfill/ffill are used because fillna(method=...) is deprecated.
    # Note: the fill runs over the whole row, so the label column (first column)
    # takes part in it as well.
    data = data.bfill(axis=1).ffill(axis=1)

    # Extract data and labels
    features = data.iloc[:, 1:]
    y = np.array(data.iloc[:, 0].astype(int))

    # Extract value groups: three consecutive blocks of equal width
    s = features.shape[1] // 3
    nir = features.iloc[:, 0:s].to_numpy()
    red = features.iloc[:, s:2 * s].to_numpy()
    green = features.iloc[:, 2 * s:3 * s].to_numpy()

    # Append one block of width s per derived index
    parts = [features.to_numpy()]
    parts.extend(index_f(nir, red, green) for index_f in index_functions)
    X = np.concatenate(parts, axis=1)

    if univariate:
        X = X.reshape(X.shape[0], 1, X.shape[1])
    else:
        n_channels = 3 + len(index_functions)
        if X.shape[1] % n_channels:
            # The reshape below would fail anyway; fail with a readable message.
            raise ValueError(
                "Cannot split %d columns into %d equally sized channels; "
                "the number of feature columns must be a multiple of 3."
                % (X.shape[1], n_channels)
            )
        X = X.reshape(X.shape[0], n_channels, X.shape[1] // n_channels)

    return X, y

X, y = read_data_sktime(DATA_TRAIN, univariate=use_univariate, indices_f=indices_f)

# One stratified shuffle split: 15% of the rows are held out for validation.
# With n_splits=1 the generator yields exactly one (train, test) index pair.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
train_ix, test_ix = next(splitter.split(X, y))

X_train, y_train = X[train_ix], y[train_ix]
X_val, y_val = X[test_ix], y[test_ix]

# Report the shapes of both partitions.
for title, features, labels in (
    ("Training data:", X_train, y_train),
    ("Validation data:", X_val, y_val),
):
    print(title)
    print(features.shape)
    print(labels.shape)
    print()


Training data:
(850000, 4, 46)
(850000,)

Validation data:
(150000, 4, 46)
(150000,)



In [3]:
# Keras callbacks that are handed to the classifier via its `callbacks` argument.

TENSORBOARD_LOG_DIR = "./tensorboard/phase_2_mlstm_full_dataset_tuned_and_fixed_multivariate_with_dilation"

# Stops on a stalled validation loss, so it only works when validation data is
# passed to fit().
# NOTE(review): patience=50 equals the n_epochs=50 used when the classifier is
# built below, so this may never trigger - confirm that is intended.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=50,
    restore_best_weights=True,
)

# Multiplies the learning rate by 0.7 when the *training* loss plateaus.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor="loss",
    factor=0.7,
    patience=10,
    min_lr=0.0001,
)

tensorboard = callbacks.TensorBoard(
    log_dir=TENSORBOARD_LOG_DIR,
    histogram_freq=1,
)
# model_save = callbacks.ModelCheckpoint('best_model_exploring.h5', monitor='val_loss', mode='min', save_best_only=True)

callbacks_ = [early_stopping, reduce_lr, tensorboard]

In [4]:
class MLSTMFCNWithValidation(MLSTMFCNClassifier):
    """MLSTM-FCN classifier whose ``fit`` additionally accepts validation data.

    ``fit`` re-implements the input checking / bookkeeping sequence of the
    parent's public ``fit`` so that ``X_val`` / ``y_val`` can be forwarded to
    ``_fit``, which hands them to Keras as ``validation_data``. This enables
    callbacks that monitor ``val_*`` metrics.

    NOTE(review): the private helpers used in ``fit`` (``_internal_convert``,
    ``_check_classifier_input``, ``_check_capabilities``, ``_convert_X``) are
    inherited from sktime and depend on the installed sktime version.
    """

    def fit(self, X, y, X_val=None, y_val=None, **kwargs):
        """Fit the classifier, optionally evaluating on validation data per epoch.

        Parameters
        ----------
        X : training series; converted and checked by the inherited helpers.
        y : training labels.
        X_val : optional validation series. It is NOT converted or checked
            here; `_fit` calls ``X_val.transpose(0, 2, 1)`` on it, so it must
            be a 3D array laid out like the converted training data.
        y_val : optional validation labels (raw labels, not one-hot encoded).
        **kwargs : forwarded unchanged to the Keras ``model.fit`` call.

        Returns
        -------
        self
        """
        import warnings

        self.reset()

        # Validation only happens when both parts are supplied; tell the user
        # instead of silently training without validation data.
        if (X_val is None) != (y_val is None):
            warnings.warn(
                "X_val and y_val must be passed together; "
                "validation data is ignored for this fit.",
                UserWarning,
                stacklevel=2,
            )
            X_val, y_val = None, None

        start = int(round(time.time() * 1000))
        # convenience conversions to allow user flexibility:
        # if X is 2D array, convert to 3D, if y is Series, convert to numpy
        X, y = self._internal_convert(X, y)
        X_metadata = self._check_classifier_input(X, y)
        missing = X_metadata["has_nans"]
        multivariate = not X_metadata["is_univariate"]
        unequal = not X_metadata["is_equal_length"]
        self._X_metadata = X_metadata

        # Check this classifier can handle characteristics
        self._check_capabilities(missing, multivariate, unequal)

        # remember class labels
        self.classes_ = np.unique(y)
        self.n_classes_ = self.classes_.shape[0]
        self._class_dictionary = {
            class_val: index for index, class_val in enumerate(self.classes_)
        }

        # escape early and do not fit if only one class label has been seen
        #   in this case, we later predict the single class label seen
        if len(self.classes_) == 1:
            self.fit_time_ = int(round(time.time() * 1000)) - start
            self._is_fitted = True
            return self

        # Convert data as dictated by the classifier tags
        X = self._convert_X(X)
        multithread = self.get_tag("capability:multithreading")
        if multithread:
            # check_n_jobs is imported here because the notebook does not import
            # it; without the import this branch always hit a NameError that was
            # reported as a misleading "n_jobs must be set" error.
            from sktime.utils.validation import check_n_jobs

            try:
                n_jobs = self.n_jobs
            except AttributeError as err:
                raise AttributeError(
                    "self.n_jobs must be set if capability:multithreading is True"
                ) from err
            self._threads_to_use = check_n_jobs(n_jobs)

        # pass coerced and checked data to inner _fit
        self._fit(X, y, X_val, y_val, **kwargs)
        self.fit_time_ = int(round(time.time() * 1000)) - start

        # this should happen last
        self._is_fitted = True
        return self

    def _fit(self, X, y, X_val=None, y_val=None, **kwargs):
        """Build the Keras model and train it.

        Parameters
        ----------
        X : 3D array, transposed with ``(0, 2, 1)`` before being passed to Keras.
        y : training labels, one-hot encoded via ``convert_y_to_keras``.
        X_val, y_val : optional validation data; used only if both are given.
            ``y_val`` is encoded with ``self.label_encoder`` and
            ``self.onehot_encoder`` (presumably fitted by the inherited
            ``convert_y_to_keras`` - TODO confirm for the installed sktime).
        **kwargs : forwarded unchanged to the Keras ``model.fit`` call.

        Returns
        -------
        self
        """
        from sklearn.utils.validation import check_random_state

        # Validate the seed but do not assign the result back: overwriting the
        # constructor parameter with a RandomState object would change what
        # get_params()/reset() and any later reader of self.random_state see.
        check_random_state(self.random_state)
        y_onehot = self.convert_y_to_keras(y)

        # Transpose to conform to Keras input style.
        X = X.transpose(0, 2, 1)

        validation_data = None
        if X_val is not None and y_val is not None:
            # Encode the validation labels with the encoders fitted on y.
            y_val_encoded = self.label_encoder.transform(y_val).reshape(-1, 1)
            y_val_onehot = self.onehot_encoder.transform(y_val_encoded)
            validation_data = (X_val.transpose(0, 2, 1), y_val_onehot)

        # ignore the number of instances, X.shape[0],
        # just want the shape of each instance
        self.input_shape = X.shape[1:]

        self.model_ = self.build_model(self.input_shape, self.n_classes_)

        if self.verbose:
            self.model_.summary()

        self.history = self.model_.fit(
            X,
            y_onehot,
            batch_size=self.batch_size,
            epochs=self.n_epochs,
            verbose=self.verbose,
            callbacks=self.callbacks,
            validation_data=validation_data,
            **kwargs
        )

        self._is_fitted = True

        return self

In [5]:
# Hyper-parameters of this training run, collected in one place.
# NOTE(review): no random_state is passed, so runs are presumably not
# reproducible - confirm whether a fixed seed is wanted.
mlstm_params = dict(
    n_epochs=50,
    attention=False,
    batch_size=32,
    dilation_rate=2,
    filter_sizes=(64, 128, 64),
    kernel_sizes=(5, 3, 1),
    lstm_size=3,
    callbacks=callbacks_,
    verbose=True,
)
clf = MLSTMFCNWithValidation(**mlstm_params)

# Train on the training split and evaluate on the held-out split every epoch.
clf.fit(X_train, y_train, X_val=X_val, y_val=y_val)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 46, 4)]      0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 46, 64)       1344        ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 46, 64)      256         ['conv1d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 activation (Activation)        (None, 46, 64)       0           ['batch_normalization[0][0]']

Unnamed: 0_level_0,PREDICTED
ID,Unnamed: 1_level_1
0,1
1,9
2,2
3,18
4,3


In [None]:
DATA_TEST = "./dataset/SITS-test-data-phase2-nolabel.csv.gz"

# The second return value (the label array) is not used for the test set.
X_test, _ = read_data_sktime(DATA_TEST, univariate=use_univariate, indices_f=indices_f)

# Make a prediction
predictions = clf.predict(X_test)

# Create a submission file for kaggle
submission = pd.DataFrame({'PREDICTED': predictions})
submission.index.name = "ID"

filename = 'predictions/mlstm_submission_1M_indices.csv'
# Create the output folder if needed, so the write cannot fail on a missing
# directory after the expensive prediction step has already run.
os.makedirs(os.path.dirname(filename), exist_ok=True)
submission.to_csv(filename, index=True)
print('Saved file: ' + filename)

# Visualize the first 5 rows
submission.head()