In [1]:
!pip install sktime

Collecting sktime
  Downloading sktime-0.11.4-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
Collecting deprecated>=1.2.13
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: deprecated, sktime
Successfully installed deprecated-1.2.13 sktime-0.11.4
[0m

In [2]:
"""
Hackathon - INCAP - IconPro GmbH
Timeseries Classification with Transformers
"""
import pandas as pd
from tensorflow import keras
from dataclasses import dataclass
from tensorflow.keras import layers
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.rocket import Rocket, MiniRocket
# Import packages as you need

In [3]:
def load_data(data_path):
    """
    Loading of the dataset provided
    Edit the code below
    """
    data = pd.read_pickle(data_path)
    return data

In [4]:
def preprocess_data(data):
    """
    A standard nan removal to be added.
    Add more preprocessing steps if needed.
    """
    
    X = data['dim_0'].apply(lambda x: x.reshape(500,1))
    
    for i in range(data.shape[0]):
        if True in np.isnan(data['dim_0'][i]).flatten():
            print(i)
            
    input_x = []
    for array in X:
        input_x.append(array)
    
#     X = pd.DataFrame(data.dim_0.tolist())
#     X = X.to_numpy()
    
    y = data['labels']
    y = y.astype(int)
    y[y == -1] = 0
    return input_x,y

In [5]:
def Rocket_preprocessing(input_x):
    input_x1=[]
    input_x = np.array(input_x).reshape(4921, 500)
    print(input_x.shape)
    for i in range(input_x.shape[0]):
        input_x1.append(pd.Series(input_x[i]))
    input_x1=pd.Series(input_x1)
#    input_x1.shape
    
    input_x1=pd.DataFrame(input_x1)
    return input_x1

In [6]:
def split_train_test(X, y):
    """
    Splitting the data into train, test, validation 
    """
    
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)
    
    return X_train, X_val, X_test, y_train, y_val, y_test

In [7]:
def Rocket(X_train, X_val, X_test):
    rocket = MiniRocket(num_kernels=500)  # by default, ROCKET uses 10,000 kernels
    X_train = rocket.fit_transform(X_train)
    X_train = np.expand_dims(np.array(X_train), axis=2)
    
    X_val = rocket.transform(X_val)
    X_val = np.expand_dims(np.array(X_val), axis=2)
    
    X_test = rocket.transform(X_test)
    X_test = np.expand_dims(np.array(X_test), axis=2)
    
    return X_train, X_val, X_test

In [8]:
def normalization(X_train, X_val, X_test):
    scaler = StandardScaler()
    shp = X_train.shape[1]
    X_train = np.reshape(X_train, (-1,shp))
    X_val = np.reshape(X_val, (-1,shp))
    X_test = np.reshape(X_test, (-1,shp))
    
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)
    
    return np.reshape(X_train, (-1,shp,1)), np.reshape(X_val, (-1,shp,1)), np.reshape(X_test, (-1,shp,1))

In [9]:
def timeseries_transform(data, head_size, num_heads, ff_dim, dropout=0):
    """
    Implement the timeseries transformer here
    """
    # Normalization and Attention
    x = data
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    x = layers.Dropout(dropout)(x)
    res = x + data

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=data.shape[-1], kernel_size=1)(x)
    return x + res

In [10]:
def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = timeseries_transform(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(2, activation="sigmoid")(x)
    return keras.Model(inputs, outputs)

In [11]:
def model_training(X_train, y_train, X_val, y_val):
    """
    Train the data with the compatible model
    """
    
    input_shape = X_train.shape[1:]

    model = build_model(input_shape, head_size=256, num_heads=4, ff_dim=4, num_transformer_blocks=4, mlp_units=[256], mlp_dropout=0.4, dropout=0.25)
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=1e-4, decay_steps=10000, decay_rate=0.9)

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
        metrics=["sparse_categorical_accuracy"],
    )
    
    model.summary()

    callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

    model.fit(X_train, y_train, validation_data=(X_val,y_val),  epochs=200, batch_size=128, callbacks=callbacks)
    return model

In [12]:
def metric(y_act, y_pred):
    """
    Standard metrics and plotting should be same
    Metrics should be computed on validation data(unseen data)
    1. Balanced accuracy score
    2. Confusion matrix
    3. Per-class accuracy
    """
    
    cm = metrics.confusion_matrix(y_act, y_pred)
    balanced_accuracy = metrics.balanced_accuracy_score(y_act, y_pred)
    
    return cm, balanced_accuracy

In [13]:
def validation(X_val, y_val, metrics):
    """
    Comparing the results with provided Series Embedder
    Plot confusion matrices of self analysis and LSTM with balanced_accuracy
    
    """
    
    score = model.evaluate(X_val, y_val, verbose=1)
    
    return score

In [14]:
def evaluate(X_test, y_act, metric, model):
    y_pred = model.predict(X_test, verbose=1)
    y_pred = np.argmax(y_pred, axis=1)
    cm, ba = metric(y_act, y_pred)
    
    return y_pred, cm, ba

In [15]:
path = "../input/fordadata/data.pkl"
data = load_data(path)
X, y = preprocess_data(data)
X = Rocket_preprocessing(X)

X_train, X_val, X_test, y_train, y_val, y_test = split_train_test(X, y)
X_train, X_val, X_test = Rocket(X_train, X_val, X_test)
X_train, X_val, X_test = normalization(X_train, X_val, X_test)
model_self=model_training(X_train, y_train, X_val, y_val)

evaluate(X_test, y_test, metric, model_self)

(4921, 500)
(3148, 420, 1)
(985, 420, 1)
(788, 420, 1)


2022-05-22 00:53:13.624607: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-22 00:53:13.725845: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-22 00:53:13.726704: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-22 00:53:13.728731: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 420, 1)]     0                                            
__________________________________________________________________________________________________
multi_head_attention (MultiHead (None, 420, 1)       7169        input_1[0][0]                    
                                                                 input_1[0][0]                    
__________________________________________________________________________________________________
layer_normalization (LayerNorma (None, 420, 1)       2           multi_head_attention[0][0]       
__________________________________________________________________________________________________
dropout (Dropout)               (None, 420, 1)       0           layer_normalization[0][0]    

2022-05-22 00:53:16.668537: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-05-22 00:53:21.396493: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200


(array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0,
        1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
        1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,
        0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
        0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
        1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
        1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
        0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
        0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,
        0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
        1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,
        1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 

In [16]:
model_self.save("transformer_banana_muffin_normalized")
!zip -r transformer_normalized.zip "/kaggle/working/transformer_banana_muffin_normalized
model_self.save("transformer_banana_muffin_normalized.h5")
!zip -r transformer_normalized.zip "/kaggle/working/transformer_banana_muffin_normalized"

2022-05-22 01:04:44.620657: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


/bin/bash: -c: line 0: unexpected EOF while looking for matching `"'
/bin/bash: -c: line 1: syntax error: unexpected end of file
  adding: kaggle/working/transformer_banana_muffin_normalized/ (stored 0%)
  adding: kaggle/working/transformer_banana_muffin_normalized/assets/ (stored 0%)
  adding: kaggle/working/transformer_banana_muffin_normalized/saved_model.pb (deflated 91%)
  adding: kaggle/working/transformer_banana_muffin_normalized/keras_metadata.pb (deflated 95%)
  adding: kaggle/working/transformer_banana_muffin_normalized/variables/ (stored 0%)
  adding: kaggle/working/transformer_banana_muffin_normalized/variables/variables.data-00000-of-00001 (deflated 25%)
  adding: kaggle/working/transformer_banana_muffin_normalized/variables/variables.index (deflated 84%)


In [17]:
# metrics=metric(val,model_self)

# lstm_cm,lstm_balanced_accuracy=lstm(preprocessed_data,target='labels')
# metrics_validation = [lstm_cm, lstm_balanced_accuracy]
# validation(metrics,metrics_validation)