In [None]:
!wget http://mindbigdata.com/opendb/MindBigData-IN-v1.06.zip && unzip MindBigData-IN-v1.06.zip

In [56]:
import pandas as pd
import numpy as np
import numpy as np

def pad_sequences(sequences):
    # Get the length of the longest sequence
    max_length = max(len(seq) for seq in sequences)
    
    # Create a new list of sequences, each padded to the max_length with zeros at the end
    padded_sequences = np.zeros((len(sequences), max_length))
    for i, seq in enumerate(sequences):
        padded_sequences[i, :len(seq)] = seq

    return padded_sequences

# read IN.txt into dataframe
columns = ["id", "event", "device", "channel", "code", "size", "data"]

df = pd.read_csv("IN.txt", delimiter="\t", names=columns)

# Preprocess the 'data' column
df['data'] = df['data'].apply(lambda x: [float(i) for i in x.split(',')])

# group by event
df = df.groupby('event').agg(list)

# merge code from list to integer (eg [0, 0, 0, 0, 0] -> 0)
df['code'] = df['code'].apply(lambda x: x[0])

# flatten data
df['data'] = df['data'].apply(lambda x: np.array(x).flatten())

df.drop(columns=['id', 'device', 'size'], inplace=True)

# Convert 'channel' and 'data' columns to list
channels = df['channel'].tolist()
data = df['data'].tolist()

# Pad the sequences
padded_data = pad_sequences(data)

# Reshape to your desired dimensions
X = padded_data.reshape(len(df) // 5, -1, 5)

# Normalize across the last dimension
X = X / np.linalg.norm(X, axis=1, keepdims=True)

# Convert 'code' column to list
code = df['code'].tolist()

# Convert list to numpy array
Y = np.array(code)
Y = Y.reshape(len(Y) // 5, 5)

# Assuming that you want to use 80% of data for training and remaining 20% for testing
num_train = int(0.8 * len(X))

# For X
X_train = X[:num_train]
X_test = X[num_train:]

# For Y
Y_train = Y[:num_train]
Y_test = Y[num_train:]

# Shuffle the training data
idx = np.random.permutation(len(X_train))
X_train = X_train[idx]
Y_train = Y_train[idx]

n_classes = len(np.unique(Y_train))

In [57]:
X_train.shape, Y_train.shape

((2088, 1360, 5), (2088, 5))

## Build the model

Our model processes a tensor of shape `(batch size, sequence length, features)`,
where `sequence length` is the number of time steps and `features` is each input
timeseries.

You can replace your classification RNN layers with this one: the
inputs are fully compatible!

We include residual connections, layer normalization, and dropout.
The resulting layer can be stacked multiple times.

The projection layers are implemented through `keras.layers.Conv1D`.

The main part of our model is now complete. We can stack multiple of those
`transformer_encoder` blocks and we can also proceed to add the final
Multi-Layer Perceptron classification head. Apart from a stack of `Dense`
layers, we need to reduce the output tensor of the `TransformerEncoder` part of
our model down to a vector of features for each data point in the current
batch. A common way to achieve this is to use a pooling layer. For
this example, a `GlobalAveragePooling1D` layer is sufficient.

In [59]:
from tensorflow import keras
from tensorflow.keras import layers
import wandb
from wandb.keras import WandbCallback

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    # outputs = layers.Dense(n_classes, activation="softmax")(x)
    x = layers.Dense(n_classes, activation="linear")(x) # set units to n_classes and use linear
    outputs = [layers.Dense(n_classes, activation="softmax")(x) for _ in range(5)] # 5 outputs each with 10 probabilities
    return keras.Model(inputs, outputs)

input_shape = X_train.shape[1:]

wandb.init(project="mindbigdata")

model = build_model(
    input_shape,
    head_size=16,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[8], # 128
    mlp_dropout=0.4,
    dropout=0.25,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True), WandbCallback()]

model.fit(
    X_train,
    # Y_train,
    [Y_train[:, i] for i in range(5)], # split Y_train into 5 outputs
    validation_split=0.2,
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
)

#model.evaluate(X_test, Y_test, verbose=1)

[34m[1mwandb[0m: [32m[41mERROR[0m Unable to patch Tensorflow/Keras
ERROR:wandb.integration.keras.keras:exception while trying to patch_tf_keras
Traceback (most recent call last):
  File "/Users/louisbeaumont/Documents/mne-hack/env/lib/python3.10/site-packages/wandb/integration/keras/keras.py", line 86, in patch_tf_keras
    from keras.engine import training
ModuleNotFoundError: No module named 'keras.engine'
[34m[1mwandb[0m: [32m[41mERROR[0m Unable to patch Tensorflow/Keras
ERROR:wandb.integration.keras.keras:exception while trying to patch_tf_keras
Traceback (most recent call last):
  File "/Users/louisbeaumont/Documents/mne-hack/env/lib/python3.10/site-packages/wandb/integration/keras/keras.py", line 86, in patch_tf_keras
    from keras.engine import training
ModuleNotFoundError: No module named 'keras.engine'
[34m[1mwandb[0m: [32m[41mERROR[0m Unable to patch Tensorflow/Keras
ERROR:wandb.integration.keras.keras:exception while trying to patch_tf_keras
Traceback (mos



Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 1360, 5)]            0         []                            
                                                                                                  
 layer_normalization_88 (La  (None, 1360, 5)              10        ['input_12[0][0]']            
 yerNormalization)                                                                                
                                                                                                  
 multi_head_attention_44 (M  (None, 1360, 5)              1477      ['layer_normalization_88[0][0]
 ultiHeadAttention)                                                 ',                            
                                                                     'layer_normalization_8



Epoch 1/200

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/louisbeaumont/Documents/mne-hack/wandb/run-20230716_073305-5ppj586e/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/louisbeaumont/Documents/mne-hack/wandb/run-20230716_073305-5ppj586e/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/louisbeaumont/Documents/mne-hack/wandb/run-20230716_073305-5ppj586e/files/model-best)... Done. 0.1s


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200


<keras.src.callbacks.History at 0x2ad090e50>

In [53]:
X_train.shape
# display all length on dim 1
for i in range(len(X_train)):
    print(len(X_train[i]))

1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088
1088


In [40]:
[Y_train[:, i] for i in range(5)]

[array([1, 9, 9, ..., 4, 8, 7]),
 array([0, 8, 1, ..., 7, 6, 4]),
 array([2, 9, 2, ..., 2, 0, 4]),
 array([9, 2, 3, ..., 7, 5, 2]),
 array([2, 1, 9, ..., 0, 9, 1])]

## Conclusions

In about 110-120 epochs (25s each on Colab), the model reaches a training
accuracy of ~0.95, validation accuracy of ~84 and a testing
accuracy of ~85, without hyperparameter tuning. And that is for a model
with less than 100k parameters. Of course, parameter count and accuracy could be
improved by a hyperparameter search and a more sophisticated learning rate
schedule, or a different optimizer.

You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/timeseries_transformer_classification) and try the demo on [Hugging Face Spaces](https://huggingface.co/spaces/keras-io/timeseries_transformer_classification).