In [19]:

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt

import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import torch.optim as optim
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence

from torch.utils.data import DataLoader
import glob

!pip install pydot



#### Actual Data

In [20]:
## generate test / train split

class CustomDataset(tf.keras.utils.Sequence):
    def __init__(self, x_set, y_set, seq_len, batch_size):
        self.x, self.y = x_set, y_set
        self.seq_len = seq_len
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.seq_len)))

        
    def most_common(self,lst):
        # print("mc",lst)
        lst = [np.where(r==1)[0][0] for r in lst]
        return (np.bincount(lst).argmax())
    
    def __getitem__(self, idx):
        start_idx = idx * self.seq_len
        end_idx = (idx + 1) * self.seq_len

        batch_x = self.x[start_idx:end_idx]
        batch_y = self.y[start_idx:end_idx]
        
        
        # most_com = self.most_common(batch_y)
        # print(most_com)
        
        batch_x = torch.from_numpy(batch_x)
        batch_y = torch.from_numpy(batch_y)
        
        # Pad sequences to ensure they have the same length within the batch
        pad_len = self.seq_len - batch_x.shape[0]
        if pad_len > 0:
            pad_shape = (pad_len,) + batch_x.shape[1:]
            pad_shape_y = (pad_len,) + batch_y.shape[1:]
            
            batch_x = torch.cat([batch_x, torch.zeros(pad_shape)], dim=0)
            batch_y = torch.cat([batch_y, torch.zeros(pad_shape_y)], dim=0)


        return batch_x, batch_y


    def on_epoch_end(self):
        indices = np.arange(len(self.x))
        np.random.shuffle(indices)
        self.x = self.x[indices]
        self.y = self.y[indices]
        
def generate_data_split(subject_id):
    # Get CSV files list from a folder
    train_path = './Train'
    test_path = './Test'
    
    csv_path = './ProcessedDatasets/Knot_Tying'
    
    csv_files = glob.glob(csv_path + "/*.csv")
    
    train_df_list = []
    test_df_list = []
    
    for file in csv_files:
        if(subject_id in file):
            test_df_list.append(pd.read_csv(file))
#             print(file)
        else:
            train_df_list.append(pd.read_csv(file))
            

    print('Train Subject Trials: ',len(train_df_list))
    print('Test Subject Trials: ',len(test_df_list))
    
    # Concatenate all DataFrames
    train_df   = pd.concat(train_df_list, ignore_index=True)
    test_df   = pd.concat(test_df_list, ignore_index=True)

    
    lb = preprocessing.LabelBinarizer()

    train_labels= train_df.pop('label')
    train_features = train_df

    test_labels= test_df.pop('label')
    test_features = test_df


    all_class_names = ["G1", 'G2', 'G3', 'G4', 'G5', 'G6', 'G8', 'G9', 'G10', 'G11', 'G12', 'G13', 'G14', 'G15']
    lb.fit(all_class_names)

    train_labels = lb.transform(train_labels)
    test_labels = lb.transform(test_labels)
    
    train_x = train_features.to_numpy()
    train_y = train_labels

    test_x = test_features.to_numpy()
    test_y = test_labels

    seq_len = 30
    batch_size = 64

    valid_test_split = 0.8
    # Step 2: Split the remaining data into validation and test sets
    val_x, test_x, val_y, test_y = train_test_split(
    test_x, test_y, test_size=valid_test_split, random_state=42)
    
    train_dataset = CustomDataset(train_x, train_y, seq_len, batch_size)
    
    val_dataset = CustomDataset(val_x, val_y, seq_len, batch_size)
    
    test_dataset = CustomDataset(test_x, test_y, seq_len, batch_size)

    train_dataloader = tf.data.Dataset.from_generator(
        lambda: train_dataset,
        output_signature=(
            tf.TensorSpec(shape=( seq_len, train_x.shape[1]), dtype=tf.float32),
            tf.TensorSpec(shape=( seq_len, train_y.shape[1]), dtype=tf.float32),
        ),
    # )
    ).repeat()
    train_dataloader = train_dataloader.batch(batch_size)


    val_dataloader = tf.data.Dataset.from_generator(
        lambda: val_dataset,
        output_signature=(
            tf.TensorSpec(shape=( seq_len, test_x.shape[1]), dtype=tf.float32),
            tf.TensorSpec(shape=( seq_len, test_y.shape[1]), dtype=tf.float32),
        ),
    )
    val_dataloader = val_dataloader.batch(batch_size)
    

    test_dataloader = tf.data.Dataset.from_generator(
        lambda: test_dataset,
        output_signature=(
            tf.TensorSpec(shape=( seq_len, test_x.shape[1]), dtype=tf.float32),
            tf.TensorSpec(shape=( seq_len, test_y.shape[1]), dtype=tf.float32),
        ),
    )
    test_dataloader = test_dataloader.batch(batch_size)
    
    return train_dataloader, val_dataloader, test_dataloader
    
subjects = ['S02','S03','S04','S05','S06','S07','S08','S09']

for subject in subjects:
    train_dataloader, val_dataloader, test_dataloader = generate_data_split(subject)
    train_features, train_labels = next(iter(val_dataloader))

    input_shape = train_features.shape[1:]
    output_dim = train_labels.shape[2]

    print("input_dim: ",input_shape)
    print("output_dim: ",output_dim)
    
    break



Train Subject Trials:  32
Test Subject Trials:  4
input_dim:  (30, 66)
output_dim:  14


### LSTM

In [21]:
def make_model(input_shape, num_classes):
    input_layer = keras.layers.Input(input_shape)
    
    conv1 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.ReLU()(conv1)

    conv2 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.ReLU()(conv2)

    conv3 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.ReLU()(conv3)

    gap = keras.layers.GlobalAveragePooling1D(data_format="channels_first", keepdims=True)(conv3)

    output_layer = keras.layers.Dense(num_classes, activation="softmax")(gap)

    return keras.models.Model(inputs=input_layer, outputs=output_layer)


train_dataloader, val_dataloader, test_dataloader = generate_data_split('S04')
train_features, train_labels = next(iter(train_dataloader))

input_shape = train_features.shape[1:]
output_dim = train_labels.shape[2]

print("input_dim: ",input_shape)
print("output_dim: ",output_dim)


model = make_model(input_shape=input_shape, num_classes=output_dim)
# keras.utils.plot_model(model, show_shapes=True)
# model.summary()


Train Subject Trials:  31
Test Subject Trials:  5
input_dim:  (30, 66)
output_dim:  14


### Train and Validation

In [22]:
epochs = 25
steps_per_epoch = 500

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"],
)




subjects = ['S02','S03','S04','S05','S06','S07','S08','S09']


model.save_weights('./checkpoints/blank_state')

for subject in subjects:
    
    print(" *** --------START TRAIN--------- *** ")
    print("SUBJECT EXCLUDED: ",subject)
    
    train_dataloader, val_dataloader, test_dataloader = generate_data_split(subject)

    model.load_weights('./checkpoints/blank_state')

    history = model.fit(
        train_dataloader,
        epochs=epochs,
        callbacks=callbacks,
        steps_per_epoch=steps_per_epoch,
        validation_data=val_dataloader,
        verbose=1,
    )

    print(" *** -------END TRAIN-------- *** ")
    print(" *** -------START EVAL-------- *** ")
    
    model.evaluate(test_dataloader, verbose=1)

    print(" *** -------END EVAL-------- *** ")
    




 *** --------START TRAIN--------- *** 
SUBJECT EXCLUDED:  S02
Train Subject Trials:  32
Test Subject Trials:  4
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
 *** -------END TRAIN-------- *** 
 *** -------START EVAL-------- *** 
 *** -------END EVAL-------- *** 
 *** --------START TRAIN--------- *** 
SUBJECT EXCLUDED:  S03
Train Subject Trials:  31
Test Subject Trials:  5
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
 *** -------END TRAIN-------- *** 
 *** -------START EVAL-------- *** 
 *** -------END EV

KeyboardInterrupt: 

In [16]:
model.evaluate(test_dataloader, verbose=1)


      1/Unknown - 0s 20ms/step - loss: 7.4180 - categorical_accuracy: 0.2594



[7.492281436920166, 0.2655773460865021]

### Train and Evaluate

In [None]:
# input_shape = (30,66)
# input_shape = train_features.shape

# train_features = train_features.to_numpy()
# train_features = train_features.reshape((train_features.shape[0], train_features.shape[1], 1))
# input_shape = train_features.shape
train_dataloader, test_dataloader = generate_data_split(subject)
train_features, train_labels = next(iter(train_dataloader))

input_shape = train_features.shape[1:]
output_dim = train_labels.shape[2]

print("input_dim: ",input_shape)
print("output_dim: ",output_dim)



model = build_model(
    input_shape,
    output_dim,
    head_size=256,
    num_heads=2,
    ff_dim=512,
    num_transformer_blocks=4,
    mlp_units=[256,128],
    mlp_dropout=0.5,
    dropout=0.3,
)

model.compile(
    loss="categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["categorical_accuracy"],
)
# # # # model.summary()

callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

epochs = 50
# steps_per_epoch = train_dataloader.__len__()//1
# print("steps_per", steps_per_epoch)


subjects = ['S02','S03','S04','S05','S06','S07','S08','S09']

for subject in subjects:
    
    print(" *** --------START--------- *** ")
    print("SUBJECT EXCLUDED: ",subject)
    
    train_dataloader, test_dataloader = generate_data_split(subject)

    model.fit(
        train_dataloader,
        epochs=epochs,
        callbacks=callbacks,
        steps_per_epoch=500
#         validation_data = test_dataloader
    )


    print(" *** -------END-------- *** ")
    



## Pytorch dataloader implementation

# def gen(torch_loader):
#     for x,y in torch_loader:
#         yield (x,y)

# train = gen(train_dataloader)

# epochs = 300
# steps_per_epoch = train_dataloader.__len__()//5
# print("steps_per", steps_per_epoch)

# model.fit(
#     train,
#     epochs=epochs,
#     callbacks=callbacks,
#     steps_per_epoch=steps_per_epoch
# )

# # model.evaluate(x_test, y_test, verbose=1)


Train Subject Trials:  32
Test Subject Trials:  4
input_dim:  (30, 66)
output_dim:  14
14
 *** --------START--------- *** 
SUBJECT EXCLUDED:  S02
Train Subject Trials:  32
Test Subject Trials:  4
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
 *** -------END-------- *** 
 *** --------START--------- *** 
SUBJECT EXCLUDED:  S03
Train Subject Trials:  31
Test Subject Trials:  5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
E

In [85]:
# !pip install -q -U keras-tuner



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [91]:
# import keras_tuner as kt

# train_features, train_labels = next(iter(train_dataloader))

# input_shape = train_features.shape[1:]
# output_dim = train_labels.shape[2]

# print("input_dim: ",input_shape)
# print("output_dim: ",output_dim)


# def model_builder(hp):
    
    
#     hp_headsize = hp.Int('units', min_value=16, max_value=512, step=16)
#     hp_numheads = hp.Int('units', min_value=2, max_value=16, step=2)
#     hp_ff_dim = hp.Int('units', min_value=1, max_value=512, step=16)
#     hp_num_transformer_blocks = hp.Int('units', min_value=1, max_value=16, step=2)

#     model = build_model(
#     input_shape,
#     output_dim,
#     head_size=hp_headsize,
#     num_heads=hp_numheads,
#     ff_dim=hp_ff_dim,
#     num_transformer_blocks=hp_num_transformer_blocks,
#     mlp_units=[128],
#     mlp_dropout=0.4,
#     dropout=0.25,
#     )

#     model.compile(
#         loss="categorical_crossentropy",
#         optimizer=keras.optimizers.Adam(learning_rate=1e-4),
#         metrics=["categorical_accuracy"],
#     )
    
#     return model


# tuner = kt.Hyperband(model_builder,
#                      objective='val_accuracy',
#                      max_epochs=10,
#                      factor=3,
#                      project_name='intro_to_kt')


input_dim:  (30, 71)
output_dim:  14
INFO:tensorflow:Reloading Tuner from ./intro_to_kt/tuner0.json


In [92]:
# stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)


In [None]:
# tuner.search(train_dataloader, epochs=10, steps_per_epoch=500, callbacks=[stop_early])

# # Get the optimal hyperparameters
# best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

# print(f"""
# The hyperparameter search is complete. The optimal number of units in the first densely-connected
# layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
# is {best_hps.get('learning_rate')}.
# """)


In [35]:
model.evaluate(test_dataloader, verbose=1)

# test_features, test_labels = next(iter(test_dataloader))

# y_pred = model(test_features)

# relu = nn.ReLU()


# for idx,y in enumerate(y_pred) :
#     for i,sample in enumerate(y):
#         print(np.argmax(test_labels[idx][i].numpy()))
#         print(np.argmax(sample))
#         print('-----')
        
#     break



[0.9061856865882874, 0.6734204888343811]

In [101]:
model.save_weights('./checkpoints/progress_v2')

In [117]:
# Restore the weights
model.load_weights('./checkpoints/progress_v2')


ValueError: Received incompatible tensor with shape (71,) when attempting to restore variable with shape (66,) and name layer_with_weights-0/beta/.ATTRIBUTES/VARIABLE_VALUE.