<a href="https://colab.research.google.com/github/tonytarizzo/NeverLateX/blob/main/Copy_of_STABILO_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.utils import plot_model

In [None]:
# Mount Google Drive at /content/drive so the dataset files stored there can be
# read through the normal filesystem. The google.colab package is used, so this
# cell is expected to run inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that holds this dataset split, plus the four
# .npy files (train/test inputs and labels) inside it.
data_path = "/content/drive/My Drive/NeverLaTeX/AML/onhw-chars_2021-06-30/onhw2_both_indep_0"

X_train_path, X_test_path, y_train_path, y_test_path = (
    f'{data_path}/{stem}.npy'
    for stem in ('X_train', 'X_test', 'y_train', 'y_test')
)

In [None]:
# Read the four arrays from disk, in the order X_train, X_test, y_train, y_test.
import numpy as np

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code from the file. Only load files from a trusted source.
X_train, X_test, y_train, y_test = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (X_train_path, X_test_path, y_train_path, y_test_path)
)

In [None]:
# Report the shape of every loaded array, one per line.
print("\n".join(
    f"{label} shape: {array.shape}"
    for label, array in (
        ("X_train", X_train),
        ("X_test", X_test),
        ("y_train", y_train),
        ("y_test", y_test),
    )
))

X_train shape: (23319,)
X_test shape: (7956,)
y_train shape: (23319,)
y_test shape: (7956,)


In [None]:
from scipy.signal import resample

# Names of the 13 per-sample series, in column order: get_resampled_data reads
# column i of each recording as series_names[i]. Four three-axis groups
# (x, y, z) followed by the single 'force' series.
series_names = [
    f'{prefix}{axis}'
    for prefix in ('acc1', 'acc2', 'gyro', 'magn')
    for axis in 'xyz'
] + ['force']

def get_resampled_data(X, y, target_len=60):
  """Resample every series of every non-empty sample to a common length.

  Args:
    X: iterable of 2-D arrays; column i of each array is read as
      series_names[i]. Samples whose first dimension is 0 are skipped.
    y: labels, indexable by the position of the sample in X.
    target_len: number of points each series is resampled to with
      scipy.signal.resample.

  Returns:
    A tuple (stacked, labels): stacked maps each name in series_names to an
    array of shape (n_kept, target_len); labels is an array with the labels
    of the kept samples, in the same order.
  """
  per_series = {name: [] for name in series_names}
  kept_labels = []
  for sample_pos, sample in enumerate(X):
    if sample.shape[0] != 0:
      # Column order of the sample follows series_names.
      for column, name in enumerate(series_names):
        per_series[name].append(resample(sample[:, column], target_len))
      # Keep the label only for samples that were actually resampled.
      kept_labels.append(y[sample_pos])
  stacked = {
      name: np.stack(chunks, axis=0) for name, chunks in per_series.items()
  }
  return stacked, np.array(kept_labels)

def make_pd(X):
  """Join the per-series arrays of a dict side by side into one 2-D array.

  The values of X are concatenated along axis 1 in the dict's iteration
  order, so the result holds all columns of the first series, then all
  columns of the second, and so on (series-major layout).
  """
  return np.concatenate([*X.values()], axis=1)


In [None]:
# Training split: resample every series to a fixed length, then flatten each
# sample into one row (series laid out one after another).
X_train_resampled, y_train_resampled = get_resampled_data(X_train, y_train)
X_train_concated = make_pd(X_train_resampled)

# Test split: same two steps.
X_test_resampled, y_test_resampled = get_resampled_data(X_test, y_test)
X_test_concated = make_pd(X_test_resampled)

In [None]:
# Reshape concatenated data to (samples, timesteps, features)
timesteps = 60  # Target length during resampling
features = 13   # Number of original series


def _to_time_major(flat, timesteps, features):
    """Turn a (samples, features * timesteps) matrix into (samples, timesteps, features).

    make_pd lays the columns out series by series (all timesteps of the first
    series, then all timesteps of the second, ...), i.e.
    column = series * timesteps + t. Reshaping straight to
    (samples, timesteps, features) would mix different series and time steps
    inside one "timestep" row, so the matrix is first split into
    (samples, features, timesteps) and the last two axes are then swapped.

    Raises:
        ValueError: if flat is not 2-D with features * timesteps columns.
    """
    if flat.ndim != 2 or flat.shape[1] != timesteps * features:
        raise ValueError(
            f"expected a 2-D array with {timesteps * features} columns, "
            f"got shape {flat.shape}"
        )
    return flat.reshape(flat.shape[0], features, timesteps).transpose(0, 2, 1)


X_train = _to_time_major(X_train_concated, timesteps, features)
X_test = _to_time_major(X_test_concated, timesteps, features)

# Standardize every channel to zero mean / unit variance. The series come in
# very different numeric ranges, and feeding them unscaled lets the loss blow up
# at the start of training. Statistics come from the training split only and
# are reused for the test split so no test information leaks into training.
channel_mean = X_train.mean(axis=(0, 1), keepdims=True)
channel_std = X_train.std(axis=(0, 1), keepdims=True)
channel_std = np.where(channel_std == 0, 1.0, channel_std)  # constant channel: avoid /0
X_train = (X_train - channel_mean) / channel_std
X_test = (X_test - channel_mean) / channel_std

# Define input shape for the model
input_shape = (X_train.shape[1], X_train.shape[2])  # (timesteps, features)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("Input shape for models:", input_shape)


X_train shape: (23316, 60, 13)
X_test shape: (7956, 60, 13)
Input shape for models: (60, 13)


In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Encode the labels returned by get_resampled_data, not the raw y_train / y_test:
# empty recordings were dropped during resampling, so only the *_resampled
# labels are row-aligned with X_train_concated / X_test_concated.

# Initialize label encoder
label_encoder = LabelEncoder()

# Fit on train + test labels so both splits share one character -> integer mapping
all_labels = np.concatenate((y_train_resampled, y_test_resampled))
label_encoder.fit(all_labels)

# Transform the labels
y_train_encoded = label_encoder.transform(y_train_resampled)
y_test_encoded = label_encoder.transform(y_test_resampled)

print(f"Classes: {label_encoder.classes_}")  # Display the mapping of characters to integers

# Number of classes
num_classes = len(label_encoder.classes_)

# One-hot encode the labels (the models are compiled with categorical_crossentropy)
y_train_categorical = to_categorical(y_train_encoded, num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes)

# Every input row must have exactly one label row.
assert len(y_train_categorical) == len(X_train_concated), "train labels misaligned with inputs"
assert len(y_test_categorical) == len(X_test_concated), "test labels misaligned with inputs"

print(f"Shape of y_train: {y_train_categorical.shape}")
print(f"Shape of y_test: {y_test_categorical.shape}")

Classes: ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R'
 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']
Shape of y_train: (23319, 52)
Shape of y_test: (7956, 52)


In [None]:
# Define CNN Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

def build_cnn(input_shape, num_classes):
    """Build and compile a 1-D convolutional classifier.

    Args:
        input_shape: shape of one sample without the batch dimension,
            (timesteps, features).
        num_classes: number of output classes (size of the softmax layer).

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss (so targets must be one-hot encoded) and an
        accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly; passing input_shape= to the first layer
        # makes Keras emit a warning recommending an Input object instead.
        tf.keras.Input(shape=input_shape),
        Conv1D(32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Define CLDNN Model
from tensorflow.keras.layers import LSTM, BatchNormalization

def build_cldnn(input_shape, num_classes):
    """Build and compile a convolution + LSTM + dense classifier.

    Args:
        input_shape: shape of one sample without the batch dimension,
            (timesteps, features).
        num_classes: number of output classes (size of the softmax layer).

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss (so targets must be one-hot encoded) and an
        accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly; passing input_shape= to the first layer
        # makes Keras emit a warning recommending an Input object instead.
        tf.keras.Input(shape=input_shape),
        Conv1D(32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        BatchNormalization(),
        # return_sequences=True keeps the LSTM output for every time step;
        # Flatten then feeds all of them to the dense head.
        LSTM(64, return_sequences=True),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Train and Evaluate Models
print(X_train_concated.shape)
print(X_test_concated.shape)
print(y_train_categorical.shape)
print(y_test_categorical.shape, "\n")
input_shape = (X_train.shape[1], X_train.shape[2])  # (timesteps, features)

# Empty recordings were already removed by get_resampled_data, so X_train and
# X_test are dense arrays and checking `x.size > 0` on them can never find an
# empty row. What does need care is the labels: they must be the ones kept by
# get_resampled_data (y_*_resampled). Slicing the raw label arrays to the same
# length would only drop the trailing labels and leave every label after a
# removed recording paired with the wrong sample.
y_train_categorical = to_categorical(label_encoder.transform(y_train_resampled), num_classes)
y_test_categorical = to_categorical(label_encoder.transform(y_test_resampled), num_classes)

# Fail loudly instead of training on misaligned data.
if len(X_train) != len(y_train_categorical):
    raise ValueError(
        f"train inputs/labels differ in length: {len(X_train)} vs {len(y_train_categorical)}"
    )
if len(X_test) != len(y_test_categorical):
    raise ValueError(
        f"test inputs/labels differ in length: {len(X_test)} vs {len(y_test_categorical)}"
    )

print(X_train.shape)
print(X_test.shape)
print(y_train_categorical.shape)
print(y_test_categorical.shape)


(23316, 780)
(7956, 780)
(23319, 52)
(7956, 52) 

(23316, 60, 13)
(7956, 60, 13)
(23316, 52)
(7956, 52)


In [None]:
# Instantiate both architectures with the same input shape and class count so
# that the two training runs below work on identical inputs and targets.
cnn_model = build_cnn(input_shape, num_classes)
cldnn_model = build_cldnn(input_shape, num_classes)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train CNN
# NOTE(review): the test split is passed as validation data, so the val_*
# metrics are not an independent estimate if they are used to pick a model.
cnn_history = cnn_model.fit(
    X_train,
    y_train_categorical,
    epochs=20,
    batch_size=64,
    validation_data=(X_test, y_test_categorical),
    verbose=1,
)

Epoch 1/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 21ms/step - accuracy: 0.0197 - loss: 309.8084 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 2/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.0190 - loss: 3.9520 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 3/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0180 - loss: 3.9514 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 4/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.0163 - loss: 3.9515 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 5/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0214 - loss: 3.9514 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 6/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0189 - loss: 3.9514 - val_accuracy: 0.0192 - val_loss: 3.9512
Epoch 7/20
[1m365/365

In [None]:
# Train CLDNN
# NOTE(review): the test split is passed as validation data, so the val_*
# metrics are not an independent estimate if they are used to pick a model.
cldnn_history = cldnn_model.fit(
    X_train,
    y_train_categorical,
    epochs=20,
    batch_size=64,
    validation_data=(X_test, y_test_categorical),
    verbose=1,
)

Epoch 1/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.0176 - loss: 3.9600 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 2/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.0178 - loss: 3.9515 - val_accuracy: 0.0192 - val_loss: 3.9512
Epoch 3/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.0190 - loss: 3.9515 - val_accuracy: 0.0192 - val_loss: 3.9513
Epoch 4/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.0192 - loss: 3.9514 - val_accuracy: 0.0192 - val_loss: 3.9512
Epoch 5/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.0183 - loss: 3.9515 - val_accuracy: 0.0192 - val_loss: 3.9512
Epoch 6/20
[1m365/365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.0177 - loss: 3.9514 - val_accuracy: 0.0192 - val_loss: 3.9512
Epoch 7/20
[1m365/365[0m 