In [18]:
import json
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import tensorflow as tf
import random
from tensorflow.keras.preprocessing.sequence import pad_sequences
import joblib
from sklearn.utils import shuffle
from tensorflow.keras.layers import GRU, Input, Dropout, Dense
from tensorflow.keras.models import Model

import joblib
from google.colab import drive
from tensorflow.keras.layers import GRU

# Expose Google Drive inside the Colab VM; every data path used below lives under this mount point.
drive.mount(mountpoint='/content/drive')

def load_data(file_path):
    """Read a JSON file and return its contents as a NumPy array.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        np.ndarray built from the decoded JSON value via ``np.array``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; pin the encoding so decoding does not
    # depend on the platform's default locale.
    with open(file_path, 'r', encoding='utf-8') as file:
        return np.array(json.load(file))







Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
# Folder on the mounted Drive that holds every feature/label JSON file used below.
DATA_DIR = '/content/drive/MyDrive/My-207'

# Centroid and MFCC feature arrays for the training and validation splits.
X_train_centroid = load_data(f'{DATA_DIR}/X_train_centroid.json')
X_val_centroid = load_data(f'{DATA_DIR}/X_val_centroid.json')
X_train_mfcc = load_data(f'{DATA_DIR}/X_train_mfcc.json')
X_val_mfcc = load_data(f'{DATA_DIR}/X_val_mfcc.json')

# Labels go through load_data as well: it yields the same np.array(json.load(...))
# result, but closes the file handle instead of leaking it through a bare open().
y_train = load_data(f'{DATA_DIR}/y_train_centroid.json')
y_val = load_data(f'{DATA_DIR}/y_val_centroid.json')


In [None]:
# Transpose every MFCC sample (np.transpose reverses a sample's axes, i.e. a plain
# swap for 2-D samples) so the model sees the other axis as the sequence dimension.
# NOTE(review): both arrays are rebound in place, so executing this cell a second
# time in the same kernel swaps the axes back - run it exactly once per session.
train_X_perm = list(map(np.transpose, X_train_mfcc))
val_X_perm = list(map(np.transpose, X_val_mfcc))

X_train_mfcc = np.array(train_X_perm)
X_val_mfcc = np.array(val_X_perm)

In [25]:
# Convert string labels to integer class ids; the encoder is fit on the training
# labels only and then reused for the validation labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Dedicated seeded generator so the resampling and shuffling below give the same
# result on every Restart & Run All.
PREPROCESS_SEED = 42
preprocess_rng = np.random.default_rng(PREPROCESS_SEED)


def _pairing_indices(n_centroid, n_mfcc, rng, split_name):
    """Pick which centroid/label row is paired with each MFCC row.

    Args:
        n_centroid: Number of centroid (and label) rows available.
        n_mfcc: Number of MFCC rows that need a partner.
        rng: ``np.random.Generator`` used when rows must be resampled.
        split_name: Human-readable split name used in the warning message.

    Returns:
        Integer index array of length ``n_mfcc`` into the centroid/label rows.
    """
    if n_centroid == n_mfcc:
        # Same row count: keep row i of the centroid/labels with row i of the MFCCs.
        # The previous unconditional bootstrap re-indexed centroid and labels but
        # left the MFCC rows in place, so MFCC features were trained against the
        # label of a different, randomly chosen sample.
        # NOTE(review): assumes both feature files list the same clips in the same
        # order - confirm against the feature-extraction step.
        return np.arange(n_mfcc)
    # Row counts differ: fall back to the original bootstrap so shapes still match.
    print(f"WARNING ({split_name}): {n_centroid} centroid rows vs {n_mfcc} MFCC rows; "
          "centroid/label rows are resampled at random and are NOT aligned with the MFCC rows.")
    return rng.choice(n_centroid, size=n_mfcc, replace=True)


# Bring centroid features and labels to the MFCC row count.
train_pairing = _pairing_indices(X_train_centroid.shape[0], X_train_mfcc.shape[0], preprocess_rng, "train")
X_train_centroid_upsampled = X_train_centroid[train_pairing]
y_train_encoded_upsampled = y_train_encoded[train_pairing]

val_pairing = _pairing_indices(X_val_centroid.shape[0], X_val_mfcc.shape[0], preprocess_rng, "val")
X_val_centroid_upsampled = X_val_centroid[val_pairing]
y_val_encoded_upsampled = y_val_encoded[val_pairing]

# Pad (with zeros, at the end) or truncate (at the end) each centroid sequence to
# the MFCC sequence length so the two can be concatenated step by step.
max_time_steps = max(X_train_mfcc.shape[1], X_val_mfcc.shape[1])
X_train_centroid_padded = pad_sequences(X_train_centroid_upsampled, maxlen=max_time_steps, dtype='float32', padding='post', truncating='post')
X_val_centroid_padded = pad_sequences(X_val_centroid_upsampled, maxlen=max_time_steps, dtype='float32', padding='post', truncating='post')

# Add a trailing feature axis so the centroid can be stacked next to the MFCC features.
X_train_centroid_expanded = X_train_centroid_padded[..., np.newaxis]
X_val_centroid_expanded = X_val_centroid_padded[..., np.newaxis]

# The pairing step already produced exactly one centroid row per MFCC row, so the
# former "truncate both to the shorter one" step was a no-op; assert it instead.
assert X_train_centroid_expanded.shape[0] == X_train_mfcc.shape[0]
assert X_val_centroid_expanded.shape[0] == X_val_mfcc.shape[0]

# Concatenate along the feature axis: column 0 is the centroid, the rest are MFCC features.
X_train_combined = np.concatenate((X_train_centroid_expanded, X_train_mfcc), axis=-1)
X_val_combined = np.concatenate((X_val_centroid_expanded, X_val_mfcc), axis=-1)

# Min-max scale to [0, 1]. MinMaxScaler needs 2-D input, so every sample is flattened
# to (time_steps * features), scaled with statistics fit on the training split only,
# and reshaped back. The dimensions are captured once so the validation reshape does
# not depend on an array that is rebound half-way through.
n_time_steps, n_features = X_train_combined.shape[1], X_train_combined.shape[2]
train_X_reshape = X_train_combined.reshape(-1, n_time_steps * n_features)
val_X_reshape = X_val_combined.reshape(-1, n_time_steps * n_features)

scaler = MinMaxScaler((0, 1)).fit(train_X_reshape)
train_X_scaled = scaler.transform(train_X_reshape)
val_X_scaled = scaler.transform(val_X_reshape)

X_train_combined = train_X_scaled.reshape(-1, n_time_steps, n_features)
X_val_combined = val_X_scaled.reshape(-1, n_time_steps, n_features)

# Shuffle features and labels with the same permutation so they stay aligned.
train_indices = preprocess_rng.permutation(X_train_combined.shape[0])
val_indices = preprocess_rng.permutation(X_val_combined.shape[0])

X_train_combined = X_train_combined[train_indices]
y_train_encoded = y_train_encoded_upsampled[train_indices]

X_val_combined = X_val_combined[val_indices]
y_val_encoded = y_val_encoded_upsampled[val_indices]

# Persist the label encoder so predictions can be mapped back to string labels later.
# NOTE(review): the fitted scaler is not saved in this cell - inference code will
# need it too; confirm it is persisted elsewhere.
joblib.dump(label_encoder, "label_encoder.pkl")


['label_encoder.pkl']

In [28]:

# Model topology for the combined centroid + MFCC input:
#   two stacked GRU layers (both returning the full sequence) -> Flatten
#   -> two Dense(1024, relu) + Dropout(0.5) pairs -> softmax over the label classes.
n_timesteps, n_features = X_train_combined.shape[1], X_train_combined.shape[2]
n_classes = len(label_encoder.classes_)

layer_stack = [
    GRU(32, return_sequences=True, activation='tanh', input_shape=(n_timesteps, n_features)),
    GRU(64, return_sequences=True, activation='tanh'),
    tf.keras.layers.Flatten(),
]
# Fully connected head: the same Dense + Dropout pair, twice.
for _ in range(2):
    layer_stack.append(Dense(1024, activation='relu'))
    layer_stack.append(Dropout(0.5))
layer_stack.append(Dense(n_classes, activation='softmax'))

model = tf.keras.Sequential(layer_stack)

# Adam with an explicit learning rate; the sparse loss is used because the labels
# are integer class ids from the label encoder, not one-hot vectors.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()




Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_2 (GRU)                 (None, 20, 32)            18432     
                                                                 
 gru_3 (GRU)                 (None, 20, 64)            18816     
                                                                 
 flatten_1 (Flatten)         (None, 1280)              0         
                                                                 
 dense_3 (Dense)             (None, 1024)              1311744   
                                                                 
 dropout_3 (Dropout)         (None, 1024)              0         
                                                                 
 dense_4 (Dense)             (None, 1024)              1049600   
                                                                 
 dropout_4 (Dropout)         (None, 1024)             

In [29]:
# Fit on the combined training set and evaluate on the validation set after every epoch.
# The value returned by fit() is kept in `history` for later inspection.
# NOTE(review): no callbacks are passed, so training always runs the full 100 epochs
# unless it is interrupted by hand (in which case `history` is never assigned).
history = model.fit(
    x=X_train_combined,
    y=y_train_encoded,
    batch_size=32,
    epochs=100,
    validation_data=(X_val_combined, y_val_encoded),
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100

KeyboardInterrupt: ignored