## Objective: Train and evaluate 'CNN + LSTM' and 'CNN + RNN' models for classifying Indian classical music

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, SimpleRNN
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import seaborn as sns


In [9]:
# Load the pickled track features and their raag labels, closing each file
# as soon as it has been read.
# NOTE(security): pickle.load can execute arbitrary code -- only open trusted files.
with open('all_tracks.pkl', 'rb') as tracks_file:
    all_tracks = pickle.load(tracks_file)
with open('raag.pkl', 'rb') as raag_file:
    raag = pickle.load(raag_file)

# Every track needs exactly one label; fail here with a clear message rather
# than later inside the train/test split.
if len(all_tracks) != len(raag):
    raise ValueError(
        f"Found {len(all_tracks)} tracks but {len(raag)} raag labels"
    )

from sklearn import preprocessing
# Encode each raag label as an integer id; `le` is kept so ids can be mapped
# back to labels later via le.classes_ / le.inverse_transform.
le = preprocessing.LabelEncoder()
y = le.fit_transform(raag)


Split data into training, validation, and test sets

In [10]:

# Stage 1: hold back 33% of the tracks, stratified on the encoded labels,
# leaving 67% for training.
X_train, X_temp, y_train, y_temp = train_test_split(
    np.array(all_tracks),
    np.array(y),
    test_size=0.33,
    random_state=42,
    stratify=y,
)

# Stage 2: cut the held-back pool in half, again stratified, to get
# validation and test sets of equal size (about 16.5% of the data each).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)



In [11]:
# Append a trailing singleton axis to every split so the arrays carry the
# channel dimension that Conv2D expects.
# NOTE: this rebinds the names in place, so re-running the cell adds another axis.
X_train, X_val, X_test = (
    np.expand_dims(split, axis=-1) for split in (X_train, X_val, X_test)
)

Create CNN-LSTM hybrid model 

In [12]:
def create_cnn_lstm_model(input_shape, num_classes):
    """Build and compile the CNN + LSTM classifier.

    The network is two Conv2D -> MaxPooling2D -> Dropout stages, a per-step
    flatten, one LSTM layer, one dense layer and a softmax output.

    Args:
        input_shape: Shape of a single sample without the batch axis, in the
            form Conv2D accepts (rows, cols, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        sparse categorical cross-entropy and the accuracy metric.
    """
    model = Sequential()

    # Declare the input explicitly. Passing `input_shape=` to the first layer
    # of a Sequential model is discouraged by Keras 3 and emits a UserWarning.
    model.add(tf.keras.Input(shape=input_shape))

    # CNN layers
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Flatten CNN output per step: TimeDistributed treats axis 1 of the conv
    # output as the step axis, so the LSTM receives (steps, features).
    # NOTE(review): confirm that axis 1 of the stored features is time and
    # not frequency -- otherwise the LSTM walks along the wrong axis.
    model.add(TimeDistributed(Flatten()))

    # LSTM layer; only the final state is passed on (return_sequences=False)
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(0.5))

    # Fully connected layer
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    # Output layer
    model.add(Dense(num_classes, activation='softmax'))

    # Sparse loss: labels are integer class ids, not one-hot vectors
    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model


Create CNN-RNN hybrid model 

In [13]:
def create_cnn_rnn_model(input_shape, num_classes):
    """Build and compile the CNN + SimpleRNN classifier.

    The network is two Conv2D -> MaxPooling2D -> Dropout stages, a per-step
    flatten, one SimpleRNN layer, one dense layer and a softmax output.

    Args:
        input_shape: Shape of a single sample without the batch axis, in the
            form Conv2D accepts (rows, cols, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        sparse categorical cross-entropy and the accuracy metric.
    """
    model = Sequential()

    # Declare the input explicitly. Passing `input_shape=` to the first layer
    # of a Sequential model is discouraged by Keras 3 and emits a UserWarning.
    model.add(tf.keras.Input(shape=input_shape))

    # CNN layers
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Flatten the CNN output per step to feed into the RNN: TimeDistributed
    # treats axis 1 of the conv output as the step axis.
    # NOTE(review): confirm that axis 1 of the stored features is time and
    # not frequency -- otherwise the RNN walks along the wrong axis.
    model.add(TimeDistributed(Flatten()))

    # Simple RNN layer; only the final state is passed on (return_sequences=False)
    model.add(SimpleRNN(128, return_sequences=False))
    model.add(Dropout(0.5))

    # Fully connected layer
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    # Output layer
    model.add(Dense(num_classes, activation='softmax'))

    # Sparse loss: labels are integer class ids, not one-hot vectors
    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model


Train the CNN + LSTM model

In [14]:

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable across Restart & Run All.
# NOTE: some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# Per-sample shape (everything after the batch axis)
input_shape = X_train.shape[1:]
# Size the softmax from the label encoder rather than from the training labels:
# this covers every encoded class id even if one were absent from the train split.
num_classes = len(le.classes_)

cnn_lstm_model = create_cnn_lstm_model(input_shape, num_classes)

# Early stopping to prevent overfitting: stop after 10 epochs without a
# val_loss improvement and roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


history_cnn_lstm = cnn_lstm_model.fit(X_train, y_train,
                                      validation_data=(X_val, y_val),
                                      epochs=50,
                                      batch_size=32,
                                      callbacks=[early_stopping])



  super().__init__(


Epoch 1/50
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m342s[0m 2s/step - accuracy: 0.1222 - loss: 2.7105 - val_accuracy: 0.2189 - val_loss: 2.3652
Epoch 2/50
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m349s[0m 2s/step - accuracy: 0.2611 - loss: 2.3235 - val_accuracy: 0.3379 - val_loss: 2.0625
Epoch 3/50
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m407s[0m 2s/step - accuracy: 0.2916 - loss: 2.1652 - val_accuracy: 0.3379 - val_loss: 1.9741
Epoch 4/50
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m400s[0m 2s/step - accuracy: 0.3295 - loss: 2.0141 - val_accuracy: 0.3827 - val_loss: 1.8624
Epoch 5/50
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 2s/step - accuracy: 0.3620 - loss: 1.8799 - val_accuracy: 0.4480 - val_loss: 1.6885
Epoch 6/50
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 2s/step - accuracy: 0.4185 - loss: 1.7671 - val_accuracy: 0.4873 - val_loss: 1.6561
Epoch 7/50
[1m185/185

Train the CNN + RNN model

In [None]:
# Build the SimpleRNN variant with the same input shape and class count as the
# LSTM model, then train it under identical settings (same validation data,
# epoch budget, batch size and early-stopping callback).
cnn_rnn_model = create_cnn_rnn_model(input_shape, num_classes)

rnn_fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
)
history_cnn_rnn = cnn_rnn_model.fit(X_train, y_train, **rnn_fit_options)


In [None]:
# Score the CNN + LSTM network on the held-out test split (no progress bar)
lstm_test_metrics = cnn_lstm_model.evaluate(X_test, y_test, verbose=0)
test_loss_cnn_lstm, test_acc_cnn_lstm = lstm_test_metrics
print(f"CNN + LSTM Test Accuracy: {test_acc_cnn_lstm:.4f}")

# Score the CNN + Simple RNN network on the same test split
rnn_test_metrics = cnn_rnn_model.evaluate(X_test, y_test, verbose=0)
test_loss_cnn_rnn, test_acc_cnn_rnn = rnn_test_metrics
print(f"CNN + RNN Test Accuracy: {test_acc_cnn_rnn:.4f}")


In [None]:
def plot_history(history, title):
    """Draw training vs. validation curves for one fitted model.

    Produces a single 12x6 figure with two side-by-side panels: accuracy on
    the left and loss on the right, each showing the train and validation
    series against the epoch index.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        title: Prefix placed in front of each panel title.
    """
    # One row per panel: (train key, validation key, train legend,
    # validation legend, metric name used for the title and y-axis).
    panels = (
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 6))

    for slot, (train_key, val_key, train_label, val_label, metric) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.title(f'{title} {metric}')
        plt.xlabel('Epochs')
        plt.ylabel(metric)
        plt.legend()

    plt.tight_layout()
    plt.show()


# Render the learning curves of both models, LSTM first and then RNN.
for fitted_history, model_label in (
    (history_cnn_lstm, 'CNN + LSTM'),
    (history_cnn_rnn, 'CNN + RNN'),
):
    plot_history(fitted_history, model_label)