In [3]:
#Imports + Seed (same for both runs)
import os, random
import numpy as np
import tensorflow as tf

SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

In [5]:
#Loading preprocessed MEL images and labels
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import pandas as pd

MEL_DIR = "/content/drive/MyDrive/irmas_multilabel_mels"
LABEL_CSV = "/content/drive/MyDrive/multilabel_labels.csv"

IMG_SIZE = 128

labels_df = pd.read_csv(LABEL_CSV)

X, y = [], []

for _, row in labels_df.iterrows():
    img_path = os.path.join(MEL_DIR, row["file"].replace(".wav", ".png"))
    if not os.path.exists(img_path):
        continue

    img = load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    img = img_to_array(img) / 255.0
    X.append(img)

    y.append(row[1:].values.astype("float32"))

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)

print(X.shape, y.shape)

(1000, 128, 128, 3) (1000, 11)


In [6]:
#SAME train/val split
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

In [7]:
#CNN model function (reusable)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

def build_model(learning_rate):
    model = Sequential([
        Conv2D(32, (3,3), activation="relu", input_shape=(128,128,3)),
        MaxPooling2D(2,2),

        Conv2D(64, (3,3), activation="relu"),
        MaxPooling2D(2,2),

        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.5),
        Dense(y.shape[1], activation="sigmoid")
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["binary_accuracy"]
    )
    return model

In [8]:
#BASELINE experiment (LR = 0.001)
baseline_model = build_model(learning_rate=0.001)

history_base = baseline_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1s/step - binary_accuracy: 0.7170 - loss: 0.6200 - val_binary_accuracy: 0.7764 - val_loss: 0.5331
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 866ms/step - binary_accuracy: 0.7657 - loss: 0.5620 - val_binary_accuracy: 0.7764 - val_loss: 0.5297
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 951ms/step - binary_accuracy: 0.7682 - loss: 0.5531 - val_binary_accuracy: 0.7764 - val_loss: 0.5215
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 906ms/step - binary_accuracy: 0.7690 - loss: 0.5415 - val_binary_accuracy: 0.7764 - val_loss: 0.5178
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 855ms/step - binary_accuracy: 0.7686 - loss: 0.5354 - val_binary_accuracy: 0.7768 - val_loss: 0.5128
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 858ms/step - binary_accuracy: 0.7674 - los

In [9]:
#Evaluation
from sklearn.metrics import precision_score, recall_score, f1_score

y_pred_base = baseline_model.predict(X_val)
y_pred_base_bin = (y_pred_base >= 0.2).astype(int)

base_precision = precision_score(y_val, y_pred_base_bin, average="micro")
base_recall = recall_score(y_val, y_pred_base_bin, average="micro")
base_f1 = f1_score(y_val, y_pred_base_bin, average="micro")

print("BASELINE")
print("Precision:", base_precision)
print("Recall:", base_recall)
print("F1:", base_f1)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 455ms/step
BASELINE
Precision: 0.328125
Recall: 0.5548780487804879
F1: 0.4123867069486405


In [10]:
#MODIFIED experiment (LR = 0.0001)
modified_model = build_model(learning_rate=0.0001)

history_mod = modified_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 875ms/step - binary_accuracy: 0.6921 - loss: 0.6125 - val_binary_accuracy: 0.7764 - val_loss: 0.5405
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 926ms/step - binary_accuracy: 0.7535 - loss: 0.5682 - val_binary_accuracy: 0.7764 - val_loss: 0.5327
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 849ms/step - binary_accuracy: 0.7486 - loss: 0.5754 - val_binary_accuracy: 0.7764 - val_loss: 0.5305
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 993ms/step - binary_accuracy: 0.7552 - loss: 0.5648 - val_binary_accuracy: 0.7764 - val_loss: 0.5269
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 858ms/step - binary_accuracy: 0.7524 - loss: 0.5567 - val_binary_accuracy: 0.7764 - val_loss: 0.5268
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 924ms/step - binary_accuracy: 0.7572 - 

In [11]:
#Evaluate
y_pred_mod = modified_model.predict(X_val)
y_pred_mod_bin = (y_pred_mod >= 0.2).astype(int)

mod_precision = precision_score(y_val, y_pred_mod_bin, average="micro")
mod_recall = recall_score(y_val, y_pred_mod_bin, average="micro")
mod_f1 = f1_score(y_val, y_pred_mod_bin, average="micro")

print("MODIFIED")
print("Precision:", mod_precision)
print("Recall:", mod_recall)
print("F1:", mod_f1)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 179ms/step
MODIFIED
Precision: 0.2757452574525745
Recall: 0.8272357723577236
F1: 0.4136178861788618


In [12]:
#Final Comparison
print("\n===== COMPARISON =====")
print(f"Baseline LR=0.001 → F1: {base_f1:.4f}")
print(f"Modified LR=0.0001 → F1: {mod_f1:.4f}")

if mod_f1 > base_f1:
    print("Lower learning rate IMPROVED multilabel performance")
else:
    print("Lower learning rate did NOT improve performance")


===== COMPARISON =====
Baseline LR=0.001 → F1: 0.4124
Modified LR=0.0001 → F1: 0.4136
Lower learning rate IMPROVED multilabel performance
