In [1]:
import numpy as np

# Read the precomputed feature tensor and its label vector from disk.
# Expected layouts (see the shapes printed below): X -> (N, 128, 128, 1), y -> (N,).
X, y = (np.load(npy_path) for npy_path in ("X_mel.npy", "y_labels.npy"))

# Sanity check: both arrays should report the same leading dimension N.
print(X.shape, y.shape)


(6705, 128, 128, 1) (6705,)


In [2]:
from tensorflow.keras.utils import to_categorical

# Recover integer class ids first so this cell is safe to re-run: after one
# execution `y` is already one-hot (2-D), and feeding that back into
# np.unique / to_categorical would report 2 "classes" and add a bogus axis.
# On a fresh kernel `y` is 1-D and is used unchanged.
y_class_ids = y if y.ndim == 1 else np.argmax(y, axis=1)

# NOTE(review): counting unique values assumes the labels are the contiguous
# integers 0..K-1 - confirm against how y_labels.npy was produced.
num_classes = len(np.unique(y_class_ids))

# One-hot encode; `y` keeps its name because later cells read it.
y = to_categorical(y_class_ids, num_classes)

print("Number of classes:", num_classes)
print("y shape:", y.shape)


  if not hasattr(np, "object"):


Number of classes: 11
y shape: (6705, 11)


In [3]:
from sklearn.model_selection import train_test_split

# Two-stage stratified split with a fixed seed:
#   stage 1 holds out 30% of the samples,
#   stage 2 halves that hold-out,
# which yields 70% train / 15% validation / 15% test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp
)

# Report the size of every partition.
print("Train:", X_train.shape)
print("Val:", X_val.shape)
print("Test:", X_test.shape)


Train: (4693, 128, 128, 1)
Val: (1006, 128, 128, 1)
Test: (1006, 128, 128, 1)


In [4]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Convert the one-hot training targets back to integer class ids.
y_train_labels = np.argmax(y_train, axis=1)

# Class ids actually present in the training split (sorted by np.unique).
train_classes = np.unique(y_train_labels)

# "balanced" weighting: n_samples / (n_classes * count(class)), so classes
# with fewer samples receive larger weights.
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=train_classes,
    y=y_train_labels,
)

# Key every weight by its real class id rather than by its position in the
# weight array: the two only coincide when no class is missing from y_train.
# Cast to plain int/float so the mapping prints and serialises cleanly.
# NOTE(review): presumably consumed as a Keras `class_weight` mapping later - confirm.
class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(train_classes, balanced_weights)
}
print("Class weights:", class_weights)


Class weights: {0: 1.5685160427807487, 1: 1.2086015967035797, 2: 1.350115074798619, 3: 0.9565837749694251, 4: 0.801948051948052, 5: 0.8944158566800077, 6: 0.8448244824482448, 7: 0.9740556247405563, 8: 1.056030603060306, 9: 1.0508284818629645, 10: 0.7842580213903744}


In [5]:
import numpy as np

def spec_augment(mel, freq_mask=15, time_mask=20, rng=None):
    """Return a copy of ``mel`` with one frequency band and one time band zeroed.

    Axis 0 is treated as frequency and axis 1 as time. For each axis a mask
    width is drawn uniformly from ``[0, mask)`` and a start offset is drawn so
    that the band lies entirely inside the array; the band is then set to 0.
    The input array is never modified.

    Parameters
    ----------
    mel : np.ndarray
        Array with at least two dimensions.
    freq_mask : int, default 15
        Exclusive upper bound on the frequency-band width. ``0`` (or less)
        disables frequency masking.
    time_mask : int, default 20
        Exclusive upper bound on the time-band width. ``0`` (or less)
        disables time masking.
    rng : np.random.Generator or np.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global RNG is used, so
        ``np.random.seed`` still controls the result.

    Returns
    -------
    np.ndarray
        Masked copy of ``mel`` with the same shape and dtype.
    """
    # Pick the integer sampler; all three share the (low, high-exclusive) form.
    if rng is None:
        draw = np.random.randint
    elif hasattr(rng, "integers"):
        draw = rng.integers
    else:
        draw = rng.randint

    mel = mel.copy()
    n_freq, n_time = mel.shape[0], mel.shape[1]

    # Frequency masking.
    # Clamp the bound to the axis length so the offset draw below always has
    # a non-empty range; skip entirely when masking is disabled.
    f_bound = min(freq_mask, n_freq)
    if f_bound > 0:
        f = draw(0, f_bound)
        f0 = draw(0, n_freq - f)
        mel[f0:f0 + f, :] = 0

    # Time masking (same scheme along axis 1).
    t_bound = min(time_mask, n_time)
    if t_bound > 0:
        t = draw(0, t_bound)
        t0 = draw(0, n_time - t)
        mel[:, t0:t0 + t] = 0

    return mel


In [6]:
# Seed NumPy's global RNG so the random masks - and therefore the augmented
# training set - are identical on every Restart & Run All. 42 matches the
# random_state used for the train/val/test split.
np.random.seed(42)

# Drop the trailing channel axis -> apply the masks -> restore the channel axis.
# Only the last axis is squeezed, so a size-1 frequency/time axis is preserved.
X_train_aug = np.array([
    spec_augment(np.squeeze(x, axis=-1)) for x in X_train
])
X_train_aug = X_train_aug[..., np.newaxis]

# Combine original + augmented; each augmented sample keeps the label of the
# original it was derived from, so the labels are simply repeated in order.
X_train_combined = np.concatenate([X_train, X_train_aug])
y_train_combined = np.concatenate([y_train, y_train])

# Features and labels must stay aligned one-to-one.
assert X_train_combined.shape[0] == y_train_combined.shape[0]

print("Original train:", X_train.shape)
print("Augmented train:", X_train_combined.shape)


Original train: (4693, 128, 128, 1)
Augmented train: (9386, 128, 128, 1)
