In [None]:
import os
import re
import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import random
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Configuration: paths, audio/feature parameters, training hyper-parameters.
# ---------------------------------------------------------------------------
# All data lives on Google Drive, so Drive has to be mounted at /content/drive
# before any cell that touches these paths is executed.
DEAM_PATH = "/content/drive/MyDrive/DEAM"
AUDIO_DIR = f"{DEAM_PATH}/Audio"
MEL_DIR   = f"{DEAM_PATH}/MEL"

# Only create the cache directory when the dataset root is really there.
# Creating it unconditionally on an unmounted runtime would leave stray
# directories inside the mount point.
if os.path.isdir(DEAM_PATH):
    os.makedirs(MEL_DIR, exist_ok=True)
else:
    print(f"WARNING: {DEAM_PATH} not found - mount Google Drive before running the cells below.")

SR = 22050          # sampling rate (Hz) every track is resampled to
N_MELS = 128        # number of mel bands per spectrogram
WINDOW_SEC = 20     # length of one analysis window, in seconds
HOP_SEC = 10        # step between consecutive windows, in seconds
BATCH_SIZE = 4
EPOCHS = 5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seed every random number generator the notebook uses so that augmentation,
# weight initialisation and batch shuffling are repeatable across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
#audio augmentation

def augment_audio(y, sr):
    """Randomly perturb a waveform by time-stretching and/or pitch-shifting.

    The two effects are decided independently, each with probability 0.5,
    and are applied in this order:

    1. time stretch by a rate drawn uniformly from [0.9, 1.1];
    2. pitch shift by an integer number of steps drawn from -2..2 inclusive.

    Args:
        y: waveform handed to the librosa effects.
        sr: sampling rate of ``y``; only the pitch shift uses it.

    Returns:
        The transformed waveform, or ``y`` itself when neither effect fires.
    """
    apply_stretch = random.random() < 0.5
    if apply_stretch:
        stretch_rate = random.uniform(0.9, 1.1)
        y = librosa.effects.time_stretch(y=y, rate=stretch_rate)

    apply_shift = random.random() < 0.5
    if apply_shift:
        semitone_steps = random.randint(-2, 2)
        y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=semitone_steps)

    return y

In [None]:
# feature extraction

def mel_spectrogram(y):
    """Return the dB-scaled mel spectrogram of waveform ``y`` as float32.

    The spectrogram is computed with the notebook-wide ``SR`` and ``N_MELS``
    settings and an upper frequency bound of 8000 Hz, then converted from
    power to decibels.
    """
    power_spec = librosa.feature.melspectrogram(y=y, sr=SR, n_mels=N_MELS, fmax=8000)
    db_spec = librosa.power_to_db(power_spec)
    return db_spec.astype(np.float32)

# Pre-compute the mel-spectrogram windows of every track once and cache them
# in MEL_DIR as "<song_id>.npy" (all windows of a track stacked along a new
# leading axis). Decoding and transforming the whole collection takes several
# minutes, so tracks whose cache file already exists are skipped.
RECOMPUTE_MEL = False  # set to True after changing SR / N_MELS / WINDOW_SEC / HOP_SEC

# Window and hop lengths in samples are identical for every file.
win = int(WINDOW_SEC * SR)
hop = int(HOP_SEC * SR)

for f in tqdm(sorted(os.listdir(AUDIO_DIR))):
    if not f.endswith(".mp3"):
        continue

    # Accept only "<digits>.mp3", the same rule the annotation cell uses to
    # decide which songs have audio. A prefix match would let a stray file
    # such as "12 (1).mp3" silently overwrite the cache of song 12.
    match = re.fullmatch(r"(\d+)\.mp3", f)
    if not match:
        print(f"Skipping {f}")
        continue
    song_id = int(match.group(1))

    out_path = os.path.join(MEL_DIR, f"{song_id}.npy")
    if os.path.exists(out_path) and not RECOMPUTE_MEL:
        continue

    path = os.path.join(AUDIO_DIR, f)
    y, sr = librosa.load(path, sr=SR)

    # Slide a fixed-length window over the track; a trailing remainder that is
    # shorter than one full window is dropped.
    segments = [
        mel_spectrogram(y[start:start + win])
        for start in range(0, len(y) - win + 1, hop)
    ]

    if not segments:
        # Track is shorter than one window: nothing to cache for this song.
        print(f"Skipping {f}: shorter than {WINDOW_SEC} s")
        continue

    np.save(out_path, np.stack(segments))


100%|██████████| 1809/1809 [13:43<00:00,  2.20it/s]


In [None]:
# dataset

class MusicDataset(Dataset):
    """In-memory dataset of (mel-spectrogram window, label index) pairs.

    Every row of ``df`` names one song. The cached array
    ``<mel_dir>/<song_id>.npy`` is loaded eagerly and each entry along its
    first axis becomes one sample carrying the song's ``label_idx``.

    Args:
        df: DataFrame with at least the columns ``song_id`` and ``label_idx``.
        mel_dir: directory holding one ``<song_id>.npy`` file per song.
        augment: when True, Gaussian noise (mean 0, std 0.01) is added to the
            spectrogram every time a sample is fetched.

    Songs without a cached file are skipped and reported instead of aborting
    construction with ``FileNotFoundError``.
    """

    def __init__(self, df, mel_dir, augment=False):
        self.df = df.reset_index(drop=True)
        self.mel_dir = mel_dir
        self.augment = augment
        self.samples = []

        missing = []
        for song_id, label_idx in zip(self.df["song_id"], self.df["label_idx"]):
            song_id = int(song_id)
            mel_path = os.path.join(mel_dir, f"{song_id}.npy")
            if not os.path.exists(mel_path):
                missing.append(song_id)
                continue

            segments = np.load(mel_path)
            label = int(label_idx)
            # One sample per cached window; all windows share the song's label.
            self.samples.extend((mel, label) for mel in segments)

        if missing:
            print(
                f"MusicDataset: skipped {len(missing)} song(s) without a cached "
                f"mel file, e.g. {missing[:5]}"
            )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(mel, label)``: a float32 tensor with a leading channel axis and a long tensor."""
        mel, label = self.samples[idx]

        if self.augment:
            mel = mel + np.random.normal(0, 0.01, mel.shape)

        # unsqueeze(0) adds the single input channel expected by Conv2d.
        mel = torch.tensor(mel, dtype=torch.float32).unsqueeze(0)
        label = torch.tensor(label, dtype=torch.long)
        return mel, label




In [None]:
# model

class AudioCNN(nn.Module):
    """Small three-stage CNN classifier for single-channel spectrograms.

    Each stage is Conv(3x3, padding 1) -> BatchNorm -> ReLU. The first two
    stages are followed by 2x2 max pooling; the last one by global average
    pooling, so the network accepts inputs of any height and width large
    enough to survive the two poolings.

    Args:
        num_classes: size of the output layer (default 4).
    """

    def __init__(self, num_classes=4):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # Collapse the remaining spatial grid to one 128-d vector per sample.
            nn.AdaptiveAvgPool2d((1, 1))
        )

        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a (batch, 1, height, width) tensor to (batch, num_classes) logits."""
        x = self.conv(x)
        x = x.flatten(1)
        return self.fc(x)

In [None]:
# Mount Google Drive at /content/drive so the dataset paths resolve.
# NOTE(review): this cell sits below cells that already use paths under
# /content/drive (the config cell calls os.makedirs(MEL_DIR) and the
# feature-extraction cell lists AUDIO_DIR). On a fresh runtime it presumably
# has to be executed first - consider moving it to the top of the notebook.
from google.colab import drive
drive.mount('/content/drive')

# Same value as the DEAM_PATH assigned in the configuration cell.
DEAM_PATH = "/content/drive/MyDrive/DEAM"

# List the dataset root as a quick visual check of the mount.
!ls $DEAM_PATH



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Annotations  Audio  MEL


In [None]:


# Load both static-annotation tables (one row per song) and merge them.
ann1 = pd.read_csv(f"{DEAM_PATH}/Annotations/static_annotations_averaged_songs_1_2000.csv")
ann2 = pd.read_csv(f"{DEAM_PATH}/Annotations/static_annotations_averaged_songs_2000_2058.csv")
annotations = pd.concat([ann1, ann2], ignore_index=True)

# Strip whitespace around the header names so that plain keys such as
# "valence_mean" can be used to select columns.
annotations.columns = annotations.columns.str.strip()

# Sanity check: decode the first annotated track with the notebook-wide
# sampling rate and show the waveform shape.
song_id = annotations.iloc[0]["song_id"]
audio_path = f"{AUDIO_DIR}/{int(song_id)}.mp3"
y, sr = librosa.load(audio_path, sr=SR)
print(y.shape, sr)

# Ids of all songs that have an audio file named exactly "<digits>.mp3".
audio_ids = {
    int(m.group(1))
    for m in (re.match(r"^(\d+)\.mp3$", name) for name in os.listdir(AUDIO_DIR))
    if m
}

# Keep only the annotated songs whose audio is present.
annotations = annotations[annotations["song_id"].astype(int).isin(audio_ids)].reset_index(drop=True)
print(f"Number of usable songs: {len(annotations)}")

# Continuous regression targets, one (valence, arousal) pair per song.
y_targets = annotations[["valence_mean", "arousal_mean"]].values.astype(np.float32)
print(y_targets.shape)




(993595,) 22050
Number of usable songs: 1800
(1800, 2)


In [None]:
# load and split data

# Work on an independent copy of the annotation table, with whitespace
# stripped from the column names.
clean_names = annotations.columns.str.strip()
df = annotations.copy()
df.columns = clean_names

def mood_from_valence_arousal(valence, arousal, v_thresh=5.0, a_thresh=5.0):
    """Map a (valence, arousal) pair to one of four mood quadrants.

    Values equal to a threshold count as the high side.

    ==========  ==========  =========
    valence     arousal     label
    ==========  ==========  =========
    >= thresh   >= thresh   happy
    <  thresh   >= thresh   angry
    <  thresh   <  thresh   sad
    otherwise               relaxed
    ==========  ==========  =========

    Inputs for which no comparison holds (e.g. NaN) therefore fall through
    to "relaxed".
    """
    if valence >= v_thresh:
        return "happy" if arousal >= a_thresh else "relaxed"

    if valence < v_thresh:
        if arousal >= a_thresh:
            return "angry"
        if arousal < a_thresh:
            return "sad"

    return "relaxed"

# Assign every song its mood quadrant from the averaged annotations.
df["label"] = [
    mood_from_valence_arousal(valence, arousal)
    for valence, arousal in zip(df["valence_mean"], df["arousal_mean"])
]

# Sort the class names before numbering them so the label -> index mapping
# does not depend on the order in which the classes happen to appear in df.
label2idx = {label: idx for idx, label in enumerate(sorted(df["label"].unique()))}
idx2label = {idx: label for label, idx in label2idx.items()}
df["label_idx"] = df["label"].map(label2idx)

# Split by song (one row = one song), stratified by class, with a fixed seed.
train_df, val_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df["label_idx"]
)

def collate_fn(batch):
    """Collate ``(mel, label)`` pairs into two batched tensors.

    Each mel tensor is right-padded with zeros along dimension 2 up to the
    largest size found in the batch, then all of them are stacked and cast
    to float. Labels are stacked unchanged.

    Returns:
        Tuple ``(mels, labels)``.
    """
    specs, targets = zip(*batch)

    widest = max(spec.shape[2] for spec in specs)

    padded = []
    for spec in specs:
        deficit = widest - spec.shape[2]
        # (0, deficit): no padding on the left, `deficit` zeros on the right
        # of the last dimension.
        padded.append(torch.nn.functional.pad(spec, (0, deficit)))

    return torch.stack(padded).float(), torch.stack(targets)


def create_loaders(train_df, val_df, audio_dir):
    """Build the training and validation DataLoaders.

    Args:
        train_df: rows of the training songs.
        val_df: rows of the validation songs.
        audio_dir: directory forwarded to ``MusicDataset`` as its ``mel_dir``
            argument (despite the name, the call below passes ``MEL_DIR``).

    Returns:
        Tuple ``(train_loader, val_loader)``. Only the training loader
        shuffles; augmentation is disabled for both datasets.
    """
    datasets = [
        MusicDataset(frame, audio_dir, augment=False)
        for frame in (train_df, val_df)
    ]
    train_ds, val_ds = datasets

    # Settings shared by both loaders.
    shared = dict(batch_size=BATCH_SIZE, collate_fn=collate_fn, num_workers=0)

    return (
        DataLoader(train_ds, shuffle=True, **shared),
        DataLoader(val_ds, **shared),
    )

train_loader, val_loader = create_loaders(train_df, val_df, MEL_DIR)




In [None]:
# training

def run_epoch(model, loader, criterion, optimizer=None, desc=""):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: network to train or evaluate.
        loader: iterable of ``(inputs, targets)`` batches.
        criterion: loss function returning the mean loss of a batch.
        optimizer: when given, the pass is a training pass (train mode,
            gradients enabled, one optimizer step per batch); when None the
            model is put in eval mode and gradients are disabled.
        desc: label for the progress bar.

    Returns:
        Per-sample mean loss and accuracy over the whole pass.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    is_train = optimizer is not None
    model.train(is_train)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(is_train):
        for inputs, targets in progress:
            inputs = inputs.float().to(DEVICE)
            targets = targets.long().to(DEVICE)

            if is_train:
                optimizer.zero_grad()

            logits = model(inputs)
            loss = criterion(logits, targets)

            if is_train:
                loss.backward()
                optimizer.step()

            # Weight each batch by its size so that a smaller final batch
            # does not distort the epoch average.
            batch_n = targets.size(0)
            loss_sum += loss.item() * batch_n
            n_correct += (logits.argmax(dim=1) == targets).sum().item()
            n_seen += batch_n

            progress.set_postfix(
                loss=f"{loss.item():.4f}",
                acc=f"{n_correct/n_seen:.3f}"
            )

    if n_seen == 0:
        raise ValueError(f"{desc or 'epoch'}: the data loader produced no samples")

    return loss_sum / n_seen, n_correct / n_seen


model = AudioCNN().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")

    train_loss, train_acc = run_epoch(
        model, train_loader, criterion, optimizer, desc="Training"
    )
    val_loss, val_acc = run_epoch(
        model, val_loader, criterion, desc="Validation"
    )

    print(
        f"Epoch {epoch+1} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Train Acc: {train_acc:.3f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"Val Acc: {val_acc:.3f}")



Epoch 1/5




Epoch 1 | Train Loss: 1.1762 | Train Acc: 0.514 | Val Acc: 0.555

Epoch 2/5




Epoch 2 | Train Loss: 1.1357 | Train Acc: 0.533 | Val Acc: 0.538

Epoch 3/5




Epoch 3 | Train Loss: 1.1217 | Train Acc: 0.543 | Val Acc: 0.581

Epoch 4/5




Epoch 4 | Train Loss: 1.1161 | Train Acc: 0.544 | Val Acc: 0.559

Epoch 5/5


                                                                        

Epoch 5 | Train Loss: 1.0984 | Train Acc: 0.553 | Val Acc: 0.552


