In [10]:
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader, WeightedRandomSampler
from AudioDataset import AudioDataset
import torch.optim as optim
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import random

In [11]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA device for GPU acceleration.")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device for GPU acceleration.")
else:
    device = torch.device("cpu")
    print("No GPU device found. Using CPU.")

Using MPS device for GPU acceleration.


In [4]:
# ── % share of every class in the CSV ────────────────────────────────
import pandas as pd

df = pd.read_csv("./data/train.csv")      # adjust path if needed
pct = df["label"].value_counts(normalize=True)*100
print("train.csv  – class % share"); print(pct.round(2))
print("test.csv  – class % share"); print(df["label"].value_counts(normalize=True)*100)

train.csv  – class % share
label
unknown    63.32
stop        3.67
on          3.63
go          3.62
yes         3.62
no          3.60
right       3.60
up          3.59
down        3.58
left        3.58
off         3.58
silence     0.61
Name: proportion, dtype: float64
test.csv  – class % share
label
unknown    63.321920
stop        3.667030
on          3.626177
go          3.620341
yes         3.618395
no          3.604778
right       3.602832
up          3.585324
down        3.583379
left        3.577543
off         3.577543
silence     0.614738
Name: proportion, dtype: float64


## without balancing

In [7]:
train_set = AudioDataset("./data/train.csv", "./data_raw/train/audio")
train_loader = DataLoader(train_set, batch_size=256, shuffle=True, drop_last=True)

test_set = AudioDataset("./data/test.csv", "./data_raw/train/audio")
test_loader = DataLoader(test_set, batch_size=256, shuffle=True, drop_last=True)

In [None]:
from collections import Counter
import torch


# ─── count labels in one epoch ─────────────────────────────────────────
label_counts = Counter()
total_samples = 0

for specs, labels in train_loader:
    labels = labels.cpu().tolist()
    label_counts.update(labels)
    total_samples += len(labels)

# ─── print percentage per class ─────────────────────────────────────────
for cls_idx, count in sorted(label_counts.items()):
    pct = count / total_samples * 100
    print(f"class {cls_idx:2d}: {pct:5.2f}% ({count}/{total_samples})")

class  0:  3.62% (1852/51200)
class  1:  3.61% (1846/51200)
class  2:  3.58% (1833/51200)
class  3:  3.58% (1835/51200)
class  4:  3.58% (1833/51200)
class  5:  3.61% (1846/51200)
class  6:  3.62% (1854/51200)
class  7:  3.58% (1831/51200)
class  8:  3.67% (1878/51200)
class  9:  3.61% (1849/51200)
class 10:  0.62% (315/51200)
class 11: 63.34% (32428/51200)


## with equal balancing

In [None]:
# ─── prepare dataset ──────────────────────────────────────────────────────
train_set = AudioDataset("./data/train.csv", "./data_raw/train/audio")

# ─── compute sample weights for balanced sampling ────────────────────────
labels = [lbl for _, lbl in train_set]             # list of ints
class_counts = torch.bincount(torch.tensor(labels))
class_weights = 1.0 / class_counts.float()          # weight per class
sample_weights = [class_weights[lbl] for lbl in labels]

# ─── sampler & loader ─────────────────────────────────────────────────────
sampler = WeightedRandomSampler(
    sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)
train_loader = DataLoader(
    train_set,
    batch_size=256,
    sampler=sampler,     # use sampler instead of shuffle
    drop_last=True
)

# ─── test loader unchanged ────────────────────────────────────────────────
test_set = AudioDataset("./data/test.csv", "./data_raw/train/audio")
test_loader = DataLoader(
    test_set,
    batch_size=256,
    shuffle=False,
    drop_last=False
)

NameError: name 'device' is not defined

In [None]:
from collections import Counter
import torch


# ─── count labels in one epoch ─────────────────────────────────────────
label_counts = Counter()
total_samples = 0

for specs, labels in train_loader:
    labels = labels.cpu().tolist()
    label_counts.update(labels)
    total_samples += len(labels)

# ─── print percentage per class ─────────────────────────────────────────
for cls_idx, count in sorted(label_counts.items()):
    pct = count / total_samples * 100
    print(f"class {cls_idx:2d}: {pct:5.2f}% ({count}/{total_samples})")

class  0:  8.37% (4284/51200)
class  1:  8.31% (4257/51200)
class  2:  8.17% (4181/51200)
class  3:  8.28% (4239/51200)
class  4:  8.39% (4296/51200)
class  5:  8.26% (4230/51200)
class  6:  8.30% (4248/51200)
class  7:  8.29% (4247/51200)
class  8:  8.25% (4223/51200)
class  9:  8.34% (4270/51200)
class 10:  8.68% (4445/51200)
class 11:  8.36% (4280/51200)


## with oversampling

In [12]:
# ─── prepare dataset ──────────────────────────────────────────────────────
train_set = AudioDataset("./data/train.csv", "./data_raw/train/audio")
labels = [lbl for _, lbl in train_set]              # list of ints
counts = torch.bincount(torch.tensor(labels))       # #samples per class

# ─── “soft” oversampling: cap max weight at e.g. 3× minority vs majority ──
inv = 1.0 / counts.float()                          # raw inverse freq
max_mult = 3.0
capped = torch.minimum(inv, inv.max() * max_mult)

sample_weights = [capped[lbl].item() for lbl in labels]

sampler = WeightedRandomSampler(
    sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

train_loader = DataLoader(
    train_set,
    batch_size=256,
    sampler=sampler,
    drop_last=True
)

# ─── class‑weighted loss to keep natural skew partially ─────────────────
class_weights = (1.0 / counts.float()).to(device)
class_weights = torch.minimum(class_weights, class_weights.max() * max_mult)
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

# ─── test loader ────────────────────────────────────────────────────────
test_set = AudioDataset("./data/test.csv", "./data_raw/train/audio")
test_loader = DataLoader(
    test_set,
    batch_size=256,
    shuffle=False,
    drop_last=False
)

In [13]:
from collections import Counter
import torch

# ─── count labels in one epoch ─────────────────────────────────────────
label_counts = Counter()
total_samples = 0

for specs, labels in train_loader:
    labels = labels.cpu().tolist()
    label_counts.update(labels)
    total_samples += len(labels)

# ─── print percentage per class ─────────────────────────────────────────
for cls_idx, count in sorted(label_counts.items()):
    pct = count / total_samples * 100
    print(f"class {cls_idx:2d}: {pct:5.2f}% ({count}/{total_samples})")

class  0:  8.49% (4347/51200)
class  1:  8.19% (4192/51200)
class  2:  8.41% (4308/51200)
class  3:  8.31% (4257/51200)
class  4:  8.41% (4307/51200)
class  5:  8.42% (4310/51200)
class  6:  8.39% (4295/51200)
class  7:  8.21% (4206/51200)
class  8:  8.55% (4377/51200)
class  9:  8.27% (4236/51200)
class 10:  8.23% (4216/51200)
class 11:  8.10% (4149/51200)
