In [1]:
!pip -q install torchcodec


In [2]:
from google.colab import drive
drive.mount("/content/drive"),

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


(None,)

In [3]:
!df -h /content
!du -sh "/content/drive/MyDrive/openmic-2018-2"
!du -sh "/content/drive/MyDrive/deep_learning/sentetik-dataset"


Filesystem      Size  Used Avail Use% Mounted on
overlay         236G   45G  191G  20% /
2.8G	/content/drive/MyDrive/openmic-2018-2
673M	/content/drive/MyDrive/deep_learning/sentetik-dataset


In [4]:
!mkdir -p /content/openmic-2018-2
!mkdir -p /content/sentetik-dataset

# Copy OpenMIC audio folder + metadata files
!cp -r "/content/drive/MyDrive/openmic-2018-2/audio" "/content/openmic-2018-2/"
!cp "/content/drive/MyDrive/openmic-2018-2/openmic-2018-aggregated-labels.csv" "/content/openmic-2018-2/" || true
!cp "/content/drive/MyDrive/openmic-2018-2/openmic-2018-metadata.csv" "/content/openmic-2018-2/" || true

# Copy Synthetic audio + labels
!cp -r "/content/drive/MyDrive/deep_learning/sentetik-dataset/audio" "/content/sentetik-dataset/"
!cp "/content/drive/MyDrive/deep_learning/sentetik-dataset/labels.csv" "/content/sentetik-dataset/"


In [7]:
from pathlib import Path
import pandas as pd

CSV_PATH = Path("/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv")
df = pd.read_csv(CSV_PATH, low_memory=False)

# Patch to local /content paths (since you copied audio locally)
df["path"] = df["path"].str.replace(
    "/content/drive/MyDrive/openmic-2018-2",
    "/content/openmic-2018-2",
    regex=False
).str.replace(
    "/content/drive/MyDrive/deep_learning/sentetik-dataset",
    "/content/sentetik-dataset",
    regex=False
)

# Sanity: verify some paths exist
missing = sum(not Path(p).exists() for p in df["path"].sample(500, random_state=0))
print("Missing paths in sample(500):", missing)
print(df["source"].value_counts())


Missing paths in sample(500): 0
source
openmic      20000
synthetic     2200
Name: count, dtype: int64


In [8]:
import numpy as np

OPENMIC_TAGS = [
    "accordion","banjo","bass","cello","clarinet","cymbals","drums","flute","guitar",
    "mallet_percussion","mandolin","organ","piano","saxophone","synthesizer",
    "trombone","trumpet","ukulele","violin","voice"
]
Y_COLS = [f"y_{t}" for t in OPENMIC_TAGS]
M_COLS = [f"m_{t}" for t in OPENMIC_TAGS]

OPENMIC_DIR = Path("/content/openmic-2018-2")
meta_candidates = list(OPENMIC_DIR.glob("*metadata*.csv")) + list(OPENMIC_DIR.glob("*meta*.csv"))
print("Metadata candidates:", [p.name for p in meta_candidates])

# Default: synthetic always train
df["split"] = "train"

# Try official split if metadata exists
if meta_candidates:
    meta_path = meta_candidates[0]
    meta = pd.read_csv(meta_path, low_memory=False)
    print("Using metadata:", meta_path.name)

    split_col = None
    for c in ["split", "subset", "partition"]:
        if c in meta.columns:
            split_col = c
            break

    if split_col is not None:
        if "sample_key" not in meta.columns:
            if "filename" in meta.columns:
                meta["sample_key"] = meta["filename"].astype(str).str.replace(".ogg","", regex=False)
            else:
                raise RuntimeError("Metadata has no sample_key/filename to join.")

        meta_small = meta[["sample_key", split_col]].copy()
        meta_small["filename"] = meta_small["sample_key"].astype(str) + ".ogg"

        open_mask = df["source"].astype(str).eq("openmic")
        joined = df.loc[open_mask, ["filename"]].merge(meta_small[["filename", split_col]], on="filename", how="left")

        s = joined[split_col].astype(str).str.lower()
        s = s.replace({"validation":"valid","val":"valid"})
        df.loc[open_mask, "split"] = s.values
    else:
        print("No split column found; using random split for OpenMIC.")
        meta_candidates = []
else:
    print("No metadata found; using random split for OpenMIC.")

# Random split if no official split
if not meta_candidates:
    rng = np.random.default_rng(42)
    open_idx = df.index[df["source"].astype(str).eq("openmic")].to_numpy()
    rng.shuffle(open_idx)

    n = len(open_idx)
    n_train = int(0.8*n)
    n_valid = int(0.1*n)

    df.loc[open_idx[n_train:n_train+n_valid], "split"] = "valid"
    df.loc[open_idx[n_train+n_valid:], "split"] = "test"

print(df["split"].value_counts(dropna=False))


Metadata candidates: ['openmic-2018-metadata.csv', 'openmic-2018-metadata.csv']
Using metadata: openmic-2018-metadata.csv
No split column found; using random split for OpenMIC.
split
train    18200
test      2000
valid     2000
Name: count, dtype: int64


In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torchaudio

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TARGET_SR = 16000
DURATION_SEC = 10
TARGET_LEN = TARGET_SR * DURATION_SEC

class AudioTagDataset(Dataset):
    def __init__(self, df):
        self.df = df.reset_index(drop=True)
        self.paths = self.df["path"].astype(str).tolist()
        self.y = torch.tensor(self.df[Y_COLS].values, dtype=torch.float32)
        self.m = torch.tensor(self.df[M_COLS].values, dtype=torch.float32)
        self.source = self.df["source"].astype(str).tolist()

    def __len__(self):
        return len(self.df)

    def _pad_or_crop(self, wav):
        if wav.numel() >= TARGET_LEN:
            return wav[:TARGET_LEN]
        return F.pad(wav, (0, TARGET_LEN - wav.numel()))

    def __getitem__(self, idx):
        wav, sr = torchaudio.load(self.paths[idx])   # [C,T]
        wav = wav.mean(dim=0)                        # mono [T]
        if sr != TARGET_SR:
            wav = torchaudio.functional.resample(wav, sr, TARGET_SR)
        wav = self._pad_or_crop(wav)
        mx = wav.abs().max().clamp_min(1e-8)
        wav = (wav / mx).to(torch.float32)
        return wav, self.y[idx], self.m[idx], self.source[idx]

train_df = df[df["split"].eq("train")].copy()
valid_df = df[df["split"].eq("valid")].copy()
test_df  = df[df["split"].eq("test")].copy()

train_ds = AudioTagDataset(train_df)
valid_ds = AudioTagDataset(valid_df)
test_ds  = AudioTagDataset(test_df)

print(len(train_ds), len(valid_ds), len(test_ds))


18200 2000 2000


# 4 — Log-mel transform on GPU

In [10]:
# Recreate mel transform (same params)
mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=TARGET_SR,
    n_fft=1024,
    hop_length=320,
    win_length=1024,
    n_mels=128,
    f_min=30,
    f_max=7600,
    power=2.0,
).to(DEVICE)

def wav_to_logmel(wav_batch: torch.Tensor) -> torch.Tensor:
    """
    wav_batch: [B, T] float32/float16 on GPU
    returns:   [B, 128, frames] float32, finite
    """
    # Always compute features in float32 (avoid AMP issues)
    wav_batch = wav_batch.float()

    # Disable autocast for feature extraction
    with torch.amp.autocast("cuda", enabled=False):
        x = mel(wav_batch)                    # [B,128,F] non-negative
        x = torch.clamp(x, min=1e-10)         # prevent log(0)
        x = torch.log(x)                      # log-power mel (stable)

        # per-sample normalization
        mean = x.mean(dim=(1,2), keepdim=True)
        std  = x.std(dim=(1,2), keepdim=True).clamp_min(1e-4)
        x = (x - mean) / std

        # hard sanitize (guarantee finite)
        x = torch.nan_to_num(x, nan=0.0, posinf=0.0, neginf=0.0)

    return x


# 5 — DataLoaders (with synthetic oversampling)

In [11]:
#BATCH_SIZE = 64  # if OOM -> 32
#
#weights = np.where(train_df["source"].astype(str).values == "synthetic", 4.0, 1.0).astype(np.float64)
#sampler = WeightedRandomSampler(weights, num_samples=len(train_ds), replacement=True)
#
#train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2, pin_memory=True)
#valid_loader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
#test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

BATCH_SIZE = 256  # A100 start point (if OOM -> 192 or 128)

weights = np.where(train_df["source"].astype(str).values == "synthetic", 3.0, 1.0).astype(np.float64)
sampler = WeightedRandomSampler(weights, num_samples=len(train_ds), replacement=True)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=4, pin_memory=True)
valid_loader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)



# 6A—B. Confirm what the checkpoint contains and how to rebuild the module.

In [12]:
import torch
from pathlib import Path

CKPT_PATH = Path("/content/drive/MyDrive/model_weights/pretraining/pretraining_cbam_irmas_features.pth")
assert CKPT_PATH.exists(), f"Checkpoint not found: {CKPT_PATH}"

ckpt = torch.load(CKPT_PATH, map_location="cpu")
print("Loaded object type:", type(ckpt))

if isinstance(ckpt, dict):
    print("Top-level keys:", list(ckpt.keys())[:40])
    # Try common locations
    for k in ["model", "state_dict", "model_state_dict", "net", "weights"]:
        if k in ckpt and isinstance(ckpt[k], dict):
            print(f"Found candidate state dict under key '{k}'. Example keys:", list(ckpt[k].keys())[:20])
else:
    # could be tensor/array/list = features, not weights
    print("This looks like features or a tensor, not a state_dict.")


Loaded object type: <class 'collections.OrderedDict'>
Top-level keys: ['0.weight', '0.bias', '3.ca.mlp.0.weight', '3.ca.mlp.0.bias', '3.ca.mlp.2.weight', '3.ca.mlp.2.bias', '3.sa.conv.weight', '3.sa.conv.bias', '4.weight', '4.bias', '7.ca.mlp.0.weight', '7.ca.mlp.0.bias', '7.ca.mlp.2.weight', '7.ca.mlp.2.bias', '7.sa.conv.weight', '7.sa.conv.bias']


In [13]:
import torch
from pathlib import Path

CKPT_PATH = Path("/content/drive/MyDrive/model_weights/pretraining/pretraining_cbam_irmas_features.pth")
state = torch.load(CKPT_PATH, map_location="cpu")  # OrderedDict

for k in state.keys():
    print(k, tuple(state[k].shape))

print("\nconv0 weight shape:", tuple(state["0.weight"].shape))
print("conv4 weight shape:", tuple(state["4.weight"].shape))

# infer reduction for CBAM at index 3
hidden3 = state["3.ca.mlp.0.weight"].shape[0]
ch3 = state["3.ca.mlp.0.weight"].shape[1]
print("\nCBAM3 channels:", ch3, "hidden:", hidden3, "=> reduction ~", ch3 // hidden3)

# infer reduction for CBAM at index 7
hidden7 = state["7.ca.mlp.0.weight"].shape[0]
ch7 = state["7.ca.mlp.0.weight"].shape[1]
print("CBAM7 channels:", ch7, "hidden:", hidden7, "=> reduction ~", ch7 // hidden7)


0.weight (32, 1, 3, 3)
0.bias (32,)
3.ca.mlp.0.weight (4, 32)
3.ca.mlp.0.bias (4,)
3.ca.mlp.2.weight (32, 4)
3.ca.mlp.2.bias (32,)
3.sa.conv.weight (1, 2, 7, 7)
3.sa.conv.bias (1,)
4.weight (64, 32, 3, 3)
4.bias (64,)
7.ca.mlp.0.weight (8, 64)
7.ca.mlp.0.bias (8,)
7.ca.mlp.2.weight (64, 8)
7.ca.mlp.2.bias (64,)
7.sa.conv.weight (1, 2, 7, 7)
7.sa.conv.bias (1,)

conv0 weight shape: (32, 1, 3, 3)
conv4 weight shape: (64, 32, 3, 3)

CBAM3 channels: 32 hidden: 4 => reduction ~ 8
CBAM7 channels: 64 hidden: 8 => reduction ~ 8


# Reinitialize model + optimizer

In [14]:
# Rebuild features and reload pretrained weights

CKPT_PATH = Path("/content/drive/MyDrive/model_weights/pretraining/pretraining_cbam_irmas_features.pth")
state = torch.load(CKPT_PATH, map_location="cpu")

# --- CBAM modules (same as before) ---
class ChannelAttention(nn.Module):
    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(1, channels // reduction)
        self.mlp = nn.Sequential(
            nn.Linear(channels, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channels),
        )
    def forward(self, x):
        avg = x.mean(dim=(2,3))
        mx  = x.amax(dim=(2,3))
        attn = torch.sigmoid(self.mlp(avg) + self.mlp(mx)).unsqueeze(-1).unsqueeze(-1)
        return x * attn

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super().__init__()
        padding = kernel_size // 2
        self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, padding=padding, bias=True)
    def forward(self, x):
        avg = x.mean(dim=1, keepdim=True)
        mx  = x.amax(dim=1, keepdim=True)
        attn = torch.sigmoid(self.conv(torch.cat([avg, mx], dim=1)))
        return x * attn

class CBAM(nn.Module):
    def __init__(self, channels, reduction=8):
        super().__init__()
        self.ca = ChannelAttention(channels, reduction=reduction)
        self.sa = SpatialAttention(kernel_size=7)
    def forward(self, x):
        return self.sa(self.ca(x))

features = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding=1, bias=True),
    nn.ReLU(inplace=True),
    nn.Identity(),
    CBAM(32, reduction=8),
    nn.Conv2d(32, 64, 3, padding=1, bias=True),
    nn.ReLU(inplace=True),
    nn.Identity(),
    CBAM(64, reduction=8),
)
features.load_state_dict(state, strict=True)

class CBAMTagger(nn.Module):
    def __init__(self, features, n_tags):
        super().__init__()
        self.features = features
        self.pool = nn.AdaptiveAvgPool2d((1,1))
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, n_tags)
        )
    def forward(self, logmel):
        x = logmel.unsqueeze(1)
        x = self.features(x)
        x = self.pool(x)
        return self.head(x)

model = CBAMTagger(features, n_tags=len(OPENMIC_TAGS)).to(DEVICE)
print("Reinitialized model + reloaded pretrained features.")


Reinitialized model + reloaded pretrained features.


# Cell 6E - Build the full CBAM tagger (features + pooling + new 20-class head)

In [15]:
class CBAMTagger(nn.Module):
    def __init__(self, features: nn.Module, n_tags: int = 20):
        super().__init__()
        self.features = features
        self.pool = nn.AdaptiveAvgPool2d((1,1))
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, 256),    # 64 comes from conv4 out_channels
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, n_tags) # logits
        )

    def forward(self, logmel):
        # logmel: [B, n_mels, time]
        x = logmel.unsqueeze(1)       # [B,1,M,T]
        x = self.features(x)          # [B,64,*,*]
        x = self.pool(x)              # [B,64,1,1]
        return self.head(x)           # [B,20]

model = CBAMTagger(features, n_tags=len(OPENMIC_TAGS)).to(DEVICE)
print("Model ready on", DEVICE)


Model ready on cuda


In [16]:
# Initialize last layer bias from TRAIN priors (masked)
p = (train_df[Y_COLS].values * train_df[M_COLS].values).sum(axis=0) / np.clip(train_df[M_COLS].values.sum(axis=0), 1, None)
p = np.clip(p, 1e-4, 1-1e-4)
bias = np.log(p / (1 - p))

with torch.no_grad():
    model.head[-1].bias.copy_(torch.tensor(bias, dtype=torch.float32, device=DEVICE))

print("Bias init done. Bias stats:", float(model.head[-1].bias.min()), float(model.head[-1].bias.mean()), float(model.head[-1].bias.max()))


Bias init done. Bias stats: -3.729701519012451 -2.7224838733673096 -1.9965993165969849


Consider using tensor.detach() first. (Triggered internally at /pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:836.)
  print("Bias init done. Bias stats:", float(model.head[-1].bias.min()), float(model.head[-1].bias.mean()), float(model.head[-1].bias.max()))


In [17]:
Yt = torch.tensor(train_df[Y_COLS].values, dtype=torch.float32)
Mt = torch.tensor(train_df[M_COLS].values, dtype=torch.float32)

pos = (Yt * Mt).sum(dim=0)
tot = Mt.sum(dim=0)
neg = tot - pos

pos_weight = (neg / pos.clamp_min(1.0)).clamp(max=20.0).to(DEVICE)  # cap 20 is usually stable
print("pos_weight min/median/max:",
      float(pos_weight.min()), float(pos_weight.median()), float(pos_weight.max()))

bce_w = nn.BCEWithLogitsLoss(reduction="none", pos_weight=pos_weight)

def masked_bce_loss(logits, y, m):
    loss = bce_w(logits, y) * m
    denom = m.sum().clamp_min(1.0)
    return loss.sum() / denom


pos_weight min/median/max: 7.363970756530762 16.71871566772461 20.0


# Cell 7 — Masked BCE loss

In [21]:
# bce = nn.BCEWithLogitsLoss(reduction="none")
#
# def masked_bce_loss(logits, y, m):
#     loss = bce(logits, y) * m
#     denom = m.sum().clamp_min(1.0)
#     return loss.sum() / denom
#

# Cell 8 — Quick forward pass sanity check (recommended)

In [18]:
model.train()
wav, y, m, src = next(iter(train_loader))
wav = wav.to(DEVICE)

logmel = wav_to_logmel(wav)
print("logmel finite?", torch.isfinite(logmel).all().item(), "shape:", tuple(logmel.shape))
logits = model(logmel)
print("logits finite?", torch.isfinite(logits).all().item(), "shape:", tuple(logits.shape))


logmel finite? True shape: (256, 128, 501)
logits finite? True shape: (256, 20)


# Cell 9 — Train (AMP + best checkpoint to Drive)

In [16]:
# model.train()
# for i, (wav, y, m, src) in enumerate(train_loader):
#     if i == 200:
#         break
#     # one step (copy from train loop)
# print("Smoke test passed.")

In [19]:
def eval_val_loss(model, loader):
    model.eval()
    total, denom = 0.0, 0.0
    with torch.no_grad():
        for wav, y, m, src in loader:
            wav = wav.to(DEVICE, non_blocking=True)
            y = y.to(DEVICE, non_blocking=True)
            m = m.to(DEVICE, non_blocking=True)
            logits = model(wav_to_logmel(wav))
            L = bce_w(logits, y) * m
            total += L.sum().item()
            denom += m.sum().item()
    return total / max(1.0, denom)


In [20]:
import time
import torch
from pathlib import Path
from sklearn.metrics import f1_score
import numpy as np

def eval_macro_f1_thresh(model, loader, thr=0.2):
    model.eval()
    ys, ps = [], []
    with torch.no_grad():
        for wav, y, m, src in loader:
            wav = wav.to(DEVICE, non_blocking=True)
            probs = torch.sigmoid(model(wav_to_logmel(wav))).cpu().numpy()
            ys.append(y.numpy())
            ps.append(probs)
    y_true = np.concatenate(ys, axis=0).astype(int)
    y_prob = np.concatenate(ps, axis=0)
    y_pred = (y_prob >= thr).astype(int)
    return f1_score(y_true, y_pred, average="macro", zero_division=0)

EPOCHS = 15
LR = 1e-3
CLIP_NORM = 1.0
LOG_EVERY = 50

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scaler = torch.amp.GradScaler("cuda")

BEST_PATH = Path("/content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth")
BEST_PATH.parent.mkdir(parents=True, exist_ok=True)

best_loss = float("inf")
patience = 3
bad = 0
steps_per_epoch = len(train_loader)

for epoch in range(1, EPOCHS + 1):
    model.train()
    t0 = time.time()
    running = 0.0

    for step, (wav, y, m, src) in enumerate(train_loader, start=1):
        wav = wav.to(DEVICE, non_blocking=True)
        y = y.to(DEVICE, non_blocking=True)
        m = m.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        with torch.amp.autocast("cuda"):
            logmel = wav_to_logmel(wav)
            logits = model(logmel)
            loss = masked_bce_loss(logits, y, m)

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_NORM)
        scaler.step(optimizer)
        scaler.update()

        running += loss.item()
        if step % LOG_EVERY == 0:
            torch.cuda.synchronize()
            elapsed = time.time() - t0
            sec_per_step = elapsed / step
            eta = (steps_per_epoch - step) * sec_per_step
            print(f"epoch {epoch} step {step}/{steps_per_epoch} loss {running/LOG_EVERY:.4f} "
                  f"sec/step {sec_per_step:.3f} ETA {eta/60:.1f}m")
            running = 0.0

    val_loss = eval_val_loss(model, valid_loader)
    val_f1_02 = eval_macro_f1_thresh(model, valid_loader, thr=0.2)
    print(f"epoch {epoch} DONE | {((time.time()-t0)/60):.1f}m | VAL loss {val_loss:.4f} | VAL macro-F1@0.2 {val_f1_02:.4f}")

    if val_loss < best_loss:
        best_loss = val_loss
        bad = 0
        torch.save({"model_state_dict": model.state_dict(), "val_loss": best_loss}, BEST_PATH)
        print("Saved best:", BEST_PATH, "val_loss:", best_loss)
    else:
        bad += 1
        if bad >= patience:
            print(f"Early stopping. Best val_loss: {best_loss:.4f}")
            break


epoch 1 step 50/72 loss 1.9773 sec/step 1.251 ETA 0.5m
epoch 1 DONE | 1.8m | VAL loss 1.2061 | VAL macro-F1@0.2 0.0865
Saved best: /content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth val_loss: 1.2061275756835939
epoch 2 step 50/72 loss 1.1842 sec/step 1.248 ETA 0.5m
epoch 2 DONE | 1.8m | VAL loss 1.0553 | VAL macro-F1@0.2 0.0880
Saved best: /content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth val_loss: 1.0553174926757813
epoch 3 step 50/72 loss 1.0883 sec/step 1.242 ETA 0.5m
epoch 3 DONE | 1.8m | VAL loss 1.0404 | VAL macro-F1@0.2 0.0897
Saved best: /content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth val_loss: 1.0404491088867187
epoch 4 step 50/72 loss 1.0717 sec/step 1.244 ETA 0.5m
epoch 4 DONE | 1.8m | VAL loss 1.0266 | VAL macro-F1@0.2 0.0904
Saved best: /content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth val_loss: 1.02661611328125
epoch 5 step 50/72 loss 1.0508 sec/ste

In [21]:
BEST_PATH = Path("/content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth")
ckpt = torch.load(BEST_PATH, map_location=DEVICE)
model.load_state_dict(ckpt["model_state_dict"])

test_loss = eval_val_loss(model, test_loader)
test_f1_02 = eval_macro_f1_thresh(model, test_loader, thr=0.2)
test_f1_05 = eval_macro_f1_thresh(model, test_loader, thr=0.5)

print("TEST loss:", test_loss)
print("TEST macro-F1@0.2:", test_f1_02)
print("TEST macro-F1@0.5:", test_f1_05)


TEST loss: 0.8252420532226562
TEST macro-F1@0.2: 0.12857139411928345
TEST macro-F1@0.5: 0.18686141860443403
