In [1]:
!pip -q install torchcodec

In [2]:
from pathlib import Path
import os, random, math, time
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from torch.utils.data import Dataset, DataLoader

from google.colab import drive
drive.mount("/content/drive")

# Seed every RNG in use (Python, NumPy, PyTorch CPU and CUDA) with one shared value.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("DEVICE:", DEVICE)


# Metadata table read by the data-loading cell.
CSV_PATH = Path("/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv")

# Audio root directories on the mounted Drive.
OPENMIC_LOCAL = "/content/drive/MyDrive/openmic-2018-2"
SYNTH_LOCAL   = "/content/drive/MyDrive/deep_learning/sentetik-dataset"

# Checkpoints loaded further down, one per ensemble member.
W_CBAM   = "/content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth"
W_MSCRNN = "/content/drive/MyDrive/model_weights/finetuning/mscrnn_openmic_plus_synth_best.pth"
W_PASST  = "/content/drive/MyDrive/model_weights/finetuning/passt_openmic_plus_synth_best.pth"

# Report (without failing) whether each required file is reachable.
required_files = [CSV_PATH] + [Path(weights) for weights in (W_CBAM, W_MSCRNN, W_PASST)]
for p in required_files:
    print(p, "exists?", p.exists())


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
DEVICE: cuda
/content/drive/MyDrive/deep_learning/combined_openmic_and_synth.csv exists? True
/content/drive/MyDrive/model_weights/finetuning/cbam_openmic_plus_synth_best.pth exists? True
/content/drive/MyDrive/model_weights/finetuning/mscrnn_openmic_plus_synth_best.pth exists? True
/content/drive/MyDrive/model_weights/finetuning/passt_openmic_plus_synth_best.pth exists? True


In [3]:
df = pd.read_csv(CSV_PATH, low_memory=False)

# Directory strings as stored in the CSV -> directory to use in this session.
# With the current OPENMIC_LOCAL / SYNTH_LOCAL values both replacements leave the
# paths unchanged; they only matter when the data is copied somewhere else.
path_prefix_remap = {
    "/content/drive/MyDrive/openmic-2018-2": OPENMIC_LOCAL,
    "/content/drive/MyDrive/deep_learning/sentetik-dataset": SYNTH_LOCAL,
}
remapped_paths = df["path"]
for stored_prefix, local_prefix in path_prefix_remap.items():
    remapped_paths = remapped_paths.str.replace(stored_prefix, local_prefix, regex=False)
df["path"] = remapped_paths

# Spot-check a fixed random sample instead of stat-ing every file on Drive.
sample = df.sample(500, random_state=0)
missing = sum(1 for p in sample["path"] if not Path(p).exists())
print("Missing paths in sample(500):", missing)
print(df["source"].value_counts())
print("Columns sample:", df.columns[:20].tolist())


Missing paths in sample(500): 0
source
openmic      20000
synthetic     2200
Name: count, dtype: int64
Columns sample: ['source', 'path', 'filename', 'polyphony', 'chosen_families', 'y_accordion', 'y_banjo', 'y_bass', 'y_cello', 'y_clarinet', 'y_cymbals', 'y_drums', 'y_flute', 'y_guitar', 'y_mallet_percussion', 'y_mandolin', 'y_organ', 'y_piano', 'y_saxophone', 'y_synthesizer']


In [4]:
# Tag vocabulary. The order fixes the column order of every label, mask and
# prediction array built from Y_COLS / M_COLS below.
TAGS = [
    "accordion", "banjo", "bass", "cello", "clarinet",
    "cymbals", "drums", "flute", "guitar", "mallet_percussion",
    "mandolin", "organ", "piano", "saxophone", "synthesizer",
    "trombone", "trumpet", "ukulele", "violin", "voice",
]

# y_<tag> holds the label; m_<tag> is thresholded at 0.5 by the metric functions
# further down to decide whether that label takes part in scoring.
Y_COLS = ["y_" + tag for tag in TAGS]
M_COLS = ["m_" + tag for tag in TAGS]

assert set(Y_COLS).issubset(df.columns), "Missing some y_* columns"
assert set(M_COLS).issubset(df.columns), "Missing some m_* columns"

# Cast both column groups to float32.
for label_cols in (Y_COLS, M_COLS):
    df[label_cols] = df[label_cols].astype(np.float32)

print("OK: labels ready")
# Same device selection as in the configuration cell; repeated here and printed again.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device:", DEVICE)


OK: labels ready
Using device: cuda


In [5]:
from sklearn.model_selection import train_test_split

# Separate the two sources: only OpenMIC rows are split; every synthetic row
# goes to the training frame.
source_col = df["source"]
open_df  = df.loc[source_col == "openmic"].reset_index(drop=True)
synth_df = df.loc[source_col == "synthetic"].reset_index(drop=True)

# 80 / 10 / 10 split of the OpenMIC rows: hold out 20 %, then halve the hold-out.
train_open, temp_open = train_test_split(open_df, test_size=0.2, random_state=SEED)
val_open, test_open   = train_test_split(temp_open, test_size=0.5, random_state=SEED)

# ignore_index=True already yields a fresh 0..n-1 index for the combined frame.
train_df = pd.concat([train_open, synth_df], ignore_index=True)
val_df   = val_open.reset_index(drop=True)
test_df  = test_open.reset_index(drop=True)

print("train:", len(train_df), "val:", len(val_df), "test:", len(test_df))
# Show which sources ended up in each evaluation split.
for header, split_frame in (("val sources:\n", val_df), ("test sources:\n", test_df)):
    print(header, split_frame["source"].value_counts())


train: 18200 val: 2000 test: 2000
val sources:
 source
openmic    2000
Name: count, dtype: int64
test sources:
 source
openmic    2000
Name: count, dtype: int64


In [6]:
TARGET_SR = 16000                  # sample rate every clip is resampled to
CLIP_S = 10                        # clip length in seconds
TARGET_LEN = TARGET_SR * CLIP_S    # clip length in samples

def load_audio_mono_16k(path: str):
    """Load one audio file as a fixed-length mono waveform.

    Multi-channel audio is averaged down to one channel, resampled to
    ``TARGET_SR`` when the file's rate differs, then zero-padded on the right
    or truncated so that exactly ``TARGET_LEN`` samples remain.

    Args:
        path: Location of the audio file, passed to ``torchaudio.load``.

    Returns:
        Tensor of shape ``[1, TARGET_LEN]``.
    """
    waveform, native_sr = torchaudio.load(path)  # [C,T]

    # Down-mix by averaging channels; single-channel input is used as is.
    mono = waveform.mean(dim=0, keepdim=True) if waveform.size(0) > 1 else waveform
    if native_sr != TARGET_SR:
        mono = torchaudio.functional.resample(mono, native_sr, TARGET_SR)
    samples = mono.squeeze(0)  # [T]

    # Positive deficit -> pad with trailing zeros; otherwise keep the first TARGET_LEN samples.
    deficit = TARGET_LEN - samples.numel()
    if deficit > 0:
        samples = F.pad(samples, (0, deficit))
    else:
        samples = samples[:TARGET_LEN]
    return samples.unsqueeze(0)  # [1,T]

class AudioMultiLabelDataset(Dataset):
    """Map-style dataset yielding ``(waveform, labels, mask)`` for one clip.

    File paths come from the frame's ``path`` column; labels and masks come
    from the ``Y_COLS`` and ``M_COLS`` columns, copied into float32 arrays
    when the dataset is built. Audio is decoded on access.
    """

    def __init__(self, frame: pd.DataFrame):
        label_block = frame[Y_COLS].values
        mask_block = frame[M_COLS].values
        self.paths = frame["path"].tolist()
        self.y = label_block.astype(np.float32)
        self.m = mask_block.astype(np.float32)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        # Waveform is [1,T]; label and mask rows have one entry per Y_COLS / M_COLS column.
        return (
            load_audio_mono_16k(self.paths[idx]),
            torch.from_numpy(self.y[idx]),
            torch.from_numpy(self.m[idx]),
        )

BATCH = 64
NUM_WORKERS = 2

# All three loaders share one configuration. shuffle=False keeps the row order
# fixed, so prediction arrays from separate passes over a loader line up.
loader_options = dict(batch_size=BATCH, shuffle=False,
                      num_workers=NUM_WORKERS, pin_memory=True)

train_loader = DataLoader(AudioMultiLabelDataset(train_df), **loader_options)
val_loader   = DataLoader(AudioMultiLabelDataset(val_df), **loader_options)
test_loader  = DataLoader(AudioMultiLabelDataset(test_df), **loader_options)

# Pull one validation batch as a shape sanity check.
wav, y, m = next(iter(val_loader))
print("wav:", tuple(wav.shape), "y:", tuple(y.shape), "m:", tuple(m.shape))


wav: (64, 1, 160000) y: (64, 20) m: (64, 20)


In [7]:
# Mel-spectrogram front end shared by every model in this notebook; it lives on
# DEVICE so the transform runs where the batches are moved to.
# sample_rate follows TARGET_SR (the rate load_audio_mono_16k resamples to)
# instead of repeating the literal 16000, so the two cannot drift apart.
# NOTE(review): the remaining settings presumably mirror the fine-tuning
# configuration of the loaded checkpoints -- confirm before changing any of them.
mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=TARGET_SR,
    n_fft=1024,
    hop_length=320,
    win_length=1024,
    n_mels=128,
    center=True,
    power=2.0
).to(DEVICE)

def wav_to_logmel(wav_batch: torch.Tensor):
    """Convert a waveform batch into per-clip standardised log-mel features.

    The channel axis is squeezed away, the module-level ``mel`` transform is
    applied, and the result is log-compressed with a 1e-6 offset. Any NaN or
    infinite value is replaced by 0, then every clip is shifted and scaled by
    its own mean and standard deviation (the latter floored at 1e-6).

    Args:
        wav_batch: Waveforms of shape ``[B, 1, T]``.

    Returns:
        Tensor with the shape ``mel`` produces for ``[B, T]`` input
        (``[B, 128, TT]`` with the transform configured in this notebook).
    """
    log_mel = torch.log(mel(wav_batch.squeeze(1)) + 1e-6)      # [B,128,TT]
    log_mel = torch.nan_to_num(log_mel, nan=0.0, posinf=0.0, neginf=0.0)

    # Statistics are taken per clip over both the mel and the time axes.
    clip_axes = (1, 2)
    mu = log_mel.mean(dim=clip_axes, keepdim=True)
    sigma = log_mel.std(dim=clip_axes, keepdim=True).clamp_min(1e-6)
    return (log_mel - mu) / sigma


In [10]:
class ChannelAttention(nn.Module):
    """Channel gate: rescales every channel by a weight in (0, 1).

    A shared two-layer MLP scores the spatially averaged and the spatially
    max-pooled channel descriptors; the two scores are summed and passed
    through a sigmoid.

    Args:
        channels: Number of channels ``C`` of the input feature map.
        reduction: Bottleneck ratio; the hidden width is ``channels // reduction``,
            floored at 1 so that ``channels < reduction`` does not produce an
            empty hidden layer (which would make the gate ignore its input).
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Unchanged for the 32- and 64-channel blocks used below (hidden 4 and 8),
        # so existing checkpoints still load.
        hidden = max(1, channels // reduction)
        self.mlp = nn.Sequential(
            nn.Linear(channels, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channels),
        )

    def forward(self, x):
        """Gate ``x`` of shape ``[B, C, H, W]``; the output has the same shape."""
        avg = x.mean(dim=(2, 3))                # [B,C]
        mx = x.amax(dim=(2, 3))                 # [B,C]
        att = self.mlp(avg) + self.mlp(mx)      # [B,C]
        att = torch.sigmoid(att).unsqueeze(-1).unsqueeze(-1)  # [B,C,1,1]
        return x * att

class SpatialAttention(nn.Module):
    """Spatial gate: rescales every location by a weight in (0, 1).

    The channel-wise mean and channel-wise max maps are stacked (mean first)
    and reduced to a single map by one convolution followed by a sigmoid.

    Args:
        kernel_size: Side of the square convolution kernel; padding is
            ``kernel_size // 2``.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, padding=kernel_size // 2)

    def forward(self, x):
        """Gate ``x`` of shape ``[B, C, H, W]``."""
        channel_stats = torch.cat(
            (
                x.mean(dim=1, keepdim=True),   # [B,1,H,W]
                x.amax(dim=1, keepdim=True),   # [B,1,H,W]
            ),
            dim=1,
        )                                      # [B,2,H,W]
        gate = torch.sigmoid(self.conv(channel_stats))  # [B,1,H,W]
        return x * gate

class CBAM(nn.Module):
    """Channel attention followed by spatial attention on a ``[B, C, H, W]`` map.

    Args:
        channels: Channel count handed to ``ChannelAttention``.
        reduction: Bottleneck ratio handed to ``ChannelAttention``.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        self.ca = ChannelAttention(channels, reduction=reduction)
        self.sa = SpatialAttention(kernel_size=7)

    def forward(self, x):
        # Channel gate first, spatial gate second.
        return self.sa(self.ca(x))

def build_cbam_features():
    """Build the two-stage convolutional trunk of the CBAM tagger.

    Each stage is Conv3x3 -> ReLU -> MaxPool(2) -> CBAM, first 1 -> 32 and then
    32 -> 64 channels. The flat ``nn.Sequential`` layout is kept on purpose: the
    parameterised layers sit at indices 0, 3, 4 and 7, which is what the
    checkpoint keys (``0.weight``, ``3.ca...``, ``4.weight``, ``7.sa...``) refer to.

    Returns:
        ``nn.Sequential`` with eight entries.
    """
    layers = []
    in_ch = 1
    for out_ch in (32, 64):
        layers.extend([
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),  # idx 0 / 4
            nn.ReLU(inplace=True),                               # idx 1 / 5
            nn.MaxPool2d(2),                                     # idx 2 / 6
            CBAM(out_ch, reduction=8),                           # idx 3 / 7
        ])
        in_ch = out_ch
    return nn.Sequential(*layers)

class CBAMTagger(nn.Module):
    """Tagger head on top of a convolutional trunk.

    The trunk output is globally average-pooled and classified by a small MLP
    (64 -> 256 -> ``n_tags``), so the trunk must emit 64 channels.

    Args:
        features: Trunk mapping ``[B, 1, M, T]`` to ``[B, 64, *, *]``.
        n_tags: Number of output logits.
    """

    def __init__(self, features: nn.Module, n_tags: int = 20):
        super().__init__()
        self.features = features
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        # Layer positions inside the Sequential determine the state-dict keys
        # (head.1.*, head.4.*), so the order must not change.
        head_layers = [
            nn.Flatten(),
            nn.Linear(64, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, n_tags),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, logmel):
        """Map log-mel input ``[B, M, T]`` to logits ``[B, n_tags]``."""
        feature_maps = self.features(logmel.unsqueeze(1))  # [B,64,*,*]
        pooled = self.pool(feature_maps)                   # [B,64,1,1]
        return self.head(pooled)

cbam = CBAMTagger(build_cbam_features(), n_tags=len(TAGS)).to(DEVICE)

# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# installed PyTorch version -- only load checkpoints from a trusted source.
ckpt = torch.load(W_CBAM, map_location="cpu")

# Accept either a training checkpoint wrapping the weights or a bare state_dict.
if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
    sd = ckpt["model_state_dict"]
else:
    sd = ckpt  # already a plain state_dict

# If trained with DataParallel, drop the leading "module." prefix. Only a prefix
# is removed: a plain str.replace would also delete "module." from the middle of
# a key (for example "submodule.weight").
_dp_prefix = "module."
sd = {(k[len(_dp_prefix):] if k.startswith(_dp_prefix) else k): v for k, v in sd.items()}

# strict=True raises on any key mismatch, so both lists are empty whenever this returns.
missing, unexpected = cbam.load_state_dict(sd, strict=True)
print("Loaded. Missing:", missing, "Unexpected:", unexpected)

cbam.eval()
print("CBAM loaded.")


Loaded. Missing: [] Unexpected: []
CBAM loaded.


In [12]:
class MultiScaleConvBlock(nn.Module):
    """Three parallel convolutions (3x3, 5x5, 7x7) fused by channel concatenation.

    Every branch keeps the spatial size and emits ``out_ch`` channels, so the
    block outputs ``3 * out_ch`` channels. Batch norm, ReLU and a 2x2 max-pool
    follow the concatenation.

    Args:
        in_ch: Input channel count.
        out_ch: Channels produced by each branch.
    """

    def __init__(self, in_ch=1, out_ch=32):
        super().__init__()
        # Branches are registered as b1, b2, b3 in this order; the names are the
        # state-dict keys and the order is the concatenation order in forward().
        for branch_name, kernel in (("b1", 3), ("b2", 5), ("b3", 7)):
            branch = nn.Conv2d(in_ch, out_ch, kernel_size=kernel, padding=kernel // 2)
            setattr(self, branch_name, branch)
        self.bn = nn.BatchNorm2d(out_ch * 3)
        self.act = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))

    def forward(self, x):
        branch_outputs = [conv(x) for conv in (self.b1, self.b2, self.b3)]
        fused = torch.cat(branch_outputs, dim=1)
        fused = self.bn(fused)
        fused = self.act(fused)
        return self.pool(fused)

class MSCRNN(nn.Module):
    """Multi-scale CNN + bidirectional GRU tagger with attention pooling over time.

    Three ``MultiScaleConvBlock`` stages (each followed by 2-D dropout) process
    the log-mel input. The frequency axis is then averaged away, the sequence
    is run through a bidirectional GRU, and the GRU outputs are combined with
    softmax attention weights before the final linear classifier.

    Args:
        n_tags: Number of output logits.
        base: Channels per branch of every multi-scale block.
        rnn_hidden: GRU hidden size per direction.
        rnn_layers: Number of stacked GRU layers.
        dropout: Rate for the 2-D dropout and, when ``rnn_layers > 1``,
            for the dropout between GRU layers.
    """

    def __init__(self, n_tags=20, base=32, rnn_hidden=256, rnn_layers=2, dropout=0.3):
        super().__init__()
        conv_width = base * 3      # channels leaving every multi-scale block
        seq_width = rnn_hidden * 2  # forward + backward GRU states

        self.ms1 = MultiScaleConvBlock(1, base)
        self.ms2 = MultiScaleConvBlock(conv_width, base)
        self.ms3 = MultiScaleConvBlock(conv_width, base)
        self.drop2d = nn.Dropout2d(dropout)

        # Inter-layer GRU dropout is only requested for a stacked GRU.
        gru_dropout = dropout if rnn_layers > 1 else 0.0
        self.rnn = nn.GRU(
            input_size=conv_width,
            hidden_size=rnn_hidden,
            num_layers=rnn_layers,
            batch_first=True,
            bidirectional=True,
            dropout=gru_dropout,
        )

        self.attn = nn.Linear(seq_width, 1)
        self.fc = nn.Linear(seq_width, n_tags)

    def forward(self, logmel):
        """Map log-mel input ``[B, F, T]`` to logits ``[B, n_tags]``."""
        feats = logmel.unsqueeze(1)  # [B,1,F,T]
        for stage in (self.ms1, self.ms2, self.ms3):
            feats = self.drop2d(stage(feats))

        seq = feats.mean(dim=2).transpose(1, 2)   # [B,C,T] -> [B,T,C]
        seq, _ = self.rnn(seq)                    # [B,T,2H]

        # Softmax over the time axis gives one weight per frame.
        frame_weights = torch.softmax(self.attn(seq), dim=1)  # [B,T,1]
        pooled = (frame_weights * seq).sum(dim=1)             # [B,2H]
        return self.fc(pooled)

mscrnn = MSCRNN(n_tags=len(TAGS)).to(DEVICE)

# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# installed PyTorch version -- only load checkpoints from a trusted source.
ckpt = torch.load(W_MSCRNN, map_location="cpu")

# checkpoint -> actual state_dict (either wrapper key, or a bare state_dict)
if isinstance(ckpt, dict) and "model" in ckpt:
    sd = ckpt["model"]
elif isinstance(ckpt, dict) and "model_state_dict" in ckpt:
    sd = ckpt["model_state_dict"]
else:
    sd = ckpt

# DataParallel cleanup: drop the leading "module." prefix only. A plain
# str.replace would also delete "module." from the middle of a key.
_dp_prefix = "module."
sd = {(k[len(_dp_prefix):] if k.startswith(_dp_prefix) else k): v for k, v in sd.items()}

# strict=True raises on any key mismatch, so both lists are empty whenever this returns.
missing, unexpected = mscrnn.load_state_dict(sd, strict=True)
print("Loaded. Missing:", missing, "Unexpected:", unexpected)

mscrnn.eval()
print("MS-CRNN loaded.")


Loaded. Missing: [] Unexpected: []
MS-CRNN loaded.


In [15]:
!pip -q install hear21passt

from hear21passt.base import get_basic_model, get_model_passt

passt = get_basic_model(mode="logits")
passt.net = get_model_passt(arch="passt_s_swa_p16_128_ap476", n_classes=len(TAGS))
passt.load_state_dict(torch.load(W_PASST, map_location="cpu"))

ckpt = torch.load(W_PASST, map_location="cpu")

if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
    sd = ckpt["model_state_dict"]
elif isinstance(ckpt, dict) and "model" in ckpt:
    sd = ckpt["model"]
else:
    sd = ckpt

sd = {k.replace("module.", ""): v for k, v in sd.items()}

missing, unexpected = passt.load_state_dict(sd, strict=True)
print("Loaded. Missing:", missing, "Unexpected:", unexpected)

passt.to(DEVICE).eval()
print("PaSST loaded.")





 Loading PASST TRAINED ON AUDISET 


PaSST(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((768,), ep

In [23]:
@torch.no_grad()
def predict_probs_cbam_or_ms(model, loader, desc):
    """Run a log-mel model over a loader and collect sigmoid probabilities.

    Args:
        model: Callable mapping the output of ``wav_to_logmel`` to logits.
        loader: Iterable of ``(wav, y, m)`` batches.
        desc: Progress-bar label.

    Returns:
        Tuple ``(probs, labels, masks)`` of float32 NumPy arrays, each the
        concatenation of the per-batch arrays in iteration order.
    """
    prob_chunks, label_chunks, mask_chunks = [], [], []

    for wav, y, m in tqdm(loader, desc=desc):
        device_wav = wav.to(DEVICE, non_blocking=True)   # [B,1,T]
        logits = model(wav_to_logmel(device_wav))        # [B,20]
        batch_probs = torch.sigmoid(logits).cpu().numpy()

        prob_chunks.append(batch_probs.astype(np.float32))
        label_chunks.append(y.numpy().astype(np.float32))
        mask_chunks.append(m.numpy().astype(np.float32))

    probs = np.concatenate(prob_chunks)
    labels = np.concatenate(label_chunks)
    masks = np.concatenate(mask_chunks)
    return probs, labels, masks

@torch.no_grad()
def predict_probs_passt(model, loader, desc):
    all_p, all_y, all_m = [], [], []
    for wav, y, m in tqdm(loader, desc=desc):
        wav = wav.to(DEVICE, non_blocking=True)   # [B,1,T]
        logmel = wav_to_logmel(wav)               # [B,F,T]
        x = logmel.unsqueeze(1)                   # [B,1,F,T]

        out = model.net(x)

        # PaSST may return (logits, ...) or just logits
        logits = out[0] if isinstance(out, (tuple, list)) else out   # [B,20]

        probs = torch.sigmoid(logits).cpu().numpy().astype(np.float32)
        all_p.append(probs)
        all_y.append(y.cpu().numpy().astype(np.float32))
        all_m.append(m.cpu().numpy().astype(np.float32))

    return np.concatenate(all_p), np.concatenate(all_y), np.concatenate(all_m)




In [21]:
# Validation-split probabilities for the two log-mel models. Labels and masks are
# kept from the CBAM pass only: both passes iterate the same unshuffled loader,
# so the rows line up.
# NOTE(review): this cell's execution count (21) is lower than that of the cell
# defining predict_probs_cbam_or_ms (23), so the saved output was produced before
# that definition cell was last run -- re-run top to bottom to confirm.
p_cbam_val,  y_val,  m_val  = predict_probs_cbam_or_ms(cbam,  val_loader,  "VAL CBAM")
p_ms_val,    _,      _      = predict_probs_cbam_or_ms(mscrnn,val_loader,  "VAL MS-CRNN")


# Same two models on the held-out test split.
# NOTE(review): the recorded "TEST CBAM" pass took 14:57 against ~25 s for the
# other passes; the cause is not visible here (possibly first-time file reads).
p_cbam_test, y_test, m_test = predict_probs_cbam_or_ms(cbam,  test_loader, "TEST CBAM")
p_ms_test,   _,      _      = predict_probs_cbam_or_ms(mscrnn,test_loader, "TEST MS-CRNN")


# Shape check: one row per clip, one column per tag.
print("VAL shapes:", p_cbam_val.shape, p_ms_val.shape)
print("TEST shapes:", p_cbam_test.shape, p_ms_test.shape)


VAL CBAM: 100%|██████████| 32/32 [00:25<00:00,  1.28it/s]
VAL MS-CRNN: 100%|██████████| 32/32 [00:25<00:00,  1.26it/s]
TEST CBAM: 100%|██████████| 32/32 [14:57<00:00, 28.06s/it]
TEST MS-CRNN: 100%|██████████| 32/32 [00:24<00:00,  1.29it/s]

VAL shapes: (2000, 20) (2000, 20)
TEST shapes: (2000, 20) (2000, 20)





In [24]:
# PaSST probabilities on the validation and test splits. Labels and masks are
# discarded here; the ensemble cells use y_val / m_val / y_test / m_test from the
# CBAM pass over the same unshuffled loaders.
p_passt_val, _,      _      = predict_probs_passt(passt,     val_loader,  "VAL PaSST")
p_passt_test,_,      _      = predict_probs_passt(passt,     test_loader, "TEST PaSST")

# Shape check: one row per clip, one column per tag.
print(p_passt_val.shape)
print(p_passt_test.shape)

VAL PaSST: 100%|██████████| 32/32 [00:44<00:00,  1.39s/it]
TEST PaSST: 100%|██████████| 32/32 [00:43<00:00,  1.36s/it]

(2000, 20)
(2000, 20)





In [25]:

def macro_f1_at_thr(probs, y_true, m, thr=0.5):
    """Masked macro-averaged F1 at one decision threshold.

    For every class, only rows whose mask entry exceeds 0.5 are scored.
    Classes with no such row are left out of the average. Precision, recall
    and F1 each carry a 1e-9 term in the denominator, so a class without
    true positives scores 0 instead of dividing by zero.

    Args:
        probs: Array ``[N, K]`` of predicted probabilities.
        y_true: Array ``[N, K]`` of labels (cast to int32 before comparison).
        m: Array ``[N, K]``; entries above 0.5 mark rows to score.
        thr: A prediction is positive when ``probs >= thr``.

    Returns:
        Mean per-class F1 as a Python float, or 0.0 when no class was scored.
    """
    scored = m > 0.5
    predicted = (probs >= thr).astype(np.int32)

    class_f1 = []
    for col in range(y_true.shape[1]):
        rows = scored[:, col]
        if rows.sum() == 0:
            continue
        truth = y_true[rows, col].astype(np.int32)
        guess = predicted[rows, col].astype(np.int32)

        hit = guess == 1
        actual = truth == 1
        tp = int((hit & actual).sum())
        fp = int((hit & (truth == 0)).sum())
        fn = int(((guess == 0) & actual).sum())

        precision = tp / (tp + fp + 1e-9)
        recall = tp / (tp + fn + 1e-9)
        class_f1.append(2 * precision * recall / (precision + recall + 1e-9))

    if len(class_f1) == 0:
        return 0.0
    return float(np.mean(class_f1))


In [26]:
def ens_probs(w_cbam, w_ms, w_passt, p1, p2, p3):
    """Weighted sum of three probability arrays.

    ``p1``, ``p2`` and ``p3`` are weighted by ``w_cbam``, ``w_ms`` and
    ``w_passt`` respectively. The weights are used as given; nothing is
    normalised here.
    """
    blended = w_cbam * p1
    blended = blended + w_ms * p2
    return blended + w_passt * p3

# Grid-search ensemble weights (non-negative, summing to 1, step 0.1) together
# with a single global threshold, maximising masked macro-F1 on validation.
best = {"f1":-1, "w":None, "thr":None}

GRID_STEPS = 10
cands = [i / GRID_STEPS for i in range(GRID_STEPS + 1)]   # 0.0, 0.1, ..., 1.0
thrs = [0.2,0.3,0.4,0.5]

# The simplex is enumerated with integer tenths. Computing the third weight as
# 1.0 - w_passt - w_ms in floating point can come out slightly negative
# (1.0 - 0.9 - 0.1 is about -2.8e-17), which made a `w_cbam < 0` check silently
# drop valid weight combinations.
for i_passt in range(GRID_STEPS + 1):
    w_passt = i_passt / GRID_STEPS
    for i_ms in range(GRID_STEPS + 1 - i_passt):
        w_ms = i_ms / GRID_STEPS
        w_cbam = (GRID_STEPS - i_passt - i_ms) / GRID_STEPS
        p_ens = ens_probs(w_cbam, w_ms, w_passt, p_cbam_val, p_ms_val, p_passt_val)
        for thr in thrs:
            f1 = macro_f1_at_thr(p_ens, y_val, m_val, thr=thr)
            # Strict '>' keeps the first combination found when scores tie.
            if f1 > best["f1"]:
                best = {"f1": f1, "w": (w_cbam, w_ms, w_passt), "thr": thr}

print("BEST on VAL:", best)


BEST on VAL: {'f1': 0.4753918985618341, 'w': (0.0, 0.6, 0.4), 'thr': 0.5}


In [27]:
# Apply the validation-selected weights and threshold to the test predictions.
thr = best["thr"]
w_cbam, w_ms, w_passt = best["w"]

p_ens_test = ens_probs(w_cbam, w_ms, w_passt, p_cbam_test, p_ms_test, p_passt_test)

print("Ensemble weights:", best["w"], "tuned thr:", thr)
# Two fixed reference thresholds, then the tuned one.
for report_label, report_thr in (
    ("TEST macro-F1@0.2:", 0.2),
    ("TEST macro-F1@0.5:", 0.5),
    ("TEST macro-F1@tuned:", thr),
):
    print(report_label, macro_f1_at_thr(p_ens_test, y_test, m_test, thr=report_thr))


Ensemble weights: (0.0, 0.6, 0.4) tuned thr: 0.5
TEST macro-F1@0.2: 0.27883046136217404
TEST macro-F1@0.5: 0.4769652414680067
TEST macro-F1@tuned: 0.4769652414680067


In [28]:
def per_class_metrics(probs, y_true, m, thr):
    """Per-tag precision / recall / F1 table at one decision threshold.

    For every tag in ``TAGS`` (column ``i`` of the arrays), only rows whose
    mask entry exceeds 0.5 are scored; tags with no such row are omitted.
    Denominators carry a 1e-9 term so empty counts give 0 rather than an error.

    Args:
        probs: Array ``[N, len(TAGS)]`` of predicted probabilities.
        y_true: Array ``[N, len(TAGS)]`` of labels (cast to int32).
        m: Array ``[N, len(TAGS)]``; entries above 0.5 mark rows to score.
        thr: A prediction is positive when ``probs >= thr``.

    Returns:
        DataFrame with one row per scored tag, sorted by ``f1`` descending.
        When no tag has any scored row, an empty frame with the same columns
        is returned (without explicit columns, sorting by "f1" raised KeyError).
    """
    columns = ["tag", "thr", "valid_n", "pos_n", "precision", "recall", "f1", "tp", "fp", "fn"]
    rows = []
    mask = (m > 0.5)
    yb = (probs >= thr).astype(np.int32)

    for i, tag in enumerate(TAGS):
        mk = mask[:, i]
        if mk.sum() == 0:
            continue
        yt = y_true[mk, i].astype(np.int32)
        yp = yb[mk, i].astype(np.int32)

        tp = int(((yp == 1) & (yt == 1)).sum())
        fp = int(((yp == 1) & (yt == 0)).sum())
        fn = int(((yp == 0) & (yt == 1)).sum())

        prec = tp / (tp + fp + 1e-9)
        rec = tp / (tp + fn + 1e-9)
        f1 = 2 * prec * rec / (prec + rec + 1e-9)

        rows.append({
            "tag": tag, "thr": thr, "valid_n": int(mk.sum()), "pos_n": int(yt.sum()),
            "precision": prec, "recall": rec, "f1": f1, "tp": tp, "fp": fp, "fn": fn
        })

    # Explicit columns keep the schema stable even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns).sort_values("f1", ascending=False)

# Per-class tables for the test split at the fixed 0.5 threshold and at the
# threshold selected on validation.
df_ens_05 = per_class_metrics(p_ens_test, y_test, m_test, thr=0.5)
df_ens_tuned = per_class_metrics(p_ens_test, y_test, m_test, thr=thr)

# The tables are sorted by F1 descending, so head/tail show best and worst tags.
for caption, table_slice in (
    ("Top-10 @0.5", df_ens_05.head(10)),
    ("Bottom-10 @0.5", df_ens_05.tail(10)),
):
    print(caption)
    display(table_slice)

OUT1 = Path("/content/drive/MyDrive/deep_learning/ensemble_test_per_class_thr0p5.csv")
OUT2 = Path("/content/drive/MyDrive/deep_learning/ensemble_test_per_class_thr_tuned.csv")

# Write both files first, then confirm, so nothing is reported if a write fails.
for table, destination in ((df_ens_05, OUT1), (df_ens_tuned, OUT2)):
    table.to_csv(destination, index=False)
for destination in (OUT1, OUT2):
    print("Saved:", destination)


Top-10 @0.5


Unnamed: 0,tag,thr,valid_n,pos_n,precision,recall,f1,tp,fp,fn
12,piano,0.5,2000,131,0.737589,0.793893,0.764706,104,37,27
6,drums,0.5,2000,106,0.508671,0.830189,0.630824,88,85,18
5,cymbals,0.5,2000,120,0.550633,0.725,0.625899,87,71,33
11,organ,0.5,2000,75,0.523364,0.746667,0.615385,56,51,19
8,guitar,0.5,2000,115,0.648936,0.530435,0.583732,61,33,54
18,violin,0.5,2000,132,0.464455,0.742424,0.571429,98,113,34
10,mandolin,0.5,2000,95,0.379487,0.778947,0.510345,74,121,21
9,mallet_percussion,0.5,2000,60,0.508475,0.5,0.504202,30,29,30
14,synthesizer,0.5,2000,124,0.360902,0.774194,0.492308,96,170,28
1,banjo,0.5,2000,73,0.335404,0.739726,0.461538,54,107,19


Bottom-10 @0.5


Unnamed: 0,tag,thr,valid_n,pos_n,precision,recall,f1,tp,fp,fn
2,bass,0.5,2000,52,0.412698,0.5,0.452174,26,37,26
16,trumpet,0.5,2000,127,0.33463,0.677165,0.447917,86,171,41
3,cello,0.5,2000,92,0.309623,0.804348,0.44713,74,165,18
13,saxophone,0.5,2000,110,0.318182,0.7,0.4375,77,165,33
17,ukulele,0.5,2000,76,0.278626,0.960526,0.431953,73,189,3
15,trombone,0.5,2000,98,0.239044,0.612245,0.34384,60,191,38
19,voice,0.5,2000,89,0.703704,0.213483,0.327586,19,8,70
7,flute,0.5,2000,64,0.365385,0.296875,0.327586,19,33,45
4,clarinet,0.5,2000,53,0.196809,0.698113,0.307054,37,151,16
0,accordion,0.5,2000,44,0.156566,0.704545,0.256198,31,167,13


Saved: /content/drive/MyDrive/deep_learning/ensemble_test_per_class_thr0p5.csv
Saved: /content/drive/MyDrive/deep_learning/ensemble_test_per_class_thr_tuned.csv
