In [None]:
#imports
import os
import json
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [None]:
# PATHS
# Every input lives under a single root directory. The default is the Google
# Drive mount point used on Colab; set the IRMAS_DRIVE_ROOT environment
# variable to run the notebook against a different location.
DRIVE_ROOT = (os.environ.get("IRMAS_DRIVE_ROOT") or "/content/drive/MyDrive").rstrip("/")

# Saved Keras model that is passed to load_model.
MODEL_PATH = f"{DRIVE_ROOT}/multilabel_cnn_improved.keras"
# Directory holding one .png image per row of the label CSV
# (presumably mel-spectrogram renders -- confirm against the export step).
MEL_DIR = f"{DRIVE_ROOT}/irmas_multilabel_mels"
# CSV with a "file" column plus one label column per instrument.
LABEL_CSV = f"{DRIVE_ROOT}/multilabel_labels.csv"

In [None]:
# CONFIG (MUST MATCH TRAINING)
# Side length, in pixels, that every image is resized to when it is loaded.
IMG_SIZE = 128

# Label column names in the CSV. Their order matters: position i here is
# scored against column i of the model's predictions during threshold tuning.
INSTRUMENTS = [
    "cel", "cla", "flu", "gac",
    "gel", "org", "pia", "sax",
    "tru", "vio", "voi",
]

# LOAD MODEL
# Restores the saved Keras model from MODEL_PATH.
model = load_model(MODEL_PATH)
print("Model loaded")

Model loaded


In [None]:
# LOAD LABELS
# -------------------------------
# Reads the label table, loads the image that belongs to each row, and builds
# two aligned arrays:
#   X - images resized to (IMG_SIZE, IMG_SIZE) with pixel values divided by 255
#   y - float32 label vectors with one column per entry of INSTRUMENTS
df = pd.read_csv(LABEL_CSV)

# Fail early with a readable message if the CSV lacks an expected column,
# rather than with a bare KeyError from inside the loading loop.
missing_cols = [col for col in ["file", *INSTRUMENTS] if col not in df.columns]
if missing_cols:
    raise KeyError(f"{LABEL_CSV} is missing expected columns: {missing_cols}")

# Convert the label columns to a numeric matrix once. Taking them row by row
# from a mixed string/number row yields object-dtype arrays, which downstream
# metric code then has to repair.
label_matrix = df[INSTRUMENTS].to_numpy(dtype=np.float32)

X, y = [], []
missing_files = []

for file_name, labels in zip(df["file"], label_matrix):
    # The CSV lists audio file names; the images use the same name with .png.
    img_path = os.path.join(MEL_DIR, file_name.replace(".wav", ".png"))

    if not os.path.exists(img_path):
        # Rows without an image are skipped, but recorded so the loss is visible.
        missing_files.append(img_path)
        continue

    img = tf.keras.preprocessing.image.load_img(
        img_path, target_size=(IMG_SIZE, IMG_SIZE)
    )
    X.append(tf.keras.preprocessing.image.img_to_array(img) / 255.0)
    y.append(labels)

if missing_files:
    print(
        f"Skipped {len(missing_files)} of {len(df)} rows with no image "
        f"(first missing: {missing_files[0]})"
    )

# Without this guard an empty dataset only surfaces later as an obscure error
# from train_test_split.
if not X:
    raise FileNotFoundError(
        f"No images listed in {LABEL_CSV} were found under {MEL_DIR}"
    )

X = np.array(X)
y = np.array(y, dtype=np.float32)

In [None]:
# CREATE "THRESHOLD-VAL" SPLIT
# -------------------------------
# train_test_split returns [X_train, X_test, y_train, y_test]. Only the 20%
# "test" portion (positions 1 and 3) is kept; the fixed random_state makes the
# selection reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_thr, y_thr = split_parts[1], split_parts[3]
del split_parts  # release the references to the unused 80% portion

print("Threshold tuning samples:", len(X_thr))

# PREDICT PROBABILITIES
# -------------------------------
# One prediction row per tuning sample.
y_probs = model.predict(X_thr, verbose=1)

Threshold tuning samples: 200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 529ms/step


In [None]:
# FIND BEST THRESHOLD PER CLASS
# -------------------------------
# For every instrument, sweep a fixed grid of decision thresholds over the
# predicted probabilities and keep the threshold with the highest F1 score on
# the tuning split. Result: best_thresholds maps instrument name -> threshold.
# (np and f1_score come from the notebook's import cell.)

# FIX LABEL DTYPE (CRITICAL)
# The labels may arrive as an object-dtype array (they can be assembled from
# mixed-type DataFrame rows); coerce to numeric and then to hard 0/1 targets.
y_thr = (np.asarray(y_thr, dtype=np.float32) > 0.5).astype(int)

# Column i of both arrays must describe INSTRUMENTS[i].
assert y_probs.shape == y_thr.shape, (
    f"prediction shape {y_probs.shape} does not match label shape {y_thr.shape}"
)

# Candidate thresholds 0.10, 0.15, ..., 0.85. They are derived from integers
# because np.arange with a fractional step accumulates rounding error: it can
# produce values such as 0.30000000000000004, so the value that was evaluated
# would differ from the rounded value that gets saved.
THRESHOLD_GRID = np.arange(10, 90, 5) / 100.0

best_thresholds = {}

for i, inst in enumerate(INSTRUMENTS):
    best_f1 = 0.0
    best_t = 0.5  # fallback when no candidate reaches F1 > 0

    for t in THRESHOLD_GRID:
        preds = (y_probs[:, i] >= t).astype(int)
        f1 = f1_score(y_thr[:, i], preds, zero_division=0)

        # Strict '>' keeps the lowest threshold among candidates that tie.
        if f1 > best_f1:
            best_f1 = f1
            best_t = float(t)  # plain float: exactly the value just evaluated

    best_thresholds[inst] = best_t
    print(f"{inst}: threshold={best_t}, F1={best_f1:.3f}")

cel: threshold=0.3, F1=0.392
cla: threshold=0.3, F1=0.359
flu: threshold=0.3, F1=0.463
gac: threshold=0.4, F1=0.434
gel: threshold=0.4, F1=0.523
org: threshold=0.3, F1=0.421
pia: threshold=0.35, F1=0.402
sax: threshold=0.35, F1=0.406
tru: threshold=0.35, F1=0.424
vio: threshold=0.3, F1=0.419
voi: threshold=0.35, F1=0.456


In [None]:
# SAVE THRESHOLDS
# -------------------------------
# Serialise the {instrument: threshold} mapping as 4-space-indented JSON and
# write it to the current working directory.
thresholds_json = json.dumps(best_thresholds, indent=4)
with open("per_class_thresholds.json", "w") as out_file:
    out_file.write(thresholds_json)

print("\nThreshold tuning done")
print("Saved: per_class_thresholds.json")


Threshold tuning done
Saved: per_class_thresholds.json
