In [7]:
# Probably more imports than are really necessary...
import os
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from tqdm import tqdm
import librosa
import numpy as np
import miditoolkit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, average_precision_score, accuracy_score
import random
import librosa


In [2]:
TAGS = ['rock', 'oldies', 'jazz', 'pop', 'dance', 'blues', 'punk', 'chill', 'electronic', 'country']
tag_to_index = {tag: i for i, tag in enumerate(TAGS)}


# do multi-hot encoding

def multi_hot_encode(tags):
    """
    Given a list of tag strings, return a multi-hot encoded tensor.
    Example input: ['jazz', 'pop']
    Output: tensor([0., 0., 1., 1., 0., 0., 0., 0., 0., 0.])
    """
    vec = torch.zeros(len(TAGS), dtype=torch.float32)
    for tag in tags:
        if tag in tag_to_index:
            vec[tag_to_index[tag]] = 1.0
        else:
            raise ValueError(f"Unknown tag: {tag}")
    return vec


In [3]:
dataroot3 = "data/student_files/task3_audio_classification/"

In [4]:
def save_model(model, filepath='sol_1.pt'):
    """Save a PyTorch model to a file"""
    torch.save(model.state_dict(), filepath)
    print(f"Model saved to {filepath}")

def load_model(model_class, filepath='sol_1.pt', *args, **kwargs):
    """Load a PyTorch model from a file"""
    model = model_class(*args, **kwargs)  # instantiate the model
    model.load_state_dict(torch.load(filepath))
    model.eval()  # optional: sets dropout/batchnorm to eval mode
    print(f"Model loaded from {filepath}")
    return model

In [35]:
from mido import MidiFile
from sklearn.model_selection import train_test_split
from itertools import islice
import librosa

SAMPLE_RATE = 16000

# create train loader 


def get_lowest_pitch(file_path):
    # Initialize lowest_note to a high value (since MIDI notes are from 0 to 127)
    lowest_note = 128  
    mid = MidiFile(file_path)
    
    for track in mid.tracks:
        for msg in track:
            if msg.type == 'note_on' and msg.velocity > 0:
                if msg.note < lowest_note:
                    lowest_note = msg.note
    
    # Return None if no note is found
    return lowest_note if lowest_note != 128 else None

def get_highest_pitch(file_path):
    # Initialize highest_note to a low value (since MIDI notes are from 0 to 127)
    highest_note = -1  
    mid = MidiFile(file_path)
    
    for track in mid.tracks:
        for msg in track:
            if msg.type == 'note_on' and msg.velocity > 0:
                if msg.note > highest_note:
                    highest_note = msg.note
                    
    # Return None if no note is found
    return highest_note if highest_note != -1 else None

def get_unique_pitch_num(file_path):
    mid = MidiFile(file_path)
    notes = set()
    
    for track in mid.tracks:
        for msg in track:
            if msg.type == 'note_on' and msg.velocity > 0:
                notes.add(msg.note)
    
    return len(notes)

def get_average_pitch_value(file_path):
    #Q8: Your code goes here
    mid = MidiFile(file_path)
    
    notes = []
    for track in mid.tracks:
        for msg in track:
            if msg.type == 'note_on' and msg.velocity > 0:
                notes.append(msg.note)
    
    if notes:
        return sum(notes) / len(notes)
    else:
        return None

def extract_waveform(path):
    waveform, sr = librosa.load(path, sr=SAMPLE_RATE)  # waveform: 1D NumPy array
    waveform = torch.FloatTensor(waveform)              # Convert to 1D torch tensor

    if sr != SAMPLE_RATE:
        resample = torchaudio.transforms.Resample(orig_freq=sr, new_freq=SAMPLE_RATE)
        waveform = resample(waveform.unsqueeze(0)).squeeze(0)  # (1, N) → (N,)

    # Pad or trim to target length (10 seconds)
    target_len = SAMPLE_RATE * 10
    if waveform.shape[0] < target_len:
        pad_len = target_len - waveform.shape[0]
        waveform = F.pad(waveform, (0, pad_len))
    else:
        waveform = waveform[:target_len]

    return waveform.numpy()  # shape: (160000,)


def extract_q(w):
    # Your code here
    result = librosa.cqt(y=w, sr=SAMPLE_RATE)
    result = librosa.amplitude_to_db(np.abs(result))
    q =torch.FloatTensor(result)
    
    mean = q.mean(dim=1)  # shape: (84,)
    std = q.std(dim=1)    # shape: (84,)
    return torch.cat([mean, std], dim=0)  # shape: (168,)

def extract_mfcc(w):
    # Your code here:
    # load using librosa.feature.mfcc
    mfcc = librosa.feature.mfcc(y=w, sr=SAMPLE_RATE, n_mfcc = 13)
    # extract mean and 
    means = np.mean(mfcc, axis=1)
    
    stds = np.std(mfcc, axis=1)
    # concatenate
    features = np.concatenate([means, stds])
    
    return torch.FloatTensor(features)

def extract_spec(w):
    # Your code here
    # load
    stft = librosa.stft(y=w)
    # take squared absolute values
    spec = np.abs(stft) ** 2
    
    spec = torch.FloatTensor(spec)
    
    mean = spec.mean(dim=1)  # shape (128,)
    std = spec.std(dim=1)    # shape (128,)
    return torch.cat([mean, std], dim=0)  # shape (256,)


import torch
import pretty_midi
from sklearn.model_selection import train_test_split

def features(path):
    full_path = dataroot3 + '/' + path
    try:
        w = extract_waveform(full_path)
        # midi_obj = pretty_midi.PrettyMIDI(full_path)
        # w = midi_obj.fluidsynth(fs=SAMPLE_RATE)

        if w is None or len(w) < SAMPLE_RATE // 10:  # e.g. less than 0.1s
            raise ValueError("Waveform too short or empty")

        mfcc = extract_mfcc(w)
        spec = extract_spec(w)
        q = extract_q(w)
        
        features = torch.cat(
            [
                mfcc, 
                spec, 
                q
            ]) 

        return features
    except Exception as e:
        print(f"Failed to process {full_path}: {e}")
        return None

from joblib import Parallel, delayed

def create_train_features(size=None, val_split=0.2, n_jobs=4):
    # Load data
    with open(dataroot3 + "/train.json", 'r') as f:
        train_json = eval(f.read())
    
    # Limit size if specified
    if size is not None:
        train_json = dict(list(train_json.items())[:size])
    
    # Parallel feature extraction
    keys = list(train_json.keys())
    values = list(train_json.values())

    X = Parallel(n_jobs=n_jobs)(
        delayed(features)(key) for key in keys
    )
    y = torch.stack([multi_hot_encode(tags) for tags in values])
    
    # Convert lists to tensors
    X = torch.stack([torch.tensor(x, dtype=torch.float32) for x in X])
    Y = torch.tensor(y, dtype=torch.int64)
    
    # Return all data if no validation split needed
    if val_split <= 0:
        return X, Y
    
    # Split into training and validation sets
    # X_train, X_val, Y_train, Y_val = train_test_split(
    #     X, Y, test_size=val_split, random_state=42, shuffle=True
    # )
    
    return X, Y

In [36]:
(extract_waveform("data/student_files/task3_audio_classification//train/3590.wav"))

array([ 0.00018311,  0.00024414,  0.00018311, ..., -0.06781006,
       -0.01745605,  0.02740479], dtype=float32)

In [37]:
X_data, y_data  = create_train_features()
X_data.shape, y_data.shape

  X = torch.stack([torch.tensor(x, dtype=torch.float32) for x in X])
  Y = torch.tensor(y, dtype=torch.int64)


(torch.Size([4000, 2244]), torch.Size([4000, 10]))

In [38]:
# Save data
import pickle

train_data_dict = {'x': X_data, 'y': y_data}

with open("task3_train_data_3.pkl", "wb") as file:
    pickle.dump(train_data_dict, file)

In [ ]:
# load data
with open("task3_train_data_3.pkl", "rb") as file:
    data = pickle.load(file)

X_data = data['x']
y_data = data['y']

In [76]:
from skmultilearn.model_selection import iterative_train_test_split
import numpy as np

# Convert tensors to numpy
X_np = X_data.numpy()
y_np = y_data.numpy()

# Perform stratified split (e.g., 80% train, 20% val)
X_train, y_train, X_val_global, y_val_global = iterative_train_test_split(X_np, y_np, test_size=0.1)

print(X_train.shape, y_train.shape)
print(X_val_global.shape, y_val_global.shape)


(3600, 2244) (3600, 10)
(400, 2244) (400, 10)


In [39]:
print(len(X_data[0]))
feature_size = (len(X_data[0]))

2244


In [86]:
from collections import Counter


def verify_data(y_train, mel=None, mfcc=None, num_classes=10):
    """Check label distribution and optional input stats. 
       Returns pos_weight tensor for BCEWithLogitsLoss to handle class imbalance.

       Parameters:
       - y_train (Tensor or ndarray): shape (N, num_classes), binary multi-label.
       - mel (Tensor or ndarray): optional, for range checking.
       - mfcc (Tensor or ndarray): optional, for range checking.
    """
    if isinstance(y_train, np.ndarray):
        y_train = torch.tensor(y_train)
    
    label_counter = Counter()
    total_assignments = 0
    sample_count = y_train.size(0)

    for i in range(num_classes):
        class_count = (y_train[:, i] == 1).sum().item()
        label_counter[i] += class_count
        total_assignments += class_count

    print(f"Total samples: {sample_count}")
    print(f"Total class assignments (1s): {total_assignments}\n")

    print("Class frequency distribution:")
    counts = []
    for i in range(num_classes):
        count = label_counter[i]
        counts.append(count)
        print(f"  Class {i}: {count} assignments ({count / total_assignments:.2%})")

    # Compute pos_weight = (N - count) / count
    label_counts_tensor = torch.tensor(counts, dtype=torch.float)
    pos_weight = (sample_count - label_counts_tensor) / label_counts_tensor

    print("\nComputed pos_weight (for BCEWithLogitsLoss):")
    for i, w in enumerate(pos_weight):
        print(f"  Class {i}: {w.item():.4f}")

    # Optional: check mel / mfcc value ranges
    if mel is not None and isinstance(mel, np.ndarray):
        mel = torch.tensor(mel)
    if mfcc is not None and isinstance(mfcc, np.ndarray):
        mfcc = torch.tensor(mfcc)

    if mel is not None:
        if torch.isnan(mel).any() or torch.isinf(mel).any():
            print("WARNING: NaN or Inf values found in mel data!")
        else:
            print(f"mel range: [{mel.min().item():.4f}, {mel.max().item():.4f}]")

    if mfcc is not None:
        if torch.isnan(mfcc).any() or torch.isinf(mfcc).any():
            print("WARNING: NaN or Inf values found in mfcc data!")
        else:
            print(f"mfcc range: [{mfcc.min().item():.4f}, {mfcc.max().item():.4f}]")

    return pos_weight

In [87]:
pos_weight = verify_data(y_train)
pos_weight = torch.log1p(pos_weight) 
pos_weight

Total samples: 3600
Total class assignments (1s): 4165

Class frequency distribution:
  Class 0: 1765 assignments (42.38%)
  Class 1: 147 assignments (3.53%)
  Class 2: 351 assignments (8.43%)
  Class 3: 617 assignments (14.81%)
  Class 4: 152 assignments (3.65%)
  Class 5: 220 assignments (5.28%)
  Class 6: 162 assignments (3.89%)
  Class 7: 40 assignments (0.96%)
  Class 8: 476 assignments (11.43%)
  Class 9: 235 assignments (5.64%)

Computed pos_weight (for BCEWithLogitsLoss):
  Class 0: 1.0397
  Class 1: 23.4898
  Class 2: 9.2564
  Class 3: 4.8347
  Class 4: 22.6842
  Class 5: 15.3636
  Class 6: 21.2222
  Class 7: 89.0000
  Class 8: 6.5630
  Class 9: 14.3191


tensor([0.7128, 3.1983, 2.3279, 1.7638, 3.1648, 2.7951, 3.1011, 4.4998, 2.0233,
        2.7291])

In [116]:
import torch
import torch.nn as nn

class MLPClassifier(nn.Module):
    def __init__(self, input_dim, num_classes=1):  # num_classes = 1 for binary output
        super().__init__()
        self.__init_args__ = (input_dim,)
        self.__init_kwargs__ = {'num_classes': num_classes}
        self.input_dim = input_dim

        # More expressive feature extractor
        self.feature_net = nn.Sequential(
            nn.Linear(self.input_dim, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
        )

        # Attention mechanism to emphasize important features
        self.attention = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),  # output matches feature dim
            nn.Sigmoid()  # attention weights between 0 and 1
        )

        # Final classifier head
        self.classifier = nn.Sequential(
            nn.Linear(128, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)  # Output logits
        )

    def forward(self, x):
        features = self.feature_net(x)
        attention_weights = torch.sigmoid(self.attention(features))
        weighted_features = features * attention_weights
        logits = self.classifier(weighted_features)
        return logits  # raw logit

    def extract_features(self, x):
        """Intermediate representation before attention and classification"""
        return self.feature_net(x)

In [121]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import average_precision_score
import numpy as np
import copy
import time

class SklearnMLPClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=100, lr=1e-3, batch_size=32, device='cpu', n_classes=1, verbose=True):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        self.device = device
        self.n_classes = n_classes
        self.verbose = verbose
        self.history = []
        self._build_model()

    def _build_model(self):
        self.model = MLPClassifier(input_dim=self.input_dim, num_classes=self.n_classes).to(self.device)
        self.criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(self.device))
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-4)

    from sklearn.model_selection import train_test_split
    from sklearn.metrics import average_precision_score

    def fit(self, X_train, y_train, X_val=None, y_val=None):
        device = torch.device(self.device)
        if self.verbose:
            print(f"\n🖥️  Using device: {device}")
            if device.type == "mps":
                torch.mps.empty_cache()
    
        # Automatically create a validation set if not provided (e.g., when using MultiOutputClassifier)
        if X_val is None or y_val is None:
            X_train, X_val, y_train, y_val = train_test_split(
                X_train, y_train, test_size=0.2, stratify=y_train if y_train.ndim == 1 else None
            )
    
        X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
        y_train = torch.tensor(y_train, dtype=torch.float32, device=device)
        X_val = torch.tensor(X_val, dtype=torch.float32, device=device)
        y_val = torch.tensor(y_val, dtype=torch.float32, device=device)
    
        train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=self.batch_size, shuffle=False)
    
        scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=0.5, patience=5)
        best_val_map = float('-inf')
        best_model_state = None
        epochs_without_improvement = 0
        patience = 20
        start_time = time.time()
    
        for epoch in range(1, self.epochs + 1):
            self.model.train()
            epoch_loss = 0.0
            num_batches = 0
            all_preds_train, all_targets_train = [], []
    
            for xb, yb in train_loader:
                xb, yb = xb.to(device), yb.to(device)
                self.optimizer.zero_grad()
                logits = self.model(xb)
    
                # Ensure yb has the same shape as logits
                if yb.ndim == 1:
                    yb = yb.unsqueeze(1)
    
                loss = self.criterion(logits, yb.float())
                loss.backward()
                self.optimizer.step()
    
                epoch_loss += loss.item()
                num_batches += 1
    
                preds = torch.sigmoid(logits).detach().cpu()
                all_preds_train.append(preds)
                all_targets_train.append(yb.detach().cpu())
    
            avg_train_loss = epoch_loss / num_batches
            all_preds_train = torch.cat(all_preds_train).numpy()
            all_targets_train = torch.cat(all_targets_train).numpy()
    
            try:
                preds_binary_train = (all_preds_train > 0.5).astype(int)
                train_map = average_precision_score(all_targets_train, preds_binary_train, average="macro")

                # train_map = average_precision_score(all_targets_train, all_preds_train, average="macro")
            except ValueError:
                train_map = float("nan")
    
            # Validation
            self.model.eval()
            val_loss = 0.0
            val_batches = 0
            all_preds_val, all_targets_val = [], []
    
            with torch.no_grad():
                for xb, yb in val_loader:
                    xb, yb = xb.to(device), yb.to(device)
                    logits = self.model(xb)
    
                    if yb.ndim == 1:
                        yb = yb.unsqueeze(1)
    
                    preds = torch.sigmoid(logits)
                    loss = self.criterion(logits, yb.float())
    
                    val_loss += loss.item()
                    val_batches += 1
                    all_preds_val.append(preds.cpu())
                    all_targets_val.append(yb.cpu())
    
            avg_val_loss = val_loss / val_batches
            all_preds_val = torch.cat(all_preds_val).numpy()
            all_targets_val = torch.cat(all_targets_val).numpy()
    
            try:
                preds_binary_val = (all_preds_val > 0.5).astype(int)
                val_map = average_precision_score(all_targets_val, preds_binary_val, average="macro")

                # val_map = average_precision_score(all_targets_val, all_preds_val, average="macro")
            except ValueError:
                val_map = float("nan")
    
            current_lr = self.optimizer.param_groups[0]['lr']
            elapsed = time.time() - start_time
    
            if self.verbose:
                print(f"\n✅ Epoch {epoch}/{self.epochs} — "
                      f"Train Loss: {avg_train_loss:.4f} | Train mAP: {train_map:.4f} "
                      f"| Val Loss: {avg_val_loss:.4f} | Val mAP: {val_map:.4f} "
                      f"| LR: {current_lr:.6f} | Elapsed: {elapsed:.1f}s")
    
            self.history.append({
                "epoch": epoch,
                "train_loss": avg_train_loss,
                "train_map": train_map,
                "val_loss": avg_val_loss,
                "val_map": val_map,
                "lr": current_lr
            })
    
            scheduler.step(avg_val_loss)
    
            if val_map > best_val_map:
                best_val_map = val_map
                best_model_state = copy.deepcopy(self.model.state_dict())
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    if self.verbose:
                        print(f"\n🛑 Early stopping at epoch {epoch}. No val mAP improvement for {patience} epochs.")
                    break
    
            if device.type == "mps":
                torch.mps.empty_cache()
    
        if best_model_state:
            self.model.load_state_dict(best_model_state)
    
        if self.verbose:
            print(f"\n🏁 Training complete. Best val mAP: {best_val_map:.4f}. Total time: {time.time() - start_time:.2f}s")
    
        self.classes_ = [0, 1]
        return self


    def predict(self, X):
        self.model.eval()
        X_tensor = torch.tensor(X, dtype=torch.float32, device=self.device)
        with torch.no_grad():
            logits = self.model(X_tensor)
            probs = torch.sigmoid(logits).squeeze(1).cpu().numpy()
            print("logits.shape:", logits.shape)
        return (probs > 0.5).astype(int)

    def predict_proba(self, X):
        self.model.eval()
        X_tensor = torch.tensor(X, dtype=torch.float32, device=self.device)
        with torch.no_grad():
            logits = self.model(X_tensor)
            probs = torch.sigmoid(logits).cpu().numpy()
        return probs

    def score(self, X, y):
        y_pred = self.predict_proba(X)
        return average_precision_score(y, y_pred)

    def get_params(self, deep=True):
        return {
            'input_dim': self.input_dim,
            'epochs': self.epochs,
            'lr': self.lr,
            'batch_size': self.batch_size,
            'device': self.device,
            'n_classes': self.n_classes,
            'verbose': self.verbose
        }

    def set_params(self, **params):
        for k, v in params.items():
            setattr(self, k, v)
        self._build_model()
        return self


In [102]:
from sklearn.multioutput import MultiOutputClassifier

base_clf = SklearnMLPClassifier(input_dim=feature_size, n_classes=1, epochs=200, verbose=True)
multi_clf = MultiOutputClassifier(base_clf)

multi_clf.fit(X_data, y_data)
# y_pred = multi_clf.predict(X_val)



🖥️  Using device: cpu


  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.6037 | Train mAP: 0.6797 | Val Loss: 0.5306 | Val mAP: 0.7996 | LR: 0.001000 | Elapsed: 0.4s

✅ Epoch 2/200 — Train Loss: 0.5405 | Train mAP: 0.7705 | Val Loss: 0.5040 | Val mAP: 0.8367 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 3/200 — Train Loss: 0.5265 | Train mAP: 0.7875 | Val Loss: 0.5096 | Val mAP: 0.8356 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 4/200 — Train Loss: 0.5126 | Train mAP: 0.8065 | Val Loss: 0.4856 | Val mAP: 0.8368 | LR: 0.001000 | Elapsed: 1.2s

✅ Epoch 5/200 — Train Loss: 0.4902 | Train mAP: 0.8237 | Val Loss: 0.4681 | Val mAP: 0.8560 | LR: 0.001000 | Elapsed: 1.4s

✅ Epoch 6/200 — Train Loss: 0.4936 | Train mAP: 0.8216 | Val Loss: 0.4862 | Val mAP: 0.8420 | LR: 0.001000 | Elapsed: 1.7s

✅ Epoch 7/200 — Train Loss: 0.4815 | Train mAP: 0.8343 | Val Loss: 0.4682 | Val mAP: 0.8551 | LR: 0.001000 | Elapsed: 2.0s

✅ Epoch 8/200 — Train Loss: 0.4746 | Train mAP: 0.8387 | Val Loss: 0.4692 | Val mAP: 0.8549 | LR: 0.001000 | Elapsed: 2.3s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.3300 | Train mAP: 0.0376 | Val Loss: 0.2396 | Val mAP: 0.1044 | LR: 0.001000 | Elapsed: 0.4s

✅ Epoch 2/200 — Train Loss: 0.1815 | Train mAP: 0.0629 | Val Loss: 0.1916 | Val mAP: 0.1135 | LR: 0.001000 | Elapsed: 0.8s

✅ Epoch 3/200 — Train Loss: 0.1726 | Train mAP: 0.0731 | Val Loss: 0.1703 | Val mAP: 0.1317 | LR: 0.001000 | Elapsed: 1.0s

✅ Epoch 4/200 — Train Loss: 0.1698 | Train mAP: 0.0628 | Val Loss: 0.1564 | Val mAP: 0.1723 | LR: 0.001000 | Elapsed: 1.3s

✅ Epoch 5/200 — Train Loss: 0.1652 | Train mAP: 0.0706 | Val Loss: 0.1659 | Val mAP: 0.1901 | LR: 0.001000 | Elapsed: 1.6s

✅ Epoch 6/200 — Train Loss: 0.1668 | Train mAP: 0.0745 | Val Loss: 0.1498 | Val mAP: 0.2170 | LR: 0.001000 | Elapsed: 1.9s

✅ Epoch 7/200 — Train Loss: 0.1590 | Train mAP: 0.0979 | Val Loss: 0.1429 | Val mAP: 0.2707 | LR: 0.001000 | Elapsed: 2.2s

✅ Epoch 8/200 — Train Loss: 0.1606 | Train mAP: 0.1131 | Val Loss: 0.1492 | Val mAP: 0.2227 | LR: 0.001000 | Elapsed: 2.4s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.3729 | Train mAP: 0.1528 | Val Loss: 0.3342 | Val mAP: 0.3528 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.2887 | Train mAP: 0.2851 | Val Loss: 0.2998 | Val mAP: 0.3130 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.2836 | Train mAP: 0.3015 | Val Loss: 0.2718 | Val mAP: 0.3900 | LR: 0.001000 | Elapsed: 0.8s

✅ Epoch 4/200 — Train Loss: 0.2686 | Train mAP: 0.3596 | Val Loss: 0.2640 | Val mAP: 0.4229 | LR: 0.001000 | Elapsed: 1.1s

✅ Epoch 5/200 — Train Loss: 0.2514 | Train mAP: 0.3944 | Val Loss: 0.2528 | Val mAP: 0.4820 | LR: 0.001000 | Elapsed: 1.3s

✅ Epoch 6/200 — Train Loss: 0.2477 | Train mAP: 0.4354 | Val Loss: 0.2402 | Val mAP: 0.4817 | LR: 0.001000 | Elapsed: 1.6s

✅ Epoch 7/200 — Train Loss: 0.2369 | Train mAP: 0.4652 | Val Loss: 0.2429 | Val mAP: 0.5026 | LR: 0.001000 | Elapsed: 1.8s

✅ Epoch 8/200 — Train Loss: 0.2411 | Train mAP: 0.4657 | Val Loss: 0.2299 | Val mAP: 0.5356 | LR: 0.001000 | Elapsed: 2.1s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.5057 | Train mAP: 0.2166 | Val Loss: 0.4329 | Val mAP: 0.4335 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.4217 | Train mAP: 0.3307 | Val Loss: 0.3771 | Val mAP: 0.5157 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.3965 | Train mAP: 0.4011 | Val Loss: 0.3622 | Val mAP: 0.5121 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 4/200 — Train Loss: 0.3934 | Train mAP: 0.3905 | Val Loss: 0.3688 | Val mAP: 0.5112 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 5/200 — Train Loss: 0.3722 | Train mAP: 0.4481 | Val Loss: 0.3520 | Val mAP: 0.5409 | LR: 0.001000 | Elapsed: 1.1s

✅ Epoch 6/200 — Train Loss: 0.3719 | Train mAP: 0.4518 | Val Loss: 0.3467 | Val mAP: 0.5443 | LR: 0.001000 | Elapsed: 1.4s

✅ Epoch 7/200 — Train Loss: 0.3682 | Train mAP: 0.4546 | Val Loss: 0.3664 | Val mAP: 0.5358 | LR: 0.001000 | Elapsed: 1.6s

✅ Epoch 8/200 — Train Loss: 0.3683 | Train mAP: 0.4605 | Val Loss: 0.3600 | Val mAP: 0.5246 | LR: 0.001000 | Elapsed: 1.9s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.2835 | Train mAP: 0.0432 | Val Loss: 0.2162 | Val mAP: 0.0997 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.1856 | Train mAP: 0.0591 | Val Loss: 0.2025 | Val mAP: 0.0736 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.1770 | Train mAP: 0.0747 | Val Loss: 0.1877 | Val mAP: 0.1029 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 4/200 — Train Loss: 0.1705 | Train mAP: 0.0869 | Val Loss: 0.1702 | Val mAP: 0.1164 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 5/200 — Train Loss: 0.1733 | Train mAP: 0.0888 | Val Loss: 0.1818 | Val mAP: 0.1054 | LR: 0.001000 | Elapsed: 1.1s

✅ Epoch 6/200 — Train Loss: 0.1609 | Train mAP: 0.1074 | Val Loss: 0.1718 | Val mAP: 0.1382 | LR: 0.001000 | Elapsed: 1.4s

✅ Epoch 7/200 — Train Loss: 0.1664 | Train mAP: 0.0949 | Val Loss: 0.1666 | Val mAP: 0.1237 | LR: 0.001000 | Elapsed: 1.7s

✅ Epoch 8/200 — Train Loss: 0.1605 | Train mAP: 0.1258 | Val Loss: 0.1671 | Val mAP: 0.1268 | LR: 0.001000 | Elapsed: 2.0s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.3748 | Train mAP: 0.0663 | Val Loss: 0.2897 | Val mAP: 0.1561 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.2324 | Train mAP: 0.1092 | Val Loss: 0.2312 | Val mAP: 0.2239 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.2301 | Train mAP: 0.1261 | Val Loss: 0.2180 | Val mAP: 0.2004 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 4/200 — Train Loss: 0.2198 | Train mAP: 0.1356 | Val Loss: 0.2189 | Val mAP: 0.2293 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 5/200 — Train Loss: 0.2095 | Train mAP: 0.1991 | Val Loss: 0.2076 | Val mAP: 0.2776 | LR: 0.001000 | Elapsed: 1.1s

✅ Epoch 6/200 — Train Loss: 0.2042 | Train mAP: 0.2143 | Val Loss: 0.1908 | Val mAP: 0.2598 | LR: 0.001000 | Elapsed: 1.3s

✅ Epoch 7/200 — Train Loss: 0.1942 | Train mAP: 0.3042 | Val Loss: 0.1867 | Val mAP: 0.3268 | LR: 0.001000 | Elapsed: 1.5s

✅ Epoch 8/200 — Train Loss: 0.1927 | Train mAP: 0.2928 | Val Loss: 0.1820 | Val mAP: 0.3227 | LR: 0.001000 | Elapsed: 1.8s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.3046 | Train mAP: 0.0551 | Val Loss: 0.2539 | Val mAP: 0.1435 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.1896 | Train mAP: 0.0883 | Val Loss: 0.1815 | Val mAP: 0.2246 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.1850 | Train mAP: 0.0733 | Val Loss: 0.1883 | Val mAP: 0.2275 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 4/200 — Train Loss: 0.1796 | Train mAP: 0.0939 | Val Loss: 0.1647 | Val mAP: 0.2445 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 5/200 — Train Loss: 0.1707 | Train mAP: 0.1174 | Val Loss: 0.1572 | Val mAP: 0.2671 | LR: 0.001000 | Elapsed: 1.1s

✅ Epoch 6/200 — Train Loss: 0.1722 | Train mAP: 0.1203 | Val Loss: 0.1627 | Val mAP: 0.2945 | LR: 0.001000 | Elapsed: 1.4s

✅ Epoch 7/200 — Train Loss: 0.1617 | Train mAP: 0.1537 | Val Loss: 0.1551 | Val mAP: 0.2241 | LR: 0.001000 | Elapsed: 1.6s

✅ Epoch 8/200 — Train Loss: 0.1514 | Train mAP: 0.2118 | Val Loss: 0.1550 | Val mAP: 0.2105 | LR: 0.001000 | Elapsed: 1.9s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.2310 | Train mAP: 0.0133 | Val Loss: 0.1504 | Val mAP: 0.0307 | LR: 0.001000 | Elapsed: 0.3s

✅ Epoch 2/200 — Train Loss: 0.0802 | Train mAP: 0.0119 | Val Loss: 0.0898 | Val mAP: 0.0210 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.0736 | Train mAP: 0.0125 | Val Loss: 0.0811 | Val mAP: 0.1292 | LR: 0.001000 | Elapsed: 0.8s

✅ Epoch 4/200 — Train Loss: 0.0734 | Train mAP: 0.0108 | Val Loss: 0.0722 | Val mAP: 0.0329 | LR: 0.001000 | Elapsed: 1.0s

✅ Epoch 5/200 — Train Loss: 0.0679 | Train mAP: 0.0186 | Val Loss: 0.0682 | Val mAP: 0.0204 | LR: 0.001000 | Elapsed: 1.3s

✅ Epoch 6/200 — Train Loss: 0.0638 | Train mAP: 0.0255 | Val Loss: 0.0753 | Val mAP: 0.0185 | LR: 0.001000 | Elapsed: 1.5s

✅ Epoch 7/200 — Train Loss: 0.0635 | Train mAP: 0.0214 | Val Loss: 0.0618 | Val mAP: 0.0244 | LR: 0.001000 | Elapsed: 1.8s

✅ Epoch 8/200 — Train Loss: 0.0645 | Train mAP: 0.0293 | Val Loss: 0.0611 | Val mAP: 0.0236 | LR: 0.001000 | Elapsed: 2.0s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.3146 | Train mAP: 0.7444 | Val Loss: 0.1815 | Val mAP: 0.8617 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.1803 | Train mAP: 0.8205 | Val Loss: 0.1531 | Val mAP: 0.8513 | LR: 0.001000 | Elapsed: 0.4s

✅ Epoch 3/200 — Train Loss: 0.1590 | Train mAP: 0.8468 | Val Loss: 0.1641 | Val mAP: 0.8324 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 4/200 — Train Loss: 0.1508 | Train mAP: 0.8592 | Val Loss: 0.1671 | Val mAP: 0.8182 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 5/200 — Train Loss: 0.1482 | Train mAP: 0.8630 | Val Loss: 0.1661 | Val mAP: 0.7987 | LR: 0.001000 | Elapsed: 1.1s

✅ Epoch 6/200 — Train Loss: 0.1413 | Train mAP: 0.8752 | Val Loss: 0.1495 | Val mAP: 0.8452 | LR: 0.001000 | Elapsed: 1.4s

✅ Epoch 7/200 — Train Loss: 0.1310 | Train mAP: 0.8851 | Val Loss: 0.1422 | Val mAP: 0.8572 | LR: 0.001000 | Elapsed: 1.6s

✅ Epoch 8/200 — Train Loss: 0.1328 | Train mAP: 0.8846 | Val Loss: 0.1468 | Val mAP: 0.8422 | LR: 0.001000 | Elapsed: 1.9s

✅ Epoch

  X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
  X_val = torch.tensor(X_val, dtype=torch.float32, device=device)



✅ Epoch 1/200 — Train Loss: 0.3272 | Train mAP: 0.0874 | Val Loss: 0.2675 | Val mAP: 0.2120 | LR: 0.001000 | Elapsed: 0.2s

✅ Epoch 2/200 — Train Loss: 0.2423 | Train mAP: 0.1124 | Val Loss: 0.2251 | Val mAP: 0.2686 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 3/200 — Train Loss: 0.2297 | Train mAP: 0.1520 | Val Loss: 0.2079 | Val mAP: 0.2547 | LR: 0.001000 | Elapsed: 0.7s

✅ Epoch 4/200 — Train Loss: 0.2291 | Train mAP: 0.1674 | Val Loss: 0.2121 | Val mAP: 0.2492 | LR: 0.001000 | Elapsed: 0.9s

✅ Epoch 5/200 — Train Loss: 0.2194 | Train mAP: 0.1932 | Val Loss: 0.2070 | Val mAP: 0.2698 | LR: 0.001000 | Elapsed: 1.2s

✅ Epoch 6/200 — Train Loss: 0.2137 | Train mAP: 0.2115 | Val Loss: 0.2001 | Val mAP: 0.2514 | LR: 0.001000 | Elapsed: 1.4s

✅ Epoch 7/200 — Train Loss: 0.2120 | Train mAP: 0.2313 | Val Loss: 0.2032 | Val mAP: 0.2995 | LR: 0.001000 | Elapsed: 1.7s

✅ Epoch 8/200 — Train Loss: 0.2036 | Train mAP: 0.2536 | Val Loss: 0.1963 | Val mAP: 0.3114 | LR: 0.001000 | Elapsed: 2.2s

✅ Epoch

In [111]:
from joblib import Parallel, delayed
import torch
import json

def create_test_features(path, n_jobs=4):
    # Load test data
    with open(path, 'r') as f:
        test_json = eval(f.read())  # or use json.load(f) if it's valid JSON

    keys = list(test_json)

    # Parallel feature extraction
    X = Parallel(n_jobs=n_jobs)(
        delayed(features)(key) for key in keys
    )
    
    # Convert to tensor format
    X = torch.stack([torch.tensor(x, dtype=torch.float32) for x in X])

    return keys, X


In [112]:
keys, X = create_test_features(dataroot3+"/test.json", n_jobs=4)
y_pred = multi_clf.predict(X.numpy())  # MultiOutputClassifier expects NumPy

# add code to print to predictions3.json. format should be key:prediction converted back using an index to tag 

KeyboardInterrupt: 

In [105]:
y_pred.shape

(1000, 10)

In [106]:
# Convert multi-hot predictions to tag lists
predictions = {}
for key, pred_vector in zip(keys, y_pred):
    tag_list = [TAGS[i] for i, val in enumerate(pred_vector) if val == 1]
    predictions[key] = tag_list

# Save predictions to predictions3.json
output_path = "predictions3.json"
with open(output_path, "w") as f:
    json.dump(predictions, f, indent=2)

print(f"✅ Predictions written to {output_path}")


✅ Predictions written to predictions3.json


LGBM classifier

In [124]:
from lightgbm import LGBMClassifier

# Wrap LightGBM in MultiOutputClassifier

base_model = LGBMClassifier(
    objective='binary',  # each output is binary (0 or 1)
    n_estimators=100,
    learning_rate=0.1
)

model = MultiOutputClassifier(base_model)
model.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 1765, number of negative: 1835
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.139361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 572220
[LightGBM] [Info] Number of data points in the train set: 3600, number of used features: 2244
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.490278 -> initscore=-0.038894
[LightGBM] [Info] Start training from score -0.038894
[LightGBM] [Info] Number of positive: 147, number of negative: 3453
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.043263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 572220
[LightGBM] [Info] Number of data points in the train set: 3600, number of used features: 2244
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.040833 -> initscore=-3.156566
[LightGBM] [Info] Start training from score -3.156566
[LightGBM] 

In [125]:
keys, X = create_test_features(dataroot3+"/test.json", n_jobs=4)
y_pred = model.predict(X.numpy())  # MultiOutputClassifier expects NumPy
# add code to print to predictions3.json. format should be key:prediction converted back using an index to tag 

  X = torch.stack([torch.tensor(x, dtype=torch.float32) for x in X])


In [126]:
y_pred.shape

(1000, 10)

In [127]:
# Convert multi-hot predictions to tag lists
predictions = {}
for key, pred_vector in zip(keys, y_pred):
    tag_list = [TAGS[i] for i, val in enumerate(pred_vector) if val == 1]
    predictions[key] = tag_list

# Save predictions to predictions3.json
output_path = "predictions3.json"
with open(output_path, "w") as f:
    json.dump(predictions, f, indent=2)

print(f"✅ Predictions written to {output_path}")

✅ Predictions written to predictions3.json


Custom MLP classifier

In [122]:
model = SklearnMLPClassifier(input_dim=feature_size, n_classes=10, epochs=200, verbose=True)
model.fit(X_train, y_train, X_val_global, y_val_global)


🖥️  Using device: cpu

✅ Epoch 1/200 — Train Loss: 0.5250 | Train mAP: 0.1530 | Val Loss: 0.4342 | Val mAP: 0.2036 | LR: 0.001000 | Elapsed: 0.5s

✅ Epoch 2/200 — Train Loss: 0.4214 | Train mAP: 0.1813 | Val Loss: 0.4036 | Val mAP: 0.2281 | LR: 0.001000 | Elapsed: 1.0s

✅ Epoch 3/200 — Train Loss: 0.4012 | Train mAP: 0.1990 | Val Loss: 0.3959 | Val mAP: 0.2376 | LR: 0.001000 | Elapsed: 1.6s

✅ Epoch 4/200 — Train Loss: 0.3936 | Train mAP: 0.2083 | Val Loss: 0.3894 | Val mAP: 0.2218 | LR: 0.001000 | Elapsed: 2.3s

✅ Epoch 5/200 — Train Loss: 0.3784 | Train mAP: 0.2172 | Val Loss: 0.3935 | Val mAP: 0.2382 | LR: 0.001000 | Elapsed: 2.9s

✅ Epoch 6/200 — Train Loss: 0.3792 | Train mAP: 0.2176 | Val Loss: 0.3840 | Val mAP: 0.2377 | LR: 0.001000 | Elapsed: 3.5s

✅ Epoch 7/200 — Train Loss: 0.3736 | Train mAP: 0.2238 | Val Loss: 0.3800 | Val mAP: 0.2400 | LR: 0.001000 | Elapsed: 3.9s

✅ Epoch 8/200 — Train Loss: 0.3654 | Train mAP: 0.2324 | Val Loss: 0.3781 | Val mAP: 0.2459 | LR: 0.001000 |

In [119]:
keys, X = create_test_features(dataroot3+"/test.json", n_jobs=4)
y_pred = model.predict(X.numpy())

logits.shape: torch.Size([1000, 10])


  X = torch.stack([torch.tensor(x, dtype=torch.float32) for x in X])


In [120]:
# Convert multi-hot predictions to tag lists
predictions = {}
for key, pred_vector in zip(keys, y_pred):
    tag_list = [TAGS[i] for i, val in enumerate(pred_vector) if val == 1]
    predictions[key] = tag_list

# Save predictions to predictions3.json
output_path = "predictions3.json"
with open(output_path, "w") as f:
    json.dump(predictions, f, indent=2)

print(f"✅ Predictions written to {output_path}")


✅ Predictions written to predictions3.json
