In [1]:
import torch
from transformers import ASTModel
from sklearn.metrics import accuracy_score, roc_auc_score
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import tqdm
import pickle
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [None]:
class EffectClassifier(torch.nn.Module):
    def __init__(self, n_classes, embed_dim=768):
        super(EffectClassifier, self).__init__()
        self.cnn = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1)),
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            torch.nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1)),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            torch.nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1)),
            torch.nn.BatchNorm2d(128),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            torch.nn.Flatten()
        )
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(128 * 1764, embed_dim),  # Adjust input size to match flattened output
            torch.nn.ReLU(),
            torch.nn.LayerNorm(embed_dim),
            torch.nn.Linear(embed_dim, embed_dim),
            torch.nn.ReLU(),
            torch.nn.LayerNorm(embed_dim),
            torch.nn.Linear(embed_dim, embed_dim),
        )
        self.attn = torch.nn.MultiheadAttention(embed_dim * 2, num_heads=2, dropout=.1, batch_first=True)
        self.fc = torch.nn.Linear(embed_dim * 2, embed_dim)
        self.cls = torch.nn.Linear(embed_dim, n_classes)
    def forward(self, x_wet, x_dry):
        x_wet = self.cnn(x_wet.unsqueeze(1))  # Adjust unsqueeze dimension
        x_dry = self.cnn(x_dry.unsqueeze(1))  # Adjust unsqueeze dimension
        x_wet = self.mlp(x_wet)
        x_dry = self.mlp(x_dry)
        x = torch.cat([x_wet, x_dry], dim=1)
        x, _ = self.attn(x, x, x)  # Unpack attn output
        x = self.cls(self.fc(x))
        return x

In [3]:
from dataset.data_generator import DataGenerator
from pedalboard import Chorus, Reverb, Delay, Distortion, Gain
effects_parameters = {
    "Reverb": {
        "room_size": (0, 1),
        "damping": (0, 1),
        "wet_level": (0, 1),
        "dry_level": (0, 1),
        "width": (0, 1),
        "freeze_mode": (0, 1)
    },
    "Delay": {
        "delay_seconds": (0, 2),
        "feedback": (0, 1),
        "mix": (0, 1)
    },
    "Gain": {
        "gain_db": (-60, 24)
    },
    "Chorus": {
        "rate_hz": (0.1, 5.0),
        "depth": (0, 1),
        "centre_delay_ms": (0, 50),
        "feedback": (-1, 1),
        "mix": (0, 1)
    },
    "Distortion": {
        "drive_db": (0, 60)
    }
    }

effects = [Chorus, Reverb, Delay, Gain, Distortion]

generator = DataGenerator(effects_parameters, effects)

In [4]:
import json
import pandas as pd

with open('data/nsynth-train.jsonwav/nsynth-train/examples.json', 'r') as f:
    data = json.load(f)
df = pd.DataFrame.from_records(data)
df = df.T
guitar_df = df[df['instrument_family_str'] == 'guitar']
elctric_guitar_df = guitar_df[guitar_df['instrument_source_str'] == "electronic"]
elctric_guitar_df = elctric_guitar_df.sample(1000)
dry_tones = [dry_tone + ".wav" for dry_tone in elctric_guitar_df['note_str'].tolist()]

In [5]:
dataset = generator.create_data(10, 'data/nsynth-train.jsonwav/nsynth-train/audio',dry_tones=dry_tones,max_chain_length=1)

100%|██████████| 1000/1000 [00:33<00:00, 30.13it/s]


In [6]:
train_data, test_data = train_test_split(dataset, test_size=0.2)
test_data, val_data = train_test_split(test_data, test_size=0.5)
train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
test_loader = DataLoader(test_data, batch_size=4, shuffle=False)
val_loader = DataLoader(val_data, batch_size=4, shuffle=False)

In [7]:
def eval(model, loss_fn, dl):
    model.eval()
    total_loss = 0
    labels = []
    labels_ = []
    preds = []
    logits = []
    for batch in tqdm.tqdm(dl):
        wet_features = batch['wet_tone_features'].to(device)
        dry_features = batch['dry_tone_features'].to(device)
        label = batch['effects'].to(device)
        with torch.no_grad():
            logits_ = model(wet_features, dry_features)
        loss = loss_fn(logits_, label)
        total_loss += loss.item()
        for i in range(logits_.shape[0]):
            preds.append(torch.argmax(logits_[i], dim=0).cpu().numpy())
            labels.append(torch.argmax(label[i], dim=0).cpu().numpy())
            labels_.append(torch.nn.functional.one_hot(torch.argmax(label[i], dim=0), num_classes=5).cpu().numpy())
            logits.append(logits_[i].cpu().numpy())
    loss = total_loss
    print(f"Test: Accuracy:{accuracy_score(labels, preds)} | AUROC: {roc_auc_score(labels_, logits)} | Total Loss:{total_loss}")
    return loss

In [8]:
def train(model, optimizer, loss_fn, train_loader,test_loader,lr_scheduler, epochs=10):
    model.train()
    min_loss = 99999999
    labels = []
    labels_ = []
    preds = []
    logits = []
    for epoch in range(epochs):
        total_loss = 0
        for batch in tqdm.tqdm(train_loader):
            optimizer.zero_grad()
            wet_features = batch['wet_tone_features'].to(device)
            dry_features = batch['dry_tone_features'].to(device)
            label = batch['effects'].to(device)
            output = model(wet_features,dry_features)
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            for i in range(output.shape[0]):
                preds.append(torch.argmax(output[i], dim=0).detach().cpu().numpy())
                labels.append(torch.argmax(label[i], dim=0).detach().cpu().numpy())
                labels_.append(torch.nn.functional.one_hot(torch.argmax(label[i], dim=0), num_classes=5).detach().cpu().numpy())
                logits.append(output[i].detach().cpu().numpy())
        print(f"Train: Epoch {epoch+1} | Accuracy: {accuracy_score(labels,preds)} | AUROC: {roc_auc_score(labels_,logits)} | Loss: {total_loss}")
        loss = eval(model, loss_fn, test_loader)
        lr_scheduler.step(loss)
        if loss < min_loss:
            print(f"saving model at epoch {epoch+1}")
            min_loss = loss
            torch.save(model.state_dict(), "saved_models/multiclass_model.pth")
    return

In [9]:
model = EffectClassifier(5).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=.000002)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5, verbose=True)
loss_fn = torch.nn.CrossEntropyLoss()



In [10]:
train(model, optimizer, loss_fn, train_loader, test_loader,scheduler, epochs=20)

100%|██████████| 2000/2000 [03:56<00:00,  8.44it/s]


Train: Epoch 1 | Accuracy: 0.47975 | AUROC: 0.7641257337047731 | Loss: 2327.3662753999233


100%|██████████| 250/250 [00:05<00:00, 48.13it/s]


Test: Accuracy:0.605 | AUROC: 0.8405208019501714 | Total Loss:219.37120484560728
saving model at epoch 1


100%|██████████| 2000/2000 [03:53<00:00,  8.56it/s]


Train: Epoch 2 | Accuracy: 0.594125 | AUROC: 0.820378244181706 | Loss: 1436.5103344139643


100%|██████████| 250/250 [00:05<00:00, 48.30it/s]


Test: Accuracy:0.785 | AUROC: 0.9063004219655728 | Total Loss:146.46221427666023
saving model at epoch 2


100%|██████████| 2000/2000 [03:53<00:00,  8.56it/s]


Train: Epoch 3 | Accuracy: 0.6649166666666667 | AUROC: 0.8490211266843231 | Loss: 1033.6475739325397


100%|██████████| 250/250 [00:05<00:00, 48.97it/s]


Test: Accuracy:0.819 | AUROC: 0.9168070894551693 | Total Loss:117.70435094553977
saving model at epoch 3


100%|██████████| 2000/2000 [03:53<00:00,  8.58it/s]


Train: Epoch 4 | Accuracy: 0.7075 | AUROC: 0.8673932065244584 | Loss: 857.264041812974


100%|██████████| 250/250 [00:05<00:00, 47.94it/s]


Test: Accuracy:0.859 | AUROC: 0.9268608151814741 | Total Loss:98.57876202929765
saving model at epoch 4


100%|██████████| 2000/2000 [03:54<00:00,  8.54it/s]


Train: Epoch 5 | Accuracy: 0.7386 | AUROC: 0.8799275507938124 | Loss: 703.027470884932


100%|██████████| 250/250 [00:05<00:00, 47.65it/s]


Test: Accuracy:0.863 | AUROC: 0.9456443549456036 | Total Loss:91.146695549367
saving model at epoch 5


100%|██████████| 2000/2000 [03:53<00:00,  8.56it/s]


Train: Epoch 6 | Accuracy: 0.7635 | AUROC: 0.8885772313050179 | Loss: 616.1245798440068


100%|██████████| 250/250 [00:05<00:00, 48.58it/s]


Test: Accuracy:0.887 | AUROC: 0.9493194666155995 | Total Loss:82.30676212569233
saving model at epoch 6


100%|██████████| 2000/2000 [03:53<00:00,  8.56it/s]


Train: Epoch 7 | Accuracy: 0.7823571428571429 | AUROC: 0.8958620031142367 | Loss: 555.7871979666525


100%|██████████| 250/250 [00:05<00:00, 47.79it/s]


Test: Accuracy:0.892 | AUROC: 0.947817721701839 | Total Loss:75.57696102559566
saving model at epoch 7


100%|██████████| 2000/2000 [03:54<00:00,  8.53it/s]


Train: Epoch 8 | Accuracy: 0.798515625 | AUROC: 0.9016017421996102 | Loss: 495.0430393658753


100%|██████████| 250/250 [00:05<00:00, 48.29it/s]


Test: Accuracy:0.907 | AUROC: 0.95238541703663 | Total Loss:70.83886298639118
saving model at epoch 8


100%|██████████| 2000/2000 [03:54<00:00,  8.52it/s]


Train: Epoch 9 | Accuracy: 0.8112361111111112 | AUROC: 0.906441195056195 | Loss: 460.4503088057754


100%|██████████| 250/250 [00:05<00:00, 47.35it/s]


Test: Accuracy:0.878 | AUROC: 0.9437237561596126 | Total Loss:77.39265092008281


100%|██████████| 2000/2000 [03:54<00:00,  8.52it/s]


Train: Epoch 10 | Accuracy: 0.822525 | AUROC: 0.9106188012995465 | Loss: 415.40767438795683


100%|██████████| 250/250 [00:05<00:00, 48.11it/s]


Test: Accuracy:0.89 | AUROC: 0.9570848309708024 | Total Loss:76.94627921532083


100%|██████████| 2000/2000 [03:54<00:00,  8.53it/s]


Train: Epoch 11 | Accuracy: 0.832284090909091 | AUROC: 0.9143696692700786 | Loss: 379.84724001528775


100%|██████████| 250/250 [00:05<00:00, 47.94it/s]


Test: Accuracy:0.904 | AUROC: 0.9581524531412111 | Total Loss:75.04134442388886


100%|██████████| 2000/2000 [03:54<00:00,  8.52it/s]


Train: Epoch 12 | Accuracy: 0.8413854166666667 | AUROC: 0.9175363564333106 | Loss: 318.85044758519143


100%|██████████| 250/250 [00:05<00:00, 47.41it/s]


Test: Accuracy:0.908 | AUROC: 0.9556451573702403 | Total Loss:66.99143869107502
saving model at epoch 12


100%|██████████| 2000/2000 [03:55<00:00,  8.51it/s]


Train: Epoch 13 | Accuracy: 0.8492692307692308 | AUROC: 0.9204272129711185 | Loss: 296.9192143608889


100%|██████████| 250/250 [00:02<00:00, 90.41it/s] 


Test: Accuracy:0.915 | AUROC: 0.9556221810014573 | Total Loss:66.74520796797879
saving model at epoch 13


100%|██████████| 2000/2000 [03:59<00:00,  8.34it/s]


Train: Epoch 14 | Accuracy: 0.8563125 | AUROC: 0.923022711086173 | Loss: 276.194009509396


100%|██████████| 250/250 [00:05<00:00, 47.36it/s]


Test: Accuracy:0.907 | AUROC: 0.9579265238797156 | Total Loss:69.99895143139156


100%|██████████| 2000/2000 [03:55<00:00,  8.51it/s]


Train: Epoch 15 | Accuracy: 0.862625 | AUROC: 0.9252555852104116 | Loss: 265.3064521079383


100%|██████████| 250/250 [00:05<00:00, 47.32it/s]


Test: Accuracy:0.912 | AUROC: 0.9590125767285308 | Total Loss:67.14544354036661


100%|██████████| 2000/2000 [03:52<00:00,  8.60it/s]


Train: Epoch 16 | Accuracy: 0.86828125 | AUROC: 0.927286458754567 | Loss: 244.48767403190323


100%|██████████| 250/250 [00:05<00:00, 47.30it/s]


Test: Accuracy:0.907 | AUROC: 0.9547649163209115 | Total Loss:72.75404021641225


100%|██████████| 2000/2000 [03:55<00:00,  8.50it/s]


Train: Epoch 17 | Accuracy: 0.8736029411764706 | AUROC: 0.9291890082095671 | Loss: 221.1672965685575


100%|██████████| 250/250 [00:05<00:00, 47.23it/s]


Test: Accuracy:0.91 | AUROC: 0.9551390581328038 | Total Loss:72.18898732495018


100%|██████████| 2000/2000 [03:55<00:00,  8.50it/s]


Train: Epoch 18 | Accuracy: 0.8784166666666666 | AUROC: 0.9309046008028286 | Loss: 207.38291606088478


100%|██████████| 250/250 [00:05<00:00, 47.28it/s]


Test: Accuracy:0.909 | AUROC: 0.9562284416450124 | Total Loss:76.53139106442723


100%|██████████| 2000/2000 [03:55<00:00,  8.50it/s]


Train: Epoch 19 | Accuracy: 0.8828026315789473 | AUROC: 0.9324792916107901 | Loss: 201.06766048076042


100%|██████████| 250/250 [00:05<00:00, 47.27it/s]


Test: Accuracy:0.901 | AUROC: 0.9579326912720483 | Total Loss:75.51038340871219


100%|██████████| 2000/2000 [03:55<00:00,  8.51it/s]


Train: Epoch 20 | Accuracy: 0.88705625 | AUROC: 0.9339888047120134 | Loss: 174.89882247113474


100%|██████████| 250/250 [00:05<00:00, 47.44it/s]


Test: Accuracy:0.905 | AUROC: 0.9571179200925372 | Total Loss:76.3948657559149


In [11]:
eval(model, loss_fn, val_loader)

100%|██████████| 250/250 [00:06<00:00, 39.86it/s]

Test: Accuracy:0.776 | AUROC: 0.9073032759029969 | Total Loss:200.8124302340484





200.8124302340484