In [1]:
import numpy as np
import random
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from data import BirdClefTrainAudio, BirdClefHarmonics
from model import HarmonicModel
from mimir import training
import time
import pickle
import os

In [2]:
# One shared seed for every random number generator used in this notebook
# (NumPy, the standard library and PyTorch on CPU and CUDA).
SEED = 20250403

np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Request deterministic cuDNN kernels and switch off the cuDNN benchmark
# mode so repeated runs select the same algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


In [3]:
# Folder handed to BirdClefTrainAudio as the location of the input data.
# The path is relative, so it is resolved against the current working directory.
data_folder = "data"

In [4]:
# Build the raw-audio dataset, then obtain the harmonic dataset derived from
# it: reuse the pickled copy when one exists, otherwise compute and cache it.
audio = BirdClefTrainAudio(data_folder, max_duration=5, sr=16000)
cachefile="data.pkl"
if os.path.isfile(cachefile):
    # NOTE(review): pickle.load can execute arbitrary code, so only load cache
    # files written by this notebook. The cache file name does not encode the
    # parameters used below (fmin, fmax, sr, max_duration); delete the file
    # after changing any of them, otherwise stale features are loaded.
    with open(cachefile, "rb") as f:
        ds = pickle.load(f)
else:
    # perf_counter is a monotonic clock intended for measuring durations,
    # unlike time.time(), which can jump when the system clock is adjusted.
    start = time.perf_counter()
    ds = BirdClefHarmonics(audio, fmin=500, fmax=4000)
    print(f"Loading data took {time.perf_counter()-start} seconds")
    # Write to a temporary file first and rename it afterwards, so that an
    # interrupted dump never leaves a truncated cache file that would break
    # pickle.load on the next run.
    tmp_cachefile = cachefile + ".tmp"
    with open(tmp_cachefile, "wb") as f:
        pickle.dump(ds, f)
    os.replace(tmp_cachefile, cachefile)

In [5]:
# Cross-entropy loss with per-label weights supplied by the audio dataset
# (presumably to compensate for class imbalance -- confirm in data.py).
# The weight tensor is moved to the device exposed by the training module.
class_weights = torch.tensor(audio.label_weights())
class_weights = class_weights.to(training.DEVICE)
loss = nn.CrossEntropyLoss(weight=class_weights)

In [6]:
def accuracy(preds, y):
    """Count the predictions whose highest-scoring class equals the target.

    Args:
        preds: Score tensor; the predicted class is the argmax along
            dimension 1.
        y: Target class indices, compared element-wise with the predicted
            classes.

    Returns:
        A zero-dimensional integer tensor holding the number of matching
        predictions. This is a count, not a ratio; presumably the training
        loop normalises it by the number of samples (TODO confirm).
    """
    # Use the tensor's own reduction instead of Python's built-in sum():
    # the built-in iterates element by element (one tiny operation per
    # sample) and returns a plain int 0 for an empty batch, whereas .sum()
    # is a single vectorised call that always returns a tensor.
    return (preds.argmax(1) == y).sum()

In [7]:
# Hyper-parameters for the training run: keyword arguments for the model
# constructor and for the optimizer, bundled in a single object.
hps = training.HyperParameters(
    model_params={
        'n_labels': audio.n_labels,
        'n_harmonics': 10,
        "num_filter_maps": 64,
        "kernel_size": 128,
        "hidden": 128,
    },
    optimizer_params={
        'lr': 1e-3,
    },
)

In [8]:
# Train a HarmonicModel on the harmonic dataset with the weighted loss and
# the hyper-parameters defined above, reporting the "Accuracy" metric.
results, model = training.train(
    data=ds,
    model_class=HarmonicModel,
    hyper_params=hps,
    loss_fn=loss,
    name="convmodel",
    pad=True,
    batch_size=16,
    metrics={"Accuracy": accuracy},
)

Epoch   0: Loss=5.28320 val, 5.27421 train Accuracy=0.31653 val 0.34853 train 
Epoch   1: Loss=5.28194 val, 5.26336 train Accuracy=0.36415 val 0.36160 train 
Epoch   2: Loss=5.27255 val, 5.24881 train Accuracy=0.34734 val 0.41435 train 
Epoch   3: Loss=5.26859 val, 5.24209 train Accuracy=0.34174 val 0.38961 train 
Epoch   4: Loss=5.26385 val, 5.23843 train Accuracy=0.30812 val 0.38961 train 
Epoch   5: Loss=5.26144 val, 5.22988 train Accuracy=0.31373 val 0.37156 train 
Epoch   6: Loss=5.26240 val, 5.22605 train Accuracy=0.35854 val 0.37654 train 
Epoch   7: Loss=5.25970 val, 5.22256 train Accuracy=0.36415 val 0.41139 train 
Epoch   8: Loss=5.26111 val, 5.22101 train Accuracy=0.33053 val 0.37778 train 
Epoch   9: Loss=5.25427 val, 5.21586 train Accuracy=0.34734 val 0.37405 train 
Epoch  10: Loss=5.25165 val, 5.21123 train Accuracy=0.35294 val 0.37031 train 
Epoch  11: Loss=5.25038 val, 5.21096 train Accuracy=0.33333 val 0.33422 train 
Epoch  12: Loss=5.25259 val, 5.21058 train Accuracy=