In [1]:
import numpy as np
import random
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from data import BirdClefTrainAudio, BirdClefHarmonics
from model import HarmonicModel
from mimir import training
import time
import pickle
import os

In [2]:
# Single source of truth for every RNG seeded below, so the generators
# cannot accidentally drift apart when the seed is changed.
SEED = 20250403

np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
# Seed every visible GPU, not just the current one; this call is silently
# ignored when CUDA is unavailable.
torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and disable its autotuner, which may
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [3]:
# Dataset location handed to BirdClefTrainAudio; a relative path, so it is
# resolved against the kernel's current working directory.
data_folder = 'data'

In [4]:
# Raw training audio. NOTE(review): max_duration=5 and sr=16000 are presumably
# seconds and Hz -- confirm against data.BirdClefTrainAudio.
audio = BirdClefTrainAudio(data_folder, max_duration=5, sr=16000)

# Building the harmonic dataset is expensive, so the result is cached on disk.
# NOTE(review): the cache is not keyed on the parameters above (sr, fmin,
# fmax, ...); delete data.pkl by hand after changing any of them.
cachefile = "data.pkl"
if os.path.isfile(cachefile):
    # SECURITY: pickle.load can execute arbitrary code. Only load a cache file
    # that this notebook wrote itself; never one from an untrusted source.
    with open(cachefile, "rb") as f:
        ds = pickle.load(f)
else:
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during the run.
    start = time.perf_counter()
    ds = BirdClefHarmonics(audio, fmin=500, fmax=4000)
    print(f"Loading data took {time.perf_counter()-start} seconds")
    # Write to a temporary file and move it into place afterwards so an
    # interrupted dump never leaves a truncated cache that would crash the
    # next run on load.
    tmp_cachefile = cachefile + ".tmp"
    with open(tmp_cachefile, "wb") as f:
        pickle.dump(ds, f)
    os.replace(tmp_cachefile, cachefile)

In [5]:
# Cross-entropy loss with a per-label weight vector taken from the audio
# dataset and moved to the device used for training.
label_weight_tensor = torch.tensor(audio.label_weights())
loss = nn.CrossEntropyLoss(weight=label_weight_tensor.to(training.DEVICE))

In [6]:
def accuracy(preds, y):
    """Count how many predictions in a batch match the target labels.

    Args:
        preds: tensor of per-class scores; the predicted label for each row is
            the index of the largest value along dimension 1.
        y: tensor of target label indices, compared element-wise against the
            predicted labels.

    Returns:
        A 0-dim integer tensor holding the number of correct predictions.
        This is a count, not a fraction -- NOTE(review): the training loop
        presumably normalises it by the number of samples; confirm in mimir.
    """
    # Reduce with the tensor's own .sum() rather than Python's builtin sum(),
    # which iterates the tensor element by element (one op per sample).
    return (preds.argmax(dim=1) == y).sum()

In [7]:
# Hyper-parameters for the run: model_params and optimizer_params are passed
# through to training.HyperParameters unchanged.
hps = training.HyperParameters(
    model_params=dict(
        n_labels=audio.n_labels,
        n_harmonics=10,
        num_filter_maps=64,
        kernel_size=128,
        hidden=128,
        dropout=0.5,
    ),
    optimizer_params=dict(lr=2e-4),
)

In [8]:
# Launch training of HarmonicModel on the cached dataset; the call returns the
# run results together with the trained model.
results, model = training.train(
    data=ds,
    model_class=HarmonicModel,
    hyper_params=hps,
    loss_fn=loss,
    name="harmonicmodel",
    pad=True,
    batch_size=16,
    metrics={"Accuracy": accuracy},
)

Epoch   0: Loss=5.28267 val, 5.27317 train Accuracy=0.40896 val 0.29812 train 
Epoch   1: Loss=5.27517 val, 5.26048 train Accuracy=0.39216 val 0.34044 train 
Epoch   2: Loss=5.26877 val, 5.25006 train Accuracy=0.44538 val 0.43504 train 
Epoch   3: Loss=5.26397 val, 5.24278 train Accuracy=0.42857 val 0.46678 train 
Epoch   4: Loss=5.25931 val, 5.23502 train Accuracy=0.46779 val 0.49541 train 
Epoch   5: Loss=5.25813 val, 5.22990 train Accuracy=0.51261 val 0.52342 train 
Epoch   6: Loss=5.25652 val, 5.22516 train Accuracy=0.38375 val 0.43940 train 
Epoch   7: Loss=5.25296 val, 5.22118 train Accuracy=0.48459 val 0.51284 train 
Epoch   8: Loss=5.25020 val, 5.21684 train Accuracy=0.45658 val 0.42010 train 
Epoch   9: Loss=5.24787 val, 5.21458 train Accuracy=0.47339 val 0.47923 train 
Epoch  10: Loss=5.24730 val, 5.20949 train Accuracy=0.40056 val 0.40766 train 
Epoch  11: Loss=5.24603 val, 5.20714 train Accuracy=0.41176 val 0.40703 train 
Epoch  12: Loss=5.24494 val, 5.20524 train Accuracy=