In [1]:
import numpy as np
import librosa
import torch
import pandas as pd
import torchaudio
from torchvision import datasets
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.optim import Adam
from sklearn.metrics import f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
class BirdClefDataset(Dataset):
    def __init__(self, df, target_sample_rate, duration):
        self.audio_paths = df['filename'].values
        self.labels = df['primary_label_encoded'].values
        self.target_sample_rate = target_sample_rate
        self.num_samples = target_sample_rate * duration
    
    def __len__(self):
        return len(self.audio_paths)
    
    def __getitem__(self, index):
        audio_path = f"data/{self.audio_paths[index]}"
        signal, sr = torchaudio.load(audio_path)

        # Check if our sample rate is the same as the target sameple rate. If not, resample
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            signal = resampler(signal)
        
        # Check shape and verify it is correct
        if signal.shape[0] > 1:
            signal = torch.mean(signal, axis=0, keepdim=True)
        
        # Check the number of samples and pad/truncate as needed
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]
        
        elif signal.shape[1] < self.num_samples:
            num_missing_samples = self.num_samples - signal.shape[1]
            last_dim_padding = (0, num_missing_samples)
            signal = F.pad(signal, last_dim_padding)
        
        label = torch.tensor(self.labels[index])

        return signal, label

In [17]:
df = pd.read_csv('data/train_metadata.csv')

encoder = LabelEncoder()
df['primary_label_encoded'] = encoder.fit_transform(df['primary_label'])
df.head()


Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,time,url,filename,primary_label_encoded
0,afrsil1,[],"['call', 'flight call']",12.391,-1.493,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,2.5,08:00,https://www.xeno-canto.org/125458,afrsil1/XC125458.ogg,0
1,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],19.8801,-155.7254,Euodice cantans,African Silverbill,Dan Lane,Creative Commons Attribution-NonCommercial-Sha...,3.5,08:30,https://www.xeno-canto.org/175522,afrsil1/XC175522.ogg,0
2,afrsil1,[],"['call', 'song']",16.2901,-16.0321,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:30,https://www.xeno-canto.org/177993,afrsil1/XC177993.ogg,0
3,afrsil1,[],"['alarm call', 'call']",17.0922,54.2958,Euodice cantans,African Silverbill,Oscar Campbell,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:00,https://www.xeno-canto.org/205893,afrsil1/XC205893.ogg,0
4,afrsil1,[],['flight call'],21.4581,-157.7252,Euodice cantans,African Silverbill,Ross Gallardy,Creative Commons Attribution-NonCommercial-Sha...,3.0,16:30,https://www.xeno-canto.org/207431,afrsil1/XC207431.ogg,0


In [47]:
(X_train, X_test, y_train, y_test) = train_test_split(df, df['primary_label_encoded'], test_size= .2, random_state=7)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=7)

sr = 32_000
n_fft = 1024
hop_length = 512
train_batch_size = 256
valid_batch_size = 256
num_classes = 152
duration = 7
n_mels = 64

In [48]:
def get_data():

    train_dataset = BirdClefDataset(X_train, sr, duration)
    valid_dataset = BirdClefDataset(X_val, sr, duration)

    train_loader = DataLoader(train_dataset, train_batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, valid_batch_size, shuffle=False)

    return train_loader, valid_loader


In [59]:
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(224000, 1000)
        self.fc2 = nn.Linear(1000, 100)
        self.fc3 = nn.Linear(100, 64)
        self.fc4 = nn.Linear(64, num_classes)
    
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)

        return x


device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [62]:
train_loader, valid_loader = get_data()

# Train Loop
load = True
model = SimpleModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)
epochs = 150

if load:
    model.load_state_dict(torch.load('./model_saved_weights/model_numpy.bin'))

# This has overtrained a ton. We are at .597 loss on training and 6 on validation
best_f1 = 0.011680021168441034
total_f1 = []

In [63]:
for epoch in range(epochs):
    loop = tqdm(train_loader, position=0)
    model.train()
    for i, (x, y) in enumerate(loop):
        y = y.type(torch.LongTensor)
        x = x.to(device)
        y = y.to(device)

        outputs = model(x)
        _, predictions = torch.max(outputs, 1)
        
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loop.set_description(f"Epoch [{epoch + 1}/{epochs}]")
        loop.set_postfix(loss=loss.item())

    # Run validation loop
    if (epoch + 1) % 2 == 0:
        model.eval()

        loop_validation = tqdm(valid_loader, position=0)
        pred = []
        label = []

        for i, (X, y) in enumerate(loop_validation):
            y = y.type(torch.LongTensor)
            y = y.to(device)
            X = X.to(device)

            outputs = model(X)
            _, predictions = torch.max(outputs, 1)

            loss = criterion(outputs, y)

            pred.extend(predictions.view(-1).cpu().detach().numpy())
            label.extend(y.view(-1).cpu().detach().numpy())

            loop_validation.set_description(f"Validation Epoch [{epoch + 1}/{epochs}")
            loop_validation.set_postfix(loss=loss.item())

    valid_f1 = f1_score(label, pred, average='macro')
    total_f1.append(valid_f1)

    with open('f1_score_numpy.txt', 'a') as f:
        f.write(f"{valid_f1}\n")

    if valid_f1 > best_f1:
        print(f"Validation F1 Improved - {best_f1} ---> {valid_f1}")
        best_f1 = valid_f1
        torch.save(model.state_dict(), f'./model_saved_weights/model_numpy.bin')
        print(f"Saved model checkpoint at ./model_numpy.bin")


Epoch [1/150]: 100%|██████████| 38/38 [07:53<00:00, 12.47s/it, loss=5.19]


Validation F1 Improved - 0 ---> 0.0002561910491912559
Saved model checkpoint at ./model_numpy.bin


Epoch [2/150]: 100%|██████████| 38/38 [07:31<00:00, 11.88s/it, loss=4.6] 
Validation Epoch [2/150: 100%|██████████| 10/10 [02:05<00:00, 12.54s/it, loss=4.9]


Validation F1 Improved - 0.0002561910491912559 ---> 0.0026632090593132923
Saved model checkpoint at ./model_numpy.bin


Epoch [3/150]: 100%|██████████| 38/38 [07:50<00:00, 12.39s/it, loss=4.29]
Epoch [4/150]: 100%|██████████| 38/38 [07:28<00:00, 11.80s/it, loss=3.83]
Validation Epoch [4/150: 100%|██████████| 10/10 [01:52<00:00, 11.25s/it, loss=4.83]


Validation F1 Improved - 0.0026632090593132923 ---> 0.003987108425821112
Saved model checkpoint at ./model_numpy.bin


Epoch [5/150]: 100%|██████████| 38/38 [07:31<00:00, 11.88s/it, loss=3.14]
Epoch [6/150]: 100%|██████████| 38/38 [07:26<00:00, 11.75s/it, loss=3.15]
Validation Epoch [6/150: 100%|██████████| 10/10 [01:53<00:00, 11.34s/it, loss=4.57]


Validation F1 Improved - 0.003987108425821112 ---> 0.007702160531071673
Saved model checkpoint at ./model_numpy.bin


Epoch [7/150]: 100%|██████████| 38/38 [08:10<00:00, 12.91s/it, loss=2.83]
Epoch [8/150]: 100%|██████████| 38/38 [07:20<00:00, 11.59s/it, loss=2.64]
Validation Epoch [8/150: 100%|██████████| 10/10 [02:09<00:00, 12.95s/it, loss=4.62]


Validation F1 Improved - 0.007702160531071673 ---> 0.00955722538549094
Saved model checkpoint at ./model_numpy.bin


Epoch [9/150]: 100%|██████████| 38/38 [07:54<00:00, 12.49s/it, loss=2.8] 
Epoch [10/150]: 100%|██████████| 38/38 [07:38<00:00, 12.06s/it, loss=1.79]
Validation Epoch [10/150: 100%|██████████| 10/10 [01:52<00:00, 11.23s/it, loss=4.55]


Validation F1 Improved - 0.00955722538549094 ---> 0.010020675971215073
Saved model checkpoint at ./model_numpy.bin


Epoch [11/150]: 100%|██████████| 38/38 [07:23<00:00, 11.66s/it, loss=1.32]
Epoch [12/150]: 100%|██████████| 38/38 [07:19<00:00, 11.57s/it, loss=1.18]
Validation Epoch [12/150: 100%|██████████| 10/10 [01:51<00:00, 11.14s/it, loss=4.68]
Epoch [13/150]: 100%|██████████| 38/38 [07:17<00:00, 11.51s/it, loss=1.15]
Epoch [14/150]: 100%|██████████| 38/38 [07:19<00:00, 11.55s/it, loss=1.24]
Validation Epoch [14/150: 100%|██████████| 10/10 [01:50<00:00, 11.05s/it, loss=4.94]
Epoch [15/150]: 100%|██████████| 38/38 [07:15<00:00, 11.47s/it, loss=1.09] 
Epoch [16/150]: 100%|██████████| 38/38 [07:16<00:00, 11.49s/it, loss=0.945]
Validation Epoch [16/150: 100%|██████████| 10/10 [01:58<00:00, 11.83s/it, loss=5.18]


Validation F1 Improved - 0.010020675971215073 ---> 0.010135766329597076
Saved model checkpoint at ./model_numpy.bin


Epoch [17/150]: 100%|██████████| 38/38 [07:57<00:00, 12.57s/it, loss=0.778]
Epoch [18/150]: 100%|██████████| 38/38 [07:51<00:00, 12.41s/it, loss=0.58] 
Validation Epoch [18/150: 100%|██████████| 10/10 [02:03<00:00, 12.34s/it, loss=7.01]


Validation F1 Improved - 0.010135766329597076 ---> 0.011680021168441034
Saved model checkpoint at ./model_numpy.bin


Epoch [19/150]: 100%|██████████| 38/38 [07:50<00:00, 12.39s/it, loss=0.367]
Epoch [20/150]: 100%|██████████| 38/38 [07:49<00:00, 12.36s/it, loss=0.825]
Validation Epoch [20/150: 100%|██████████| 10/10 [01:59<00:00, 11.91s/it, loss=5.59]
Epoch [21/150]: 100%|██████████| 38/38 [07:49<00:00, 12.35s/it, loss=1.78] 
Epoch [22/150]: 100%|██████████| 38/38 [07:49<00:00, 12.35s/it, loss=0.525]
Validation Epoch [22/150: 100%|██████████| 10/10 [01:58<00:00, 11.86s/it, loss=5.75]
Epoch [23/150]: 100%|██████████| 38/38 [07:36<00:00, 12.00s/it, loss=0.829]
Epoch [24/150]: 100%|██████████| 38/38 [07:17<00:00, 11.51s/it, loss=0.475]
Validation Epoch [24/150: 100%|██████████| 10/10 [01:50<00:00, 11.03s/it, loss=6.01]
Epoch [25/150]: 100%|██████████| 38/38 [07:17<00:00, 11.53s/it, loss=0.167]
Epoch [26/150]: 100%|██████████| 38/38 [07:17<00:00, 11.51s/it, loss=0.482]
Validation Epoch [26/150: 100%|██████████| 10/10 [01:50<00:00, 11.04s/it, loss=5.69]
Epoch [27/150]: 100%|██████████| 38/38 [07:17<00:00,

KeyboardInterrupt: 