In [None]:
import os
import numpy as np
import pandas as pd
import soundfile as sf
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, sampler, Subset
from torchvision.datasets import DatasetFolder, ImageFolder
from torchvision.transforms import Compose
import torchvision.models as models
import torchaudio
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB, Resample
from fastai.vision.all import *
import librosa
%matplotlib inline

# Classes

In [None]:
# Root folder of the competition data.
path = "../input/birdclef-2021/"

# The sorted entry names of the short-audio folder are used as the class labels.
classes = sorted(os.listdir(path + "train_short_audio"))
print(classes, len(classes))

# Helper Functions

In [None]:
# Source: https://www.kaggle.com/drcapa/birdclef-2021-starter
def plot_audio_file(data):
    """Plot an audio waveform against its sample index.

    Args:
        data: Sequence of amplitude samples; it must support ``len()`` and
            be plottable by matplotlib.

    Returns:
        None. The curve is drawn on a newly created 8x4 inch figure.
    """
    plt.figure(figsize=(8, 4))
    # Draw the signal once. The label gives the legend an entry to show;
    # legend() without any labelled artist only produces a warning.
    plt.plot(range(len(data)), data, color='red', label='audio signal')
    plt.xlabel('sample index')
    plt.ylabel('amplitude')
    plt.legend(loc='upper center')
    plt.grid()

# Dataset and Transformations

We will leverage torchvision's `DatasetFolder` and extend it to read audio files from the `train_short_audio` folder.

In [None]:
def to_tensor(x):
    """Wrap array-like data in a ``torch.Tensor``.

    Serves as the first step of the audio pipeline and as the target
    transform of the dataset.
    """
    tensor = torch.Tensor(x)
    return tensor

def expand_dim(x):
    """Return ``x`` with a new leading axis of size 1."""
    return torch.unsqueeze(x, 0)

def padding(x, sample_rate=32000, seconds=5):
    """Tile or truncate a waveform to a fixed number of samples.

    The clip is repeated end-to-end along its first axis until it covers the
    target length, then cut to exactly ``int(sample_rate * seconds)``
    samples. Clips longer than the target are truncated.

    Args:
        x (torch.Tensor): Waveform whose first axis is the sample axis.
        sample_rate (int): Samples per second. Defaults to 32000.
        seconds (float): Target duration in seconds. Defaults to 5.

    Returns:
        torch.Tensor: Tensor with ``int(sample_rate * seconds)`` entries
        along its first axis. An empty input yields all zeros.
    """
    t_size = int(sample_rate * seconds)
    n_samples = len(x)
    if n_samples == 0:
        # An empty clip cannot be tiled (this used to raise
        # ZeroDivisionError); return silence of the target length instead.
        return x.new_zeros((t_size, *x.shape[1:]))
    # Ceiling division gives the smallest repeat count that reaches t_size;
    # at least one copy is needed so torch.cat never receives an empty list.
    repeats = max(1, -(-t_size // n_samples))
    return torch.cat([x] * repeats, 0)[:t_size]
    

# Audio pipeline: raw samples -> tensor -> fixed length -> mel spectrogram
# in decibels -> leading axis of size 1.
mel_spectrogram = MelSpectrogram(sample_rate=32000, n_mels=128)
to_decibels = AmplitudeToDB()
transforms = Compose([to_tensor, padding, mel_spectrogram, to_decibels, expand_dim])


def loader(file):
    """Read an audio file with ``soundfile`` and return only its samples.

    The sample rate reported by ``sf.read`` is discarded.
    """
    samples, _ = sf.read(file)
    return samples

class BirdClefShort(DatasetFolder):
    """``DatasetFolder`` whose targets are one-hot vectors instead of class ids.

    Adapted from
    https://pytorch.org/vision/stable/_modules/torchvision/datasets/folder.html#DatasetFolder
    """

    def _find_classes(self, dir: str):
        """
        Finds the class folders in a dataset.

        Args:
            dir (string): Root directory path.

        Returns:
            tuple: (classes, class_to_idx) where classes are the sorted
            sub-directory names of (dir), and class_to_idx maps each class
            name to its one-hot row (a NumPy array of length len(classes)).
        """
        # The context manager closes the directory iterator deterministically.
        with os.scandir(dir) as entries:
            classes = sorted(entry.name for entry in entries if entry.is_dir())
        # Build the identity matrix once and hand out its rows, rather than
        # allocating a fresh n x n matrix for every class.
        one_hot = np.eye(len(classes))
        class_to_idx = {cls_name: one_hot[i] for i, cls_name in enumerate(classes)}
        return classes, class_to_idx

    def find_classes(self, directory: str):
        """Delegate to ``_find_classes``.

        Newer torchvision releases call ``find_classes`` rather than
        ``_find_classes``. Without this method the one-hot mapping above
        would be silently ignored there and integer class ids used instead.
        """
        return self._find_classes(directory)

# Dataset over the short training clips: .ogg files are read by `loader`,
# passed through `transforms`, and the targets are converted by `to_tensor`.
main_ds = BirdClefShort(
    "../input/birdclef-2021/train_short_audio",
    loader=loader,
    extensions=".ogg",
    transform=transforms,
    target_transform=to_tensor,
)

In [None]:
# Display the transformed sample at index 12 as a heat map; the leading axis
# is moved to the end before handing the array to imshow.
plt.figure(figsize=(16, 16))
spectrogram = main_ds[12][0]
plt.imshow(spectrogram.numpy().transpose(1, 2, 0), cmap='hot')


In [None]:
# Hold out 33% of the sample indices for validation; the fixed random_state
# makes the split reproducible.
all_ids = range(len(main_ds))
train_ids, valid_ids = train_test_split(all_ids, test_size=0.33, random_state=2021)
train_ds = Subset(main_ds, train_ids)
valid_ds = Subset(main_ds, valid_ids)

dls = DataLoaders.from_dsets(
    train_ds,
    valid_ds,
    batch_size=64,
    num_workers=4,
    shuffle=True,
)

# Model

In [None]:
# Pretrained ResNet-18 adapted to single-channel spectrogram input.
model = models.resnet18(pretrained=True)

# Replace the first convolution with a 1-input-channel layer that keeps the
# original output channels, kernel size, stride and padding. The new layer
# is freshly initialised.
rgb_stem = model.conv1
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=rgb_stem.out_channels,
    kernel_size=rgb_stem.kernel_size[0],
    stride=rgb_stem.stride[0],
    padding=rgb_stem.padding[0],
)

# Replace the classifier head so it emits one output per class.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(classes))


# Train

In [None]:
# Save a checkpoint whenever the monitored 'f1_score' value improves
# (np.greater: higher is better).
cbs = [SaveModelCallback(monitor='f1_score', comp=np.greater)]

# Move both the data loaders and the model to the GPU when one is present.
if torch.cuda.is_available():
    dls.cuda()
    model.cuda()

learn = Learner(
    dls,
    model,
    metrics=[accuracy_multi, F1ScoreMulti()],
    loss_func=nn.BCEWithLogitsLoss(),
    opt_func=ranger,
    cbs=cbs,
)
#if mixed_precision_training: learn.to_fp16()

In [None]:
# Train for three epochs with the one-cycle schedule, peaking at lr 1e-3.
learn.fit_one_cycle(n_epoch=3, lr_max=1e-3)

In [None]:
# Save the learner through fastai (without optimizer state), then also write
# the bare model state_dict with plain PyTorch.
learn.save("last", with_opt=False)
state_dict = learn.model.state_dict()
torch.save(state_dict, 'model_classic.pth')