# Prototype Deep Learning Architecture
This is a test notebook; it fetches mel spectrograms from Google Drive. It can't be used for submission because notebooks with Internet enabled aren't valid.

In [None]:
# Run this once at the start of a session, then restart the kernel and clear the output
!pip install torchaudio==0.8

In [None]:
# LOAD THE DEPENDENCIES

import os
import glob
import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
import librosa
import numpy as np

from sklearn.utils import shuffle
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter

import torchaudio
import torchaudio.transforms as T
from torchvision import models, transforms
from skimage import io, transform

In [None]:
# torchaudio must be 0.8.0; otherwise the dataloader will fail.
# Print both library versions so the installed pair can be checked by eye.
print(torch.__version__, torchaudio.__version__, sep='\n')

In [None]:
def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None):
    """Display ``spec`` as a dB-scaled image with a colorbar.

    Args:
        spec: Spectrogram passed through ``librosa.power_to_db`` before plotting.
        title: Plot title; a default title is used when this is falsy.
        ylabel: Label for the vertical axis.
        aspect: Aspect setting forwarded to ``imshow``.
        xmax: Upper x-axis limit; the axis limits are left alone when falsy.
    """
    figure, axis = plt.subplots(1, 1)

    heading = title if title else 'Spectrogram (db)'
    axis.set_title(heading)
    axis.set_xlabel('frame')
    axis.set_ylabel(ylabel)

    image = axis.imshow(librosa.power_to_db(spec), origin='lower', aspect=aspect)
    if xmax:
        axis.set_xlim((0, xmax))

    figure.colorbar(image, ax=axis)
    # Non-blocking so the notebook cell returns immediately.
    plt.show(block=False)

In [None]:
# Global settings shared by the cells below
RANDOM_SEED = 1337

# Audio settings
SAMPLE_RATE = 32_000  # sample rate handed to the mel-spectrogram computation
SIGNAL_LENGTH = 5  # seconds
FMIN = 500  # lower frequency bound handed to the mel-spectrogram computation
FMAX = 12_500  # upper frequency bound handed to the mel-spectrogram computation

# Spectrogram settings
SPEC_SHAPE = (48, 128)  # height x width

In [None]:
# Load metadata file
train = pd.read_csv('../input/birdclef-2021/train_metadata.csv')

# Count the recordings per species. Grouping keeps every count attached to its
# own label; zipping `unique()` (appearance order) with sorted groupby counts
# only lines up when the CSV happens to be sorted by label.
# sort=False keeps the species in order of first appearance.
birds_count = train.groupby('primary_label', sort=False).size().to_dict()

# NOTE(review): no minimum-recording or rating threshold is applied here, so
# every species in the metadata is kept — confirm whether a filter is wanted.
most_represented_birds = list(birds_count)

TRAIN = train.query('primary_label in @most_represented_birds')
LABELS = sorted(TRAIN.primary_label.unique())

# Let's see how many species and samples we have left
print('NUMBER OF SPECIES IN TRAIN DATA:', len(LABELS))
print('NUMBER OF SAMPLES IN TRAIN DATA:', len(TRAIN))
print('LABELS:', most_represented_birds)

In [None]:
# Display the contents of test.csv (the result is shown, not stored).
pd.read_csv('../input/birdclef-2021/test.csv')

In [None]:
# Load the training metadata CSV into `meta` for inspection.
meta = pd.read_csv('../input/birdclef-2021/train_metadata.csv')

In [None]:
# Preview the first rows of the training metadata.
meta.head()

In [None]:
# Hyper-parameters and audio front-end settings, looked up by key in later cells.
configuration_dict = dict(
    number_of_epochs=3,
    batch_size=8,
    dropout=0.3,
    base_lr=0.005,
    number_of_mel_filters=64,
    resample_freq=22050,
)

In [None]:
# Map each label to its position in the sorted LABELS list.
labels_to_int = dict(zip(LABELS, range(len(LABELS))))

In [None]:
# Inverse lookup: position in LABELS -> label.
int_to_label = dict(enumerate(LABELS))

In [None]:
# Sanity check: show the integer assigned to the 'acafly' label.
labels_to_int['acafly']

In [None]:
# Sanity check: show the label stored under index 0.
int_to_label[0]

In [None]:
class Melspectrogram(Dataset):
    """Dataset yielding a normalized log-mel spectrogram and an integer label.

    File names and labels are read from a metadata CSV at construction time;
    the audio itself is loaded and converted lazily in ``__getitem__``.
    """

    # Positional CSV columns. Column 0 is used both as the label and as the
    # name of the sub-folder holding the audio file.
    _LABEL_COLUMN = 0
    # NOTE(review): positional index — confirm it is the file-name column.
    _FILENAME_COLUMN = 9
    # Maximum number of seconds read from each recording.
    # NOTE(review): hop_length below is derived from SIGNAL_LENGTH, not from
    # this value; the final resize evens out the resulting width — confirm intent.
    _CLIP_SECONDS = 10
    # Width every spectrogram is resized to before being returned.
    _OUTPUT_WIDTH = 224

    def __init__(self, csv_path, base_path, resample_freq=0):
        """Read the metadata CSV and remember where the audio files live.

        Args:
            csv_path: Path of the metadata CSV.
            base_path: Directory containing one sub-folder per label.
            resample_freq: Stored on the instance; not used by this class.
        """
        super().__init__()
        self.file_path = base_path
        # Stored for reference only; the spectrogram height comes from SPEC_SHAPE.
        self.n_mels = configuration_dict.get('number_of_mel_filters', 64)
        self.resample = resample_freq

        csvData = pd.read_csv(csv_path)
        # Pull whole columns at once instead of one iloc lookup per row.
        self.file_names = csvData.iloc[:, self._FILENAME_COLUMN].tolist()
        self.labels = csvData.iloc[:, self._LABEL_COLUMN].tolist()
        self.folders = list(self.labels)

    @staticmethod
    def _normalize(mel_spec):
        """Min-max scale ``mel_spec`` to [0, 1]; a constant input maps to zeros."""
        shifted = mel_spec - mel_spec.min()
        peak = shifted.max()
        # A constant spectrogram has zero range; dividing would yield NaNs.
        if peak > 0:
            shifted = shifted / peak
        return shifted

    def __getitem__(self, index):
        """Return ``(mel, label_id)`` for the recording at ``index``.

        ``mel`` is a float32 tensor with a leading channel dimension, resized
        to ``[SPEC_SHAPE[0], 224]``; ``label_id`` comes from ``labels_to_int``.
        """
        path = os.path.join(self.file_path, self.folders[index], self.file_names[index])
        # Load at the same rate the spectrogram is computed with.
        sig, _ = librosa.load(path, sr=SAMPLE_RATE, duration=self._CLIP_SECONDS)

        hop_length = int(SIGNAL_LENGTH * SAMPLE_RATE / (SPEC_SHAPE[1] - 1))
        mel_spec = librosa.feature.melspectrogram(y=sig,
                                                  sr=SAMPLE_RATE,
                                                  n_fft=1024,
                                                  hop_length=hop_length,
                                                  n_mels=SPEC_SHAPE[0],
                                                  fmin=FMIN,
                                                  fmax=FMAX)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        mel_spec = self._normalize(mel_spec)

        # Convert the array directly; wrapping it in a Python list first is slow.
        mel = torch.as_tensor(mel_spec, dtype=torch.float32).unsqueeze(0)
        mel = transforms.functional.resize(mel, [SPEC_SHAPE[0], self._OUTPUT_WIDTH])
        return mel, labels_to_int[self.labels[index]]

    def __len__(self):
        """Return the number of recordings listed in the metadata CSV."""
        return len(self.file_names)


In [None]:
# Build the training dataset from the metadata CSV and the audio root folder.
csv_path = '../input/birdclef-2021/train_metadata.csv'
base_path = '../input/birdclef-2021/train_short_audio'
train_set = Melspectrogram(csv_path=csv_path, base_path=base_path)

In [None]:
# Display the dataset object to confirm it was constructed.
train_set

In [None]:
# Shuffled batches over the training set.
# NOTE(review): batch_size is 128 here while configuration_dict['batch_size'] is 8 — confirm which is intended.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=128,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

In [None]:
def get_model(num_classes):
    """Build a pretrained ResNet-18 adapted to single-channel input.

    The first convolution is swapped for a 1-channel one with the same output
    channels, kernel size, stride and padding, and the classification head is
    replaced by a fresh linear layer.

    Args:
        num_classes: Number of outputs of the new classification head.

    Returns:
        The modified model.
    """
    model = models.resnet18(pretrained=True)

    rgb_conv = model.conv1
    mono_conv = nn.Conv2d(1, rgb_conv.out_channels, kernel_size=rgb_conv.kernel_size[0],
                          stride=rgb_conv.stride[0], padding=rgb_conv.padding[0])
    # Seed the new filters with the channel-mean of the pretrained ones so the
    # first layer does not start from random weights. The bias keeps its
    # default initialization.
    with torch.no_grad():
        mono_conv.weight.copy_(rgb_conv.weight.mean(dim=1, keepdim=True))
    model.conv1 = mono_conv

    # Replace the first classification head found under one of these names.
    for head_name in ("fc", "_fc", "classifier", "last_linear"):
        if hasattr(model, head_name):
            nb_ft = getattr(model, head_name).in_features
            setattr(model, head_name, nn.Linear(nb_ft, num_classes))
            break

    return model

# Size the head from the label list so it always matches the ids in labels_to_int.
model = get_model(len(LABELS))

In [None]:
# SGD with momentum; the learning rate comes from the notebook configuration.
optimizer = optim.SGD(model.parameters(), lr=configuration_dict.get('base_lr', 0.001), momentum=0.9)

# Cut the learning rate by 10x every third of the configured epochs. The step
# size is clamped to at least 1 because fewer than 3 epochs would give 0,
# which StepLR cannot step with.
# NOTE(review): the scheduler only takes effect if scheduler.step() is called
# once per epoch — confirm the training loop does so.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=max(1, configuration_dict['number_of_epochs'] // 3),
    gamma=0.1,
)
criterion = nn.CrossEntropyLoss()

In [None]:
# Always hold a torch.device (never a bare CUDA index) so `device` has the
# same type whether or not a GPU is present.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device to use: {}'.format(device))
model.to(device)

In [None]:
def train(model, epoch):
    """Run one training epoch over the module-level ``train_loader``.

    Relies on the module-level ``optimizer``, ``criterion`` and ``device``.

    Args:
        model: Network to optimize; switched to training mode.
        epoch: Epoch index, used only in the progress message.

    Returns:
        Mean batch loss over the epoch as a float (0.0 when the loader
        yields no batches).
    """
    model.train()
    num_batches = len(train_loader)
    running_loss = 0.0

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Take the Python float once; it is used for both the sum and the log line.
        batch_loss = loss.item()
        running_loss += batch_loss

        if batch_idx % 20 == 0:  # print training stats
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
                  .format(epoch, batch_idx * len(inputs), len(train_loader.dataset),
                          100. * batch_idx / num_batches, batch_loss))

    # NOTE(review): no learning-rate scheduler is stepped here — confirm whether
    # a per-epoch scheduler step is intended.
    return running_loss / num_batches if num_batches else 0.0

In [None]:
# Run the training epochs (the stray closing parenthesis made this cell a SyntaxError).
# NOTE(review): 10 epochs is hard-coded while configuration_dict['number_of_epochs'] is 3 — confirm which is intended.
for epoch in range(10):
    train(model, epoch)