# Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import soundfile as sf
import librosa

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

# Path

In [None]:
# Input directory used by this notebook; its entries are listed as the cell output.
path = '/kaggle/input/birdclef-2021/'
os.listdir(path)

# Functions
We define some helper functions.

In [None]:
def read_ogg_file(full_path):
    """Load an ogg audio file.

    Returns a ``(data, samplerate)`` tuple exactly as produced by
    ``soundfile.read``: the decoded samples as a NumPy array and the
    sample rate of the file.
    """
    return sf.read(full_path)

from skimage.transform import resize
import numpy as np

def spec_to_image(spec, shape=(224, 400), eps=1e-6):
    """Resize a spectrogram and rescale it to an 8-bit grayscale image.

    The spectrogram is resized to ``shape``, standardised with its own mean
    and standard deviation, min-max scaled to ``[0, 255]`` and cast to
    ``uint8`` (the cast truncates fractional values).

    Parameters
    ----------
    spec : array-like
        2-D spectrogram.
    shape : tuple of int, optional
        Output ``(height, width)``; defaults to ``(224, 400)``.
    eps : float, optional
        Added to the standard deviation to avoid dividing by zero.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``shape``. A constant (or non-finite)
        spectrogram yields an all-zero image.
    """
    spec = resize(spec, shape)
    spec_norm = (spec - spec.mean()) / (spec.std() + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    value_range = spec_max - spec_min

    # A constant input (e.g. the all-zero dummy spectrogram) has no dynamic
    # range; scaling it would compute 0/0 -> NaN and cast NaN to uint8.
    if not (np.isfinite(value_range) and value_range > 0):
        return np.zeros(spec_norm.shape, dtype=np.uint8)

    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    return spec_scaled.astype(np.uint8)

# Load Data

In [None]:
import pickle

# NOTE: pickle.load can execute arbitrary code while unpickling, so this
# file must come from a trusted source.
with open('../input/birdclef-2021-pretrained-model/labels.pkl', 'rb') as label_file:
    labels = pickle.load(label_file)

print('Number of unique bird labels:', len(labels))

In [None]:
# Show the loaded labels as the cell output.
labels

We encode the labels and write them into a data frame:

# Parameters
Based on the EDA, we define some parameters:

In [None]:
import torch

# One analysis window is `audio_lenght` seconds long and spans `data_lenght`
# samples (160000 / 5 implies 32000 samples per second).
audio_lenght = 5
data_lenght = 160000
batch_size = 4
num_labels = len(labels)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Define Model

In [None]:
import librosa
from torch.utils.data import Dataset, DataLoader

class AudioData(Dataset):
    """In-memory dataset of spectrogram images for fixed-length audio windows.

    Each ID in ``list_IDs`` selects one row of ``df``; the columns
    ``audio_id``, ``site``, ``seconds`` and ``row_id`` are read. The audio
    file in ``path`` whose name starts with ``<audio_id>_<site>`` is decoded,
    the window ending at ``seconds`` is cut out and turned into a 3-channel
    image. All images are computed eagerly in ``__init__``.

    Relies on the notebook-level names ``data_lenght``, ``audio_lenght``,
    ``read_ogg_file`` and ``spec_to_image``.
    """

    def __init__(self, path, list_IDs, df, data_type):
        """Build every image up front.

        Parameters
        ----------
        path : str or path-like
            Directory containing the audio files.
        list_IDs : iterable
            Index labels of ``df`` to process, in output order.
        df : pandas.DataFrame
            Table with ``audio_id``, ``site``, ``seconds`` and ``row_id``.
        data_type : str
            Stored on the instance; not otherwise used here.
        """
        self.data_type = data_type
        self.path = path
        self.df = df
        self.data = []
        self.row_ids = []

        # List the directory once instead of once per row; sorting makes the
        # chosen file deterministic if several names share a prefix.
        available_files = sorted(os.listdir(self.path))
        # Consecutive rows usually belong to the same recording, so keep the
        # most recently decoded file instead of re-reading it for every window.
        cached_file, cached_audio, cached_sr = None, None, None

        for ID in list_IDs:
            prefix = str(self.df.loc[ID, 'audio_id']) + '_' + self.df.loc[ID, 'site']
            # Prefix match, not substring match: "123_SSW" must not pick up
            # a file named "9123_SSW_...".
            file_list = [s for s in available_files if s.startswith(prefix)]

            audio_segment = None
            if file_list:
                file = file_list[0]
                if file != cached_file:
                    cached_audio, cached_sr = read_ogg_file(os.path.join(self.path, file))
                    cached_file = file
                seconds = self.df.loc[ID, 'seconds']
                start = int((seconds - audio_lenght) / audio_lenght) * data_lenght
                stop = int(seconds / audio_lenght) * data_lenght
                audio_segment = cached_audio[start:stop]

            if audio_segment is None or len(audio_segment) == 0:
                # Dummy input for a missing file, or for a window that lies
                # beyond the end of a short recording (an empty slice would
                # make np.fft.fft raise).
                audio_file_fft = np.zeros((data_lenght // 2))
                spectrogram = librosa.feature.melspectrogram(y=audio_file_fft)
            else:
                # NOTE(review): the mel spectrogram is computed from the FFT
                # magnitude rather than from the waveform; presumably this
                # mirrors how the pretrained weights were trained - confirm
                # before changing.
                audio_file_fft = np.abs(np.fft.fft(audio_segment)[: len(audio_segment) // 2])
                spectrogram = librosa.feature.melspectrogram(y=audio_file_fft, sr=cached_sr)
            spec_db = librosa.power_to_db(spectrogram, top_db=80)

            img = spec_to_image(spec_db)
            # Repeat the grayscale image on three channels.
            mel_spec = np.stack((img, img, img))

            self.data.append(mel_spec)
            self.row_ids.append(str(self.df.loc[ID, 'row_id']))

    def __len__(self):
        """Number of prepared windows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, row_id)`` for the window at position ``idx``."""
        return self.data[idx], self.row_ids[idx]

In [None]:
# Create the torch hub checkpoint directory and copy the ResNet-50 weights into it
# (presumably so that resnet50(pretrained=True) finds them without a download - confirm).
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp ../input/pretrained-pytorch-models/resnet50-19c8e357.pth /root/.cache/torch/hub/checkpoints/

In [None]:
from torchvision.models import resnet50
from torch import nn

class BirdCLEFModel(nn.Module):
    """ResNet-50 with its final layer swapped for dropout + linear, then a sigmoid."""

    def __init__(self, n_classes):
        """Create the network with ``n_classes`` outputs."""
        super().__init__()
        backbone = resnet50(pretrained=True)
        feature_dim = backbone.fc.in_features
        # Replace the original classifier with a dropout-regularised linear head.
        classifier_head = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=feature_dim, out_features=n_classes),
        )
        backbone.fc = classifier_head
        self.base_model = backbone
        self.sigm = nn.Sigmoid()

    def forward(self, x):
        """Run the backbone and squash each output with a sigmoid."""
        logits = self.base_model(x)
        return self.sigm(logits)

# Predict Test Data

In [None]:
# Build the network and restore the saved weights.
model = BirdCLEFModel(num_labels)
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU also loads when only the CPU is available.
model.load_state_dict(
    torch.load(
        "../input/birdclef-2021-pretrained-model/20epoch_mseloss.pt",
        map_location=device,
    )
)
model.to(device)
# Inference mode: disables dropout and uses running batch-norm statistics.
model.eval()

In [None]:
def prediction(test_audios, _dir):
    """Predict bird labels for every 5-second window of each soundscape.

    For each file, one row per window end time (5, 10, ..., 600 seconds) is
    created, the windows are converted to images by ``AudioData`` and scored
    with the notebook-level ``model``. Outputs that round to 1 are mapped to
    their entry in ``labels``.

    Parameters
    ----------
    test_audios : iterable
        Objects with a ``name`` attribute of the form ``<audio_id>_<site>_...``
        (e.g. ``pathlib.Path``).
    _dir : str
        Directory passed to ``AudioData`` to locate the audio files.

    Returns
    -------
    pandas.DataFrame
        Columns ``row_id`` (``<audio_id>_<site>_<seconds>``) and ``birds``
        (space-separated labels, or ``nocall``). Empty when ``test_audios``
        is empty.
    """
    warnings.filterwarnings("ignore")
    prediction_dfs = []
    for audio_path in test_audios:
        # The file name does not change per window, so parse it once.
        name_parts = audio_path.name.split("_")
        audio_id, site = name_parts[0], name_parts[1]
        seconds = list(range(5, 605, 5))

        test_df = pd.DataFrame({
            "row_id": [f"{audio_id}_{site}_{second}" for second in seconds],
            "audio_id": [audio_id] * len(seconds),
            "site": [site] * len(seconds),
            "seconds": seconds
        })

        test_data = AudioData(_dir, list(test_df.index), test_df, "test")
        # No shuffling at inference time: keeps the rows in window order.
        test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

        rows = []
        birds = []
        # Gradients are not needed for inference; skipping them saves memory.
        with torch.no_grad():
            for x, row_id in test_loader:
                x = x.to(device, dtype=torch.float32)
                predicted = np.round(model(x).cpu().detach().numpy())

                types = [labels[col] for col, flag in enumerate(predicted[0]) if flag == 1.]

                # 'nocall' is dropped when a bird is also predicted and used
                # as the fallback when nothing is predicted.
                if len(types) > 1 and 'nocall' in types:
                    types.remove('nocall')
                elif len(types) == 0:
                    types.append('nocall')

                rows.append(row_id[0])
                birds.append(" ".join(types))

        prediction_dfs.append(
            pd.DataFrame(list(zip(rows, birds)), columns=['row_id', 'birds'])
        )

    print(len(prediction_dfs))
    if not prediction_dfs:
        # pd.concat raises on an empty list; return an empty, well-formed frame.
        return pd.DataFrame(columns=['row_id', 'birds'])
    df = pd.concat(prediction_dfs, axis=0, sort=False).reset_index(drop=True)
    return df

In [None]:
from pathlib import Path
# Use the test soundscapes when that folder contains ogg files; otherwise
# fall back to the train soundscapes.
TEST = any(Path("../input/birdclef-2021/test_soundscapes/").glob("*.ogg"))
data_dir = (
    "../input/birdclef-2021/test_soundscapes/"
    if TEST
    else "../input/birdclef-2021/train_soundscapes/"
)

DATADIR = Path(data_dir)
all_audios = list(DATADIR.glob("*.ogg"))

# Score every soundscape and write the result to submission.csv.
submission = prediction(all_audios, data_dir)
submission.to_csv("submission.csv", index=False)