# Notes

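In this kernel, I'm going to use a classical **ResNeSt50** for bird identification. (Note: the ResNeSt cells below are currently commented out; the active inference path loads an EfficientNet-B5 checkpoint.)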

* The inference is based on these [resnest50 weights](https://www.kaggle.com/kneroma/kkiller-birdclef-models-public). Please don't forget to upvote the dataset to make it more visible to others.
* The inference pipeline is optimized as much as I could manage in order to reduce execution time.

In [None]:
# IPython shell escape: run nvidia-smi and show its output for this session.
!nvidia-smi

In [None]:
# try:
#     import resnest
# except ModuleNotFoundError:
#     !pip install -q "../input/resnest50-fast-package/resnest-0.0.6b20200701/resnest"

In [None]:
# Install EfficientNet-PyTorch from the local copy under ../input only when
# the package is not already importable.
try:
    import efficientnet_pytorch
except ModuleNotFoundError:
    !pip install -q "../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master"

In [None]:
import numpy as np
import librosa as lb
import soundfile as sf
import pandas as pd
import cv2
from pathlib import Path
import re

import torch
from torch import nn
from  torch.utils.data import Dataset, DataLoader

from tqdm.notebook import tqdm

import time
#from resnest.torch import resnest50
from efficientnet_pytorch import EfficientNet

# Configs

In [None]:
NUM_CLASSES = 273  # default output size of the classifier head (see load_efficient_net)
SR = 32_000  # default sample rate used by BirdCLEFDataset when reading/resampling audio
DURATION = 5  # default window length in seconds; one clip is DURATION * SR samples
#THRESH = 0.25
THRESH = 0.80  # probability cut-off used when turning model outputs into labels

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)


In [None]:
# First, decide which soundscape files to process.
# The test_soundscapes directory is used when it contains ".ogg" files
# (per the original author, that is only the case when the notebook is
# submitted); otherwise a later cell switches to the train_soundscapes folder.

# Defaults for a submission run: predict on the test soundscapes, align the
# output with the sample submission, and skip validation (no target file).
TEST_AUDIO_ROOT = Path("../input/birdclef-2021/test_soundscapes")
SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
TARGET_PATH = None

In [None]:
# Number of .ogg files found under TEST_AUDIO_ROOT (shown as the cell output).
len(list(TEST_AUDIO_ROOT.glob("*.ogg")))

In [None]:
# No test audio found: switch to the train soundscapes, disable the
# sample-submission alignment in preds_as_df, and point TARGET_PATH at the
# label file so the validation cells further down run.
if not len(list(TEST_AUDIO_ROOT.glob("*.ogg"))):
    TEST_AUDIO_ROOT = Path("../input/birdclef-2021/train_soundscapes")
    SAMPLE_SUB_PATH = None
    # SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
    TARGET_PATH = Path("../input/birdclef-2021/train_soundscape_labels.csv")

# Data

In [None]:
class MelSpecComputer:
    """Callable that converts a waveform into a dB-scaled mel spectrogram.

    Args:
        sr: Sample rate forwarded to ``librosa.feature.melspectrogram``.
        n_mels: Number of mel bands.
        fmin: Lower frequency bound forwarded to librosa.
        fmax: Upper frequency bound forwarded to librosa.
        **kwargs: Extra keyword arguments forwarded to
            ``librosa.feature.melspectrogram``. ``n_fft`` defaults to
            ``sr // 10`` and ``hop_length`` to ``sr // 40`` when not supplied.
    """

    def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax
        # Only fill in the STFT parameters the caller did not provide.
        kwargs.setdefault("n_fft", self.sr // 10)
        kwargs.setdefault("hop_length", self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, y):
        """Return the float32 dB mel spectrogram of waveform ``y``."""
        # Pass the signal by keyword: librosa >= 0.10 made the audio argument
        # keyword-only, and older releases accept ``y=`` as well.
        melspec = lb.feature.melspectrogram(
            y=y,
            sr=self.sr,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=self.fmax,
            **self.kwargs,
        )
        return lb.power_to_db(melspec).astype(np.float32)

In [None]:
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Standardize an array and rescale it to ``uint8`` values in [0, 255].

    Args:
        X: Numeric numpy array (here: a mel spectrogram).
        eps: Added to ``std`` before dividing, and also the minimum value
            range below which the input is treated as constant.
        mean: Value subtracted before scaling; ``X.mean()`` when ``None``.
        std: Value divided by (plus ``eps``); ``X.std()`` when ``None``.

    Returns:
        ``uint8`` array with the same shape as ``X``: min-max scaled to
        [0, 255], or all zeros when the standardized range is not above ``eps``.
    """
    # Compare against None explicitly: ``mean or X.mean()`` would silently
    # discard an explicitly supplied 0.
    mean = X.mean() if mean is None else mean
    std = X.std() if std is None else std
    X = (X - mean) / (std + eps)

    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        # Min-max scale to [0, 255]; the cast truncates toward zero.
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        # (Near-)constant input: avoid dividing by a vanishing range.
        V = np.zeros_like(X, dtype=np.uint8)

    return V

def crop_or_pad(y, length):
    """Return ``y`` truncated or zero-padded at the end to ``length`` samples.

    Args:
        y: 1-D sequence of samples.
        length: Desired number of samples.

    Returns:
        ``y`` itself when it already has ``length`` samples, its first
        ``length`` samples when it is longer, or ``y`` followed by zeros
        (of the same dtype) when it is shorter.
    """
    missing = length - len(y)
    if missing > 0:
        # Append exactly the number of zeros that are missing. The previous
        # expression (``length - np.zeros(len(y))``) appended ``len(y)``
        # copies of ``length`` instead.
        padding = np.zeros(missing, dtype=np.asarray(y).dtype)
        y = np.concatenate([y, padding])
    elif missing < 0:
        y = y[:length]
    return y

In [None]:
class BirdCLEFDataset(Dataset):
    """Dataset yielding, per audio file, a stack of mel-spectrogram images.

    Item ``idx`` is built by reading the file at ``data.loc[idx, "filepath"]``,
    cutting it into fixed-length windows and converting each window into a
    3-channel image.

    Args:
        data: Table with a ``"filepath"`` column that can be addressed with
            ``data.loc[idx, "filepath"]``.
        sr: Target sample rate.
        n_mels: Number of mel bands.
        fmin: Lower frequency bound of the mel spectrogram.
        fmax: Upper frequency bound; ``sr // 2`` when falsy.
        duration: Window length in seconds.
        step: Hop between window ends, in samples; one full window when falsy.
        res_type: Resampling method forwarded to ``librosa.resample``.
        resample: Whether to resample files whose rate differs from ``sr``.
    """

    def __init__(self, data, sr=SR, n_mels=128, fmin=0, fmax=None, duration=DURATION, step=None, res_type="kaiser_fast", resample=True):
        self.data = data

        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax or self.sr // 2

        self.duration = duration
        self.audio_length = self.duration * self.sr  # samples per window
        self.step = step or self.audio_length

        self.res_type = res_type
        self.resample = resample

        self.mel_spec_computer = MelSpecComputer(
            sr=self.sr, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax
        )

    def __len__(self):
        """Return the number of rows (files) in ``data``."""
        return len(self.data)

    @staticmethod
    def normalize(image):
        """Scale ``image`` by 1/255 as float32 and stack three copies on a new leading axis."""
        image = image.astype("float32", copy=False) / 255.0
        image = np.stack([image, image, image])
        return image

    def audio_to_image(self, audio):
        """Convert one audio window to a normalized 3-channel spectrogram image."""
        melspec = self.mel_spec_computer(audio)
        image = mono_to_color(melspec)
        image = self.normalize(image)
        return image

    def read_file(self, filepath):
        """Read ``filepath`` and return the stacked images of its complete windows.

        NOTE: a recording shorter than one window produces no complete window,
        so ``np.stack`` raises ``ValueError`` for such files.
        """
        audio, orig_sr = sf.read(filepath, dtype="float32")

        if self.resample and orig_sr != self.sr:
            # The sample rates are passed by keyword: librosa >= 0.10 made
            # them keyword-only, and older releases accept the same names.
            audio = lb.resample(
                audio, orig_sr=orig_sr, target_sr=self.sr, res_type=self.res_type
            )

        # ``i`` is the end position of each window; the range runs one step
        # past the end of the recording so the tail is visited as well.
        audios = []
        for i in range(self.audio_length, len(audio) + self.step, self.step):
            start = max(0, i - self.audio_length)
            end = start + self.audio_length
            audios.append(audio[start:end])

        # Drop the trailing window when it is shorter than a full window.
        if len(audios[-1]) < self.audio_length:
            audios = audios[:-1]

        images = [self.audio_to_image(clip) for clip in audios]
        return np.stack(images)

    def __getitem__(self, idx):
        """Return the image stack for the file in row ``idx`` of ``data``."""
        return self.read_file(self.data.loc[idx, "filepath"])

In [None]:
# One row per .ogg file under TEST_AUDIO_ROOT. The file stem is split on "_"
# and must yield exactly three parts to fill the id / site / date columns.
data = pd.DataFrame(
     [(path.stem, *path.stem.split("_"), path) for path in Path(TEST_AUDIO_ROOT).glob("*.ogg")],
    columns = ["filename", "id", "site", "date", "filepath"]
)
print(data.shape)
data.head()

In [None]:
# Use the new rich metadata from the private dataset. Its "primary_label"
# column is what the label <-> id mappings in the next cell are built from.
df_train = pd.read_csv("../input/birdcled2021train-metadata-limited-birds-rich-set/train_metadata_filtered_rich.csv")

In [None]:
# Map each distinct primary label to its position in the sorted label list ...
LABEL_IDS = {label: label_id for label_id,label in enumerate(sorted(df_train["primary_label"].unique()))}
# ... and back again (id -> label); get_bird_names uses this to decode predictions.
INV_LABEL_IDS = {val: key for key,val in LABEL_IDS.items()}

In [None]:
#LABEL_IDS

In [None]:
# Smallest and largest value of the metadata's "label_id" column (cell output).
df_train["label_id"].min(), df_train["label_id"].max()

# Inference

In [None]:
# Wrap the file table in the dataset. The cell output shows the number of
# files and the shape of the first file's image stack (this reads that file).
test_data = BirdCLEFDataset(data=data)
len(test_data), test_data[0].shape

In [None]:
# def load_net(checkpoint_path, num_classes=NUM_CLASSES):
#     net = resnest50(pretrained=False)
#     net.fc = nn.Linear(net.fc.in_features, num_classes)
#     dummy_device = torch.device("cpu")
#     d = torch.load(checkpoint_path, map_location=dummy_device)
#     for key in list(d.keys()):
#         d[key.replace("model.", "")] = d.pop(key)
#     net.load_state_dict(d)
#     net = net.to(DEVICE)
#     net = net.eval()
#     return net

In [None]:
def load_efficient_net(checkpoint_path, num_classes=NUM_CLASSES):
    """Build an EfficientNet-B5 classifier and load its weights from a checkpoint.

    Args:
        checkpoint_path: Path of the state-dict file passed to ``torch.load``.
        num_classes: Output size of the replacement ``_fc`` layer.

    Returns:
        The network with the checkpoint weights loaded, moved to ``DEVICE``
        and switched to eval mode.
    """
    # Always deserialize onto the CPU; the model is moved to DEVICE afterwards.
    state = torch.load(checkpoint_path, map_location=torch.device("cpu"))

    # Strip every "model." occurrence from the parameter names. The renaming
    # is done in place so that the object returned by torch.load is the one
    # handed to load_state_dict.
    for old_key in list(state.keys()):
        state[old_key.replace("model.", "")] = state.pop(old_key)

    model = EfficientNet.from_name('efficientnet-b5')
    # Swap the classification head for one with the requested number of outputs.
    model._fc = nn.Linear(model._fc.in_features, num_classes)
    model.load_state_dict(state)

    return model.to(DEVICE).eval()

In [None]:

# checkpoint_paths = [
#     Path("../input/kkiller-birdclef-models-public/birdclef_resnest50_fold0_epoch_10_f1_val_06471_20210417161101.pth"),
# ]


# nets = [
#         load_net(checkpoint_path.as_posix()) for checkpoint_path in checkpoint_paths
# ]

In [None]:

# Checkpoints to ensemble; predict() averages the outputs of all loaded models.
checkpoint_efficient_paths = [
    Path("../input/birdclef-enb5-fold0-epoch-17-f1-val-07796/birdclef_efficientnet-b5_fold0_epoch_17_f1_val_07796_20210524050547.pth"),
]


# One loaded, eval-mode network per checkpoint path.
efficient_nets = [
        load_efficient_net(checkpoint_efficient_path.as_posix()) for checkpoint_efficient_path in checkpoint_efficient_paths
]

In [None]:
@torch.no_grad()
def get_thresh_preds(out, thresh=None):
    """Return, per row, the class indices whose score exceeds ``thresh``.

    Args:
        out: 2-D tensor of scores, one row per sample and one column per class.
        thresh: Cut-off; the module-level ``THRESH`` is used when ``None``.

    Returns:
        A list with one list of ints per row of ``out``: the indices of the
        classes scoring above ``thresh``, ordered from highest to lowest score.
    """
    # ``is None`` keeps an explicit threshold of 0 (``thresh or ...`` would
    # drop it); the fallback is THRESH, the constant defined in the configs.
    thresh = THRESH if thresh is None else thresh

    ranked = (-out).argsort(1)  # class indices by descending score
    counts = (out > thresh).sum(1)  # number of classes above the cut-off
    return [row[: int(k)].tolist() for row, k in zip(ranked, counts)]

In [None]:
def get_bird_names(preds):
    """Turn lists of class ids into space-separated label strings.

    Each element of ``preds`` is a list of class ids; an empty list becomes
    ``"nocall"``, anything else is decoded through ``INV_LABEL_IDS`` and
    joined with single spaces. Returns one string per element of ``preds``.
    """
    return [
        " ".join(INV_LABEL_IDS[bird_id] for bird_id in pred) if pred else "nocall"
        for pred in preds
    ]

In [None]:
def predict(nets, test_data, names=True):
    """Run every model on every item of ``test_data`` and average the outputs.

    Args:
        nets: Non-empty sequence of models; each is called on the whole image
            stack of one file.
        test_data: Indexable dataset whose items are numpy arrays.
        names: When true, convert the averaged probabilities of each file to
            label strings using ``THRESH``; otherwise keep the probabilities.

    Returns:
        A list with one entry per item of ``test_data``: either the averaged
        sigmoid outputs (a tensor on ``DEVICE``) or a list of label strings.
    """
    preds = []
    with torch.no_grad():
        for idx in tqdm(range(len(test_data))):
            xb = torch.from_numpy(test_data[idx]).to(DEVICE)

            # Ensemble by averaging the per-model sigmoid probabilities.
            pred = 0.
            for net in nets:
                pred += torch.sigmoid(net(xb))
            pred /= len(nets)

            if names:
                # Pass the threshold explicitly rather than relying on the
                # default of get_thresh_preds.
                pred = get_bird_names(get_thresh_preds(pred, thresh=THRESH))

            preds.append(pred)
    return preds

In [None]:
# pred_probas = predict(nets, test_data, names=False)
# print(len(pred_probas))

In [None]:
# names=False keeps the averaged probabilities (one entry per file) so the
# threshold can be applied in a separate step below.
pred_probas_efficient = predict(efficient_nets, test_data, names=False)
print(len(pred_probas_efficient))

In [None]:
#len(pred_probas)
# Number of files that were predicted (cell output).
len(pred_probas_efficient)

In [None]:
#pred_probas[0]

In [None]:
# Averaged probabilities for the first file (cell output).
pred_probas_efficient[0]

In [None]:
#pred_probas[0]+pred_probas_efficient[0]

In [None]:
#pred_probas_avg =[(pred_probas[i]+pred_probas_efficient[i])/2 for i in range(0,len(pred_probas))]
#pred_probas_avg =[(pred_probas[i]*3+pred_probas_efficient[i]*1)/4 for i in range(0,len(pred_probas))]

In [None]:
#len(pred_probas_avg)

In [None]:
#pred_probas_avg[0]

In [None]:
#preds_avg = [get_bird_names(get_thresh_preds(pred, thresh=THRESH)) for pred in pred_probas_avg]

In [None]:
#preds_avg[:2]

In [None]:
#preds = [get_bird_names(get_thresh_preds(pred, thresh=THRESH)) for pred in pred_probas]
# preds[:2]

In [None]:
#preds[:2]

In [None]:
# Threshold each file's probabilities and decode the surviving class ids into
# label strings ("nocall" where nothing exceeds THRESH).
preds_efficient = [get_bird_names(get_thresh_preds(pred, thresh=THRESH)) for pred in pred_probas_efficient]
# preds[:2]

In [None]:
#preds_efficient[:2]

In [None]:
def preds_as_df(data, preds):
    """Flatten per-file predictions into a ``row_id`` / ``birds`` table.

    Args:
        data: File table whose rows expose ``id`` and ``site`` attributes.
        preds: One sequence of label strings per row of ``data``, one string
            per consecutive window of that file.

    Returns:
        DataFrame with columns ``row_id`` (``"<id>_<site>_<5 * window number>"``,
        windows counted from 1) and ``birds``. When ``SAMPLE_SUB_PATH`` is set,
        the rows are instead those of the sample submission, with ``"nocall"``
        for every row_id that has no prediction.
    """
    row_ids, birds = [], []
    for record, file_preds in zip(data.itertuples(False), preds):
        n_windows = len(file_preds)
        row_ids.extend(
            f"{record.id}_{record.site}_{5*k}" for k in range(1, n_windows + 1)
        )
        birds.extend(file_preds)

    sub = pd.DataFrame({"row_id": row_ids, "birds": birds})
    if not SAMPLE_SUB_PATH:
        return sub

    # Keep exactly the rows (and row order) of the sample submission.
    sample_sub = pd.read_csv(SAMPLE_SUB_PATH, usecols=["row_id"])
    sub = sample_sub.merge(sub, on="row_id", how="left")
    sub["birds"] = sub["birds"].fillna("nocall")
    return sub

In [None]:
# sub = preds_as_df(data, preds)
# print(sub.shape)
# sub

In [None]:
# Build the submission table from the EfficientNet predictions and show it.
sub_efficient = preds_as_df(data, preds_efficient)
print(sub_efficient.shape)
sub_efficient

In [None]:
# sub_avg = preds_as_df(data, preds_avg)
# print(sub_avg.shape)
# sub_avg

In [None]:
# Write the submission file into the working directory, without the index column.
sub_efficient.to_csv("submission.csv", index=False)

In [None]:
#CPMP's code
def fast_f1_score(predictions, target):
    """Return the row-averaged (F1, precision, recall) of binary indicators.

    ``predictions`` and ``target`` are 2-D 0/1 arrays of equal shape. The
    counts are taken along axis 1, the three scores are computed per row and
    each is then averaged over the rows. Rows with a zero denominator are
    not special-cased.
    """
    true_pos = (predictions * target).sum(1)
    false_pos = (predictions * (1 - target)).sum(1)
    false_neg = ((1 - predictions) * target).sum(1)

    per_row = (
        true_pos / (true_pos + (false_pos + false_neg) / 2),  # F1
        true_pos / (true_pos + false_pos),  # precision
        true_pos / (true_pos + false_neg),  # recall
    )
    return tuple(metric.mean() for metric in per_row)
    #return {"f1": f1, "prec": precision, "rec": recall}

# Small validation

In [None]:
def get_metrics(s_true, s_pred):
    """Compare two space-separated label strings as sets.

    Args:
        s_true: Ground-truth labels separated by whitespace.
        s_pred: Predicted labels separated by whitespace.

    Returns:
        Dict with ``f1``, ``prec`` and ``rec`` plus the set sizes ``n_true``,
        ``n_pred`` and the intersection size ``n``. A score whose denominator
        is zero (an empty label string) is reported as 0 instead of raising
        ``ZeroDivisionError``.
    """
    s_true = set(s_true.split())
    s_pred = set(s_pred.split())
    n, n_true, n_pred = len(s_true.intersection(s_pred)), len(s_true), len(s_pred)

    # Guard both ratios: either side may be empty.
    prec = n / n_pred if n_pred else 0
    rec = n / n_true if n_true else 0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0

    return {"f1": f1, "prec": prec, "rec": rec, "n_true": n_true, "n_pred": n_pred, "n": n}

In [None]:
# if TARGET_PATH:
#     sub_target = pd.read_csv(TARGET_PATH)
#     sub_target = sub_target.merge(sub, how="left", on="row_id")
    
#     print(sub_target["birds_x"].notnull().sum(), sub_target["birds_x"].notnull().sum())
#     assert sub_target["birds_x"].notnull().all()
#     assert sub_target["birds_y"].notnull().all()
    
#     df_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_target.birds_x, sub_target.birds_y)])
#     #import pdb;pdb.set_trace()
#     #cpmp_f1_mean, cpmp_precision_mean, cpmp_recall_mean = fast_f1_score(sub_target.birds_y, sub_target.birds_x)
    
#     #print (f"f1_mean = {cpmp_f1_mean}, precision_mean = {cpmp_precision_mean}, recall_mean = {cpmp_recall_mean}")
#     print(df_metrics.mean())

In [None]:
# sub_target[sub_target.birds_y != "nocall"]

In [None]:
# sub_target[sub_target.birds_x != "nocall"]

In [None]:
# Local validation: only runs when a label file is available (TARGET_PATH set).
if TARGET_PATH:
    sub_efficient_target = pd.read_csv(TARGET_PATH)
    # Both tables have a "birds" column, so after the merge "birds_x" holds
    # the labels from the target file and "birds_y" the predictions.
    sub_efficient_target = sub_efficient_target.merge(sub_efficient, how="left", on="row_id")

    # Non-null counts of targets and of predictions (the second count
    # previously repeated birds_x).
    print(sub_efficient_target["birds_x"].notnull().sum(), sub_efficient_target["birds_y"].notnull().sum())

    # Every labelled row must have both a target and a prediction.
    assert sub_efficient_target["birds_x"].notnull().all()
    assert sub_efficient_target["birds_y"].notnull().all()

    # Per-row set metrics, then their column means.
    df_sufficient_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_efficient_target.birds_x, sub_efficient_target.birds_y)])

    #cpmp_f1_mean, cpmp_precision_mean, cpmp_recall_mean = fast_f1_score(sub_efficient_target.birds_y, sub_efficient_target.birds_x)
    #print (f"f1_mean = {cpmp_f1_mean}, precision_mean = {cpmp_precision_mean}, recall_mean = {cpmp_recall_mean}")

    print(df_sufficient_metrics.mean())

In [None]:
# Rows where the model predicted at least one bird. sub_efficient_target only
# exists when TARGET_PATH is set, so the lookup is skipped otherwise instead
# of raising NameError.
sub_efficient_target[sub_efficient_target.birds_y != "nocall"] if TARGET_PATH else None

In [None]:
# Rows whose target label is not "nocall". sub_efficient_target only exists
# when TARGET_PATH is set, so the lookup is skipped otherwise instead of
# raising NameError.
sub_efficient_target[sub_efficient_target.birds_x != "nocall"] if TARGET_PATH else None

In [None]:
# if TARGET_PATH:
#     sub_avg_target = pd.read_csv(TARGET_PATH)
#     sub_avg_target = sub_avg_target.merge(sub_avg, how="left", on="row_id")
    
#     print(sub_avg_target["birds_x"].notnull().sum(), sub_avg_target["birds_x"].notnull().sum())
    
#     assert sub_avg_target["birds_x"].notnull().all()
#     assert sub_avg_target["birds_y"].notnull().all()
    
#     df_avg_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_avg_target.birds_x, sub_avg_target.birds_y)])
 
#     #cpmp_f1_mean, cpmp_precision_mean, cpmp_recall_mean = fast_f1_score(sub_efficient_target.birds_y, sub_efficient_target.birds_x)
#     #print (f"f1_mean = {cpmp_f1_mean}, precision_mean = {cpmp_precision_mean}, recall_mean = {cpmp_recall_mean}")

#     print(df_avg_metrics.mean())

In [None]:
# sub_avg_target[sub_avg_target.birds_y != "nocall"]

In [None]:
# sub_avg_target[sub_avg_target.birds_x != "nocall"]