In [None]:
# Print the NVIDIA driver / GPU status for this session.
!nvidia-smi

In [None]:
# Install the resnest package from the local input directory only when it is
# not already importable in this kernel.
try:
    import resnest
except ModuleNotFoundError:
    !pip install -q "../input/resnest50-fast-package/resnest-0.0.6b20200701/resnest"

In [None]:
# Add the local efficientnet-pytorch directory to sys.path so that
# `efficientnet_pytorch` can be imported from it.
effnet_path = '../input/efficientnet-pytorch/'
import sys
sys.path.append(effnet_path)

In [None]:
from efficientnet_pytorch import EfficientNet
import resnest.torch as resnest_torch
import torchvision.models as models
import numpy as np
import collections
import librosa as lb
import soundfile as sf
import pandas as pd
import cv2
from pathlib import Path
import re

import torch
from torch import nn
from  torch.utils.data import Dataset, DataLoader

from tqdm.notebook import tqdm

import time
from resnest.torch import resnest50
from efficientnet_pytorch import EfficientNet


pd.set_option('display.max_rows', 750)

In [None]:
# Ensemble members, in priority order. Each entry gives the architecture name
# passed to get_model() and the checkpoint to load.
# - Only the first SS entries are kept (see the slice further down).
# - The first entry is the one weighted by AP in predict_nets().
# - NOTE(review): the 'thresh' key is not read by any code visible in this notebook.
model_config_list = [
    
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/kkiller-birdclef-models-public/birdclef_resnest50_fold0_epoch_10_f1_val_06471_20210417161101.pth")},
    {'model_name': 'resnext50_32x4d', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnext50_32x4d_fold4_epoch_13.pth")},
    {'model_name': 'resnext50_32x4d', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnext50_32x4d_fold0_epoch_11.pth")},
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/resnest50_fold0_epoch_10_686.pth")},
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold0_epoch_13.pth")},

    # 
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold2_epoch_13.pth")},
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/resnest50_fold1_epoch_10_681.pth")},
#     {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold1_epoch_10.pth")},
    {'model_name': 'resnext50_32x4d', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnext50_32x4d_fold2_epoch_12.pth")},
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/resnest50_fold3_epoch_11_677.pth")},
    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/resnest50_fold2_epoch_11_676.pth")},

    {'model_name': 'resnet34', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnet34_fold0_epoch_10.pth")},
    {'model_name': 'resnet50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnet50_fold3_epoch_13.pth")},
    {'model_name': 'resnet50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnet50_fold2_epoch_14.pth")},
    {'model_name': 'resnet34', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnet34_fold4_epoch_10.pth")},
    {'model_name': 'resnet50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnet50_fold1_epoch_10.pth")},

    {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold2_epoch_23.pth")},
    {'model_name': 'resnet50', 'thresh': 0.11, 'checkpoint': Path("../input/bird-models/birdclef_resnet50_fold4_epoch_13.pth")},
   
] 

In [None]:
# Sanity check: one boolean per config, True when the checkpoint file exists.
[x['checkpoint'].exists() for x in model_config_list]

In [None]:
# Show the checkpoint paths as strings, in ensemble order.
[str(x['checkpoint']) for x in model_config_list]

# Configs

In [None]:
NUM_CLASSES = 397  # output size of the classification head built in get_model()
SR = 32_000  # sample rate the dataset resamples audio to
DURATION = 5  # length, in seconds, of each audio window
THRESH = 0.29  # default cut-off applied to ensemble scores in get_thresh_preds()
AP = 24  # weight given to the first network's output in predict_nets()
SS = 12  # number of configs kept from model_config_list; also the divisor in predict_nets()

In [None]:
# NOTE(review): the two rows below look like tuning notes (presumably
# index, AP, SS, threshold, score) — confirm with the author.
# 0	24	12	0.3	0.728181
# 4430	29	17	0.3	0.727944



# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)

# Default to submission mode: hidden test soundscapes + sample submission, no labels.
TEST_AUDIO_ROOT = Path("../input/birdclef-2021/test_soundscapes")
SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
TARGET_PATH = None
    
# When the test folder holds no .ogg files, fall back to the labelled train
# soundscapes so the pipeline can be scored locally via check_sub().
if not len(list(TEST_AUDIO_ROOT.glob("*.ogg"))):
    TEST_AUDIO_ROOT = Path("../input/birdclef-2021/train_soundscapes")
    SAMPLE_SUB_PATH = None
    # SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
    TARGET_PATH = Path("../input/birdclef-2021/train_soundscape_labels.csv")

In [None]:
# Keep only the first SS configs. Re-running this cell is harmless because the slice is idempotent.
model_config_list = model_config_list[:SS]

In [None]:
# Number of models that will actually be loaded into the ensemble.
print(len(model_config_list))

# Data

In [None]:
class MelSpecComputer:
    """Callable that converts a waveform into a dB-scaled mel spectrogram.

    Arguments:
        sr {int} -- Sample rate of the audio passed to ``__call__``
        n_mels {int} -- Number of mel bands
        fmin {number} -- Lower frequency bound forwarded to librosa
        fmax {number} -- Upper frequency bound forwarded to librosa

    Keyword Arguments:
        **kwargs -- Extra arguments forwarded to ``librosa.feature.melspectrogram``.
            ``n_fft`` defaults to ``sr // 10`` and ``hop_length`` to ``sr // 40``.
    """

    def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax
        # Only fill in the STFT sizes the caller did not specify.
        kwargs.setdefault("n_fft", self.sr // 10)
        kwargs.setdefault("hop_length", self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, y):
        """Return the float32 mel spectrogram (in dB) of waveform ``y``."""
        # Pass the waveform by keyword: librosa >= 0.10 made ``y`` keyword-only,
        # and older releases accept the keyword form as well.
        melspec = lb.feature.melspectrogram(
            y=y, sr=self.sr, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax, **self.kwargs,
        )

        melspec = lb.power_to_db(melspec).astype(np.float32)
        return melspec

In [None]:
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Standardize an array and rescale it to uint8 values in [0, 255].

    Arguments:
        X {np.ndarray} -- Input array (e.g. a mel spectrogram)

    Keyword Arguments:
        eps {float} -- Added to the std before dividing, and used as the
            minimum value range required for rescaling (default: {1e-6})
        mean {number} -- Centering value; a falsy value falls back to ``X.mean()``
        std {number} -- Scaling value; a falsy value falls back to ``X.std()``

    Returns:
        np.ndarray -- uint8 array with the shape of ``X``; all zeros when the
            standardized values span no more than ``eps``
    """
    center = mean or X.mean()
    scale = std or X.std()
    X = (X - center) / (scale + eps)

    lo, hi = X.min(), X.max()

    # Degenerate (near-constant) input: nothing to stretch, return a black image.
    if not (hi - lo) > eps:
        return np.zeros_like(X, dtype=np.uint8)

    stretched = np.clip(X, lo, hi)
    stretched = 255 * (stretched - lo) / (hi - lo)
    return stretched.astype(np.uint8)

def crop_or_pad(y, length):
    """Return ``y`` truncated or zero-padded at the end to exactly ``length`` samples.

    Arguments:
        y {np.ndarray} -- 1-D signal
        length {int} -- Target number of samples

    Returns:
        np.ndarray -- ``y`` unchanged when already ``length`` long, its first
            ``length`` samples when longer, or ``y`` followed by zeros when shorter
    """
    n = len(y)
    if n < length:
        y = np.asarray(y)
        # Pad with the *missing* number of zeros, keeping the input dtype.
        # (The previous code appended len(y) copies of the value `length`.)
        y = np.concatenate([y, np.zeros(length - n, dtype=y.dtype)])
    elif n > length:
        y = y[:length]
    return y

In [None]:
class BirdCLEFDataset(Dataset):
    """Dataset that maps each audio file to a stack of mel-spectrogram images.

    Every item is the whole file cut into fixed-length windows; each window is
    converted to a mel spectrogram, scaled to [0, 1] and duplicated into three
    channels. The first axis of the returned array indexes the windows.

    Arguments:
        data {pd.DataFrame} -- Frame with a "filepath" column, indexed so that
            ``data.loc[idx, "filepath"]`` works for ``idx`` in ``range(len(data))``

    Keyword Arguments:
        sr {int} -- Target sample rate (default: {SR})
        n_mels {int} -- Number of mel bands (default: {128})
        fmin {number} -- Lower mel frequency bound (default: {0})
        fmax {number} -- Upper mel frequency bound; falsy means ``sr // 2``
        duration {int} -- Window length in seconds (default: {DURATION})
        step {int} -- Hop between windows in samples; falsy means one window
            length, i.e. non-overlapping windows
        res_type {str} -- Resampling method passed to librosa (default: {"kaiser_fast"})
        resample {bool} -- Resample files whose rate differs from ``sr`` (default: {True})
    """

    def __init__(self, data, sr=SR, n_mels=128, fmin=0, fmax=None, duration=DURATION, step=None, res_type="kaiser_fast", resample=True):

        self.data = data

        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax or self.sr//2

        self.duration = duration
        self.audio_length = self.duration*self.sr
        self.step = step or self.audio_length

        self.res_type = res_type
        self.resample = resample

        self.mel_spec_computer = MelSpecComputer(sr=self.sr, n_mels=self.n_mels, fmin=self.fmin,
                                                 fmax=self.fmax)

    def __len__(self):
        return len(self.data)

    @staticmethod
    def normalize(image):
        """Scale a uint8 image to float32 in [0, 1] and stack it into 3 identical channels."""
        image = image.astype("float32", copy=False) / 255.0
        image = np.stack([image, image, image])
        return image

    def audio_to_image(self, audio):
        """Convert one audio window into a normalized 3-channel spectrogram image."""
        melspec = self.mel_spec_computer(audio)
        image = mono_to_color(melspec)
        image = self.normalize(image)
        return image

    def read_file(self, filepath):
        """Read an audio file and return the stacked images of all its full windows.

        Raises:
            ValueError -- If the file does not contain at least one full window.
        """
        audio, orig_sr = sf.read(filepath, dtype="float32")

        if self.resample and orig_sr != self.sr:
            # Keyword form: librosa >= 0.10 made orig_sr/target_sr keyword-only,
            # and older releases accept the same keyword names.
            audio = lb.resample(audio, orig_sr=orig_sr, target_sr=self.sr, res_type=self.res_type)

        audios = []
        for i in range(self.audio_length, len(audio) + self.step, self.step):
            start = max(0, i - self.audio_length)
            end = start + self.audio_length
            audios.append(audio[start:end])

        # Only the final window can run past the end of the signal; drop it when incomplete.
        if audios and len(audios[-1]) < self.audio_length:
            audios = audios[:-1]

        if not audios:
            # Fail with an explicit message instead of an opaque IndexError / np.stack error.
            raise ValueError(
                f"{filepath}: audio has {len(audio)} samples, "
                f"fewer than one window of {self.audio_length} samples"
            )

        images = [self.audio_to_image(audio) for audio in audios]
        images = np.stack(images)

        return images

    def __getitem__(self, idx):
        return self.read_file(self.data.loc[idx, "filepath"])

In [None]:
# One row per .ogg soundscape in TEST_AUDIO_ROOT. The file stem is split on "_"
# into id / site / date, so every stem must contain exactly three "_"-separated
# parts or the frame construction fails on a column-count mismatch.
data = pd.DataFrame(
     [(path.stem, *path.stem.split("_"), path) for path in Path(TEST_AUDIO_ROOT).glob("*.ogg")],
    columns = ["filename", "id", "site", "date", "filepath"]
)
print(data.shape)
data.head()

In [None]:
# Training metadata is only used here to recover the label vocabulary.
df_train = pd.read_csv("../input/birdclef-2021/train_metadata.csv")

# Class index <-> label name. Indices follow the sorted unique primary labels.
# NOTE(review): this ordering must match the one used when the checkpoints were trained — confirm.
LABEL_IDS = {label: label_id for label_id,label in enumerate(sorted(df_train["primary_label"].unique()))}
INV_LABEL_IDS = {val: key for key,val in LABEL_IDS.items()}

# Inference

In [None]:
# Build the inference dataset and show its size and the array shape of the first file.
# Evaluating test_data[0] reads and converts that whole file.
test_data = BirdCLEFDataset(data=data)
len(test_data), test_data[0].shape

In [None]:
# model = getattr(models, 'resnext50_32x4d')(pretrained=False)

In [None]:
def get_model(name, checkpoint_path, num_classes=NUM_CLASSES):
    """
    Builds a model, replaces its classification head and loads a checkpoint.
    Supports ResNest, ResNext-wsl, EfficientNet, ResNext and ResNet.

    Arguments:
        name {str} -- Name of the model to load
        checkpoint_path {str} -- Path of the state dict to load into the model

    Keyword Arguments:
        num_classes {int} -- Number of classes to use (default: {NUM_CLASSES})

    Returns:
        torch model -- Model with the checkpoint loaded, on DEVICE, in eval mode
    """
    if "resnest" in name:
        model = getattr(resnest_torch, name)(pretrained=False)
    elif "wsl" in name:
        model = torch.hub.load("facebookresearch/WSL-Images", name)
    elif name.startswith("resnext") or name.startswith("resnet"):
        #model = torch.hub.load("pytorch/vision:v0.6.0", name, pretrained=False)
        model = getattr(models, name)(pretrained=False)
    elif name.startswith("tf_efficientnet_b"):
        # NOTE(review): `timm` is not imported in the cells visible here; this
        # branch raises NameError unless it is imported elsewhere.
        model = getattr(timm.models.efficientnet, name)(pretrained=True)
    elif "efficientnet-b" in name:
        model = EfficientNet.from_name(name)
    else:
        # NOTE(review): `pretrainedmodels` is not imported in the cells visible
        # here; this branch raises NameError unless it is imported elsewhere.
        model = pretrainedmodels.__dict__[name](pretrained='imagenet')

    # Swap the final linear layer, whichever attribute name the architecture
    # uses, for one with `num_classes` outputs.
    if hasattr(model, "fc"):
        nb_ft = model.fc.in_features
        model.fc = nn.Linear(nb_ft, num_classes)
    elif hasattr(model, "_fc"):
        nb_ft = model._fc.in_features
        model._fc = nn.Linear(nb_ft, num_classes)
    elif hasattr(model, "classifier"):
        nb_ft = model.classifier.in_features
        model.classifier = nn.Linear(nb_ft, num_classes)
    elif hasattr(model, "last_linear"):
        nb_ft = model.last_linear.in_features
        model.last_linear = nn.Linear(nb_ft, num_classes)

    # Load on CPU first so the checkpoint does not depend on the device it was saved from.
    dummy_device = torch.device("cpu")
    d = torch.load(checkpoint_path, map_location=dummy_device)

    # Drop a leading "model." wrapper prefix from the parameter names. Only the
    # prefix is removed, so a key that merely contains "model." further in
    # (e.g. "submodel.weight") is left intact.
    prefix = "model."
    for key in list(d.keys()):
        new_key = key[len(prefix):] if key.startswith(prefix) else key
        d[new_key] = d.pop(key)

    model.load_state_dict(d)
    model = model.to(DEVICE)
    model = model.eval()
    return model

In [None]:
# model = get_model('resnest50',Path("../input/kkiller-birdclef-models-public/birdclef_resnest50_fold0_epoch_10_f1_val_06471_20210417161101.pth"))

In [None]:
# ******************************
# 0 0.6654861111111114 0.7000000000000001
# ******************************
# 1 0.6654166666666669 0.6000000000000001
# ******************************
# 2 0.6643055555555558 0.7000000000000001
# ******************************
# 3 0.6539583333333335 0.5
# ******************************
# 4 0.654375 0.6000000000000001
# ******************************
# 5 0.6614166666666669 0.7000000000000001
# ******************************
# 6 0.6537083333333334 0.7000000000000001
# ******************************
# 7 0.7012083333333341 0.1
# ******************************
# 8 0.663291666666667 0.6000000000000001

In [None]:
# v1
# model_config_list = [
#     {'model_name': 'resnest50', 'thresh': 0.11, 'checkpoint': Path("../input/kkiller-birdclef-models-public/birdclef_resnest50_fold0_epoch_10_f1_val_06471_20210417161101.pth")},
#     {'model_name': 'resnest50', 'thresh': 0.7, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold0_epoch_19_f1_val_07711_20210523161407.pth")},
#     {'model_name': 'resnest50', 'thresh': 0.7, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold1_epoch_19_f1_val_07678_20210523031425.pth")},
#     {'model_name': 'resnest50', 'thresh': 0.7, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold2_epoch_19_f1_val_07680_20210523050005.pth")},
#     {'model_name': 'resnest50', 'thresh': 0.7, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold3_epoch_19_f1_val_07685_20210523064705.pth")},
#     {'model_name': 'resnest50', 'thresh': 0.7, 'checkpoint': Path("../input/bird-models/birdclef_resnest50_fold4_epoch_19_f1_val_07678_20210523175922.pth")},
#     {'model_name': 'resnext50_32x4d', 'thresh': 0.7, 'checkpoint': Path("../input/bird-models/birdclef_resnext50_32x4d_fold0_epoch_11_f1_val_07318_20210523012734.pth")},

# ]



In [None]:
# @torch.no_grad()
def get_thresh_preds(out, thresh=None):
    """Return, for every row of scores, the class indices scoring above a threshold.

    Arguments:
        out {array} -- 2-D scores, one row per sample and one column per class

    Keyword Arguments:
        thresh {float} -- Cut-off; ``None`` means the global ``THRESH`` (default: {None})

    Returns:
        list -- One list of class indices per row, ordered by decreasing score
    """
    # `is None` rather than `or`, so an explicit threshold of 0 is honoured.
    thresh = THRESH if thresh is None else thresh
    ranked = (-out).argsort(1)
    n_above = (out > thresh).sum(1)
    # The scores above the threshold are exactly the first `n_above` of the ranking.
    return [row[:count].tolist() for row, count in zip(ranked, n_above)]


def get_metrics(s_true, s_pred):
    """Compute set-based precision, recall and F1 between two label strings.

    Arguments:
        s_true {str} -- Whitespace-separated ground-truth labels
        s_pred {str} -- Whitespace-separated predicted labels

    Returns:
        dict -- Keys "f1", "prec", "rec", "n_true", "n_pred" and "n" (number
            of labels in common). A ratio whose denominator is zero is
            reported as 0 instead of raising ZeroDivisionError.
    """
    s_true = set(s_true.split())
    s_pred = set(s_pred.split())
    n, n_true, n_pred = len(s_true.intersection(s_pred)), len(s_true), len(s_pred)

    # Guard against empty label strings.
    prec = n/n_pred if n_pred else 0
    rec = n/n_true if n_true else 0
    f1 = 2*prec*rec/(prec + rec) if prec + rec else 0

    return {"f1": f1, "prec": prec, "rec": rec, "n_true": n_true, "n_pred": n_pred, "n": n}

def get_bird_names(preds):
    """Turn lists of class indices into space-separated label strings.

    Arguments:
        preds {list} -- One sequence of class indices per sample

    Returns:
        list -- One string per sample: the labels looked up in INV_LABEL_IDS
            and joined with spaces, or "nocall" when the sample has no indices
    """
    return [
        " ".join([INV_LABEL_IDS[bird_id] for bird_id in pred]) if pred else "nocall"
        for pred in preds
    ]

def predict(net, test_data, names=False):
    """Run a single network over every item of ``test_data``.

    Arguments:
        net {callable} -- Model returning logits for a batch
        test_data {sequence} -- Indexable dataset yielding one numpy batch per item

    Keyword Arguments:
        names {bool} -- Return label strings (thresholded with the default
            threshold) instead of probabilities (default: {False})

    Returns:
        list -- One entry per item: a tensor of sigmoid probabilities, or a
            list of label strings when ``names`` is true
    """
    outputs = []
    with torch.no_grad():
        for idx in tqdm(list(range(len(test_data)))):
            batch = torch.from_numpy(test_data[idx]).to(DEVICE)
            probs = torch.sigmoid(net(batch))
            outputs.append(get_bird_names(get_thresh_preds(probs)) if names else probs)
    return outputs


def predict_nets(nets, test_data, names=False):
    """Run an ensemble over every item of ``test_data`` and blend the outputs.

    The blend is ``(AP * p_0 + p_1 + ... + p_k) / SS``: the first network is
    weighted by the global ``AP`` and the sum is divided by the global ``SS``,
    not by the number of networks, so the result is a score rather than a
    probability and can exceed 1.

    Arguments:
        nets {list} -- Models returning logits for a batch; order matters
        test_data {sequence} -- Indexable dataset yielding one numpy batch per item

    Keyword Arguments:
        names {bool} -- Return label strings (thresholded with the default
            threshold) instead of blended scores (default: {False})

    Returns:
        list -- One entry per item: a tensor of blended scores, or a list of
            label strings when ``names`` is true
    """
    results = []
    with torch.no_grad():
        for idx in tqdm(list(range(len(test_data)))):
            batch = torch.from_numpy(test_data[idx]).to(DEVICE)

            blended = 0.
            for position, net in enumerate(nets):
                probs = torch.sigmoid(net(batch))
                # Only the first network carries the extra AP weight.
                blended += (AP * probs) if position == 0 else probs
            blended /= SS

            results.append(get_bird_names(get_thresh_preds(blended)) if names else blended)
    return results


def preds_as_df(data, preds):
    """Flatten per-file window predictions into a submission-style frame.

    Arguments:
        data {pd.DataFrame} -- One row per file, with "id" and "site" columns
        preds {list} -- Per file, one label string per window, in window order

    Returns:
        pd.DataFrame -- Columns "row_id" ("<id>_<site>_<5*k>" for the k-th
            window, k starting at 1) and "birds". When SAMPLE_SUB_PATH is set,
            rows are aligned to the sample submission and missing
            predictions become "nocall".
    """
    row_ids, birds = [], []
    for row, file_preds in zip(data.itertuples(False), preds):
        n_windows = len(file_preds)
        birds.extend(file_preds)
        row_ids.extend(f"{row.id}_{row.site}_{5*k}" for k in range(1, n_windows + 1))

    sub = pd.DataFrame({"row_id": row_ids, "birds": birds})

    if not SAMPLE_SUB_PATH:
        return sub

    # Keep exactly the rows (and order) that the sample submission expects.
    expected = pd.read_csv(SAMPLE_SUB_PATH, usecols=["row_id"])
    sub = expected.merge(sub, on="row_id", how="left")
    sub["birds"] = sub["birds"].fillna("nocall")
    return sub


def check_sub(sub):
    """Score a submission frame against the labels at TARGET_PATH and print the mean metrics.

    Does nothing except print a notice when TARGET_PATH is None, which is the
    case when real test soundscapes are present and no labels exist. Without
    this guard the notebook crashed in ``pd.read_csv`` before the submission
    file was written.

    Arguments:
        sub {pd.DataFrame} -- Frame with "row_id" and "birds" columns
    """
    if TARGET_PATH is None:
        print("TARGET_PATH is not set; skipping check_sub.")
        return

    sub_target = pd.read_csv(TARGET_PATH)
    # After the merge, birds_x holds the ground truth and birds_y the predictions.
    sub_target = sub_target.merge(sub, how="left", on="row_id")

    assert sub_target["birds_x"].notnull().all()
    assert sub_target["birds_y"].notnull().all()

    df_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_target.birds_x, sub_target.birds_y)])

    print(df_metrics.mean())
    
def filter_site(sub, s_dict, site_id):
    """Drop predicted birds that are not in the allowed set of the row's site.

    Arguments:
        sub {pd.DataFrame} -- Frame with "row_id" ("<id>_<site>_<sec>") and
            "birds" (space-separated labels) columns
        s_dict {dict} -- Maps a region key to the collection of allowed labels
        site_id {dict} -- Maps a site code to its region key in ``s_dict``

    Returns:
        list -- One filtered label string per row; "nocall" when nothing is left
    """
    filtered = []
    for row in sub.itertuples(False):
        site = row.row_id.split('_')[1]
        allowed = s_dict[site_id[site]]
        kept = [
            name for name in row.birds.split(' ')
            if name == 'nocall' or name in allowed
        ]
        filtered.append(' '.join(kept) if kept else 'nocall')
    return filtered

def get_season(mon, lat):
    """Map a month and a latitude to a season bucket in 0..3.

    The month is first mapped to its quarter (<=3 -> 0, <=6 -> 1, <=9 -> 2,
    otherwise 3). For ``lat > 0`` the quarter is returned as is; for any other
    latitude the order is reversed (3 - quarter).

    Arguments:
        mon {int} -- Month number
        lat {float} -- Latitude

    Returns:
        int -- Season bucket between 0 and 3
    """
    if mon <= 3:
        quarter = 0
    elif mon <= 6:
        quarter = 1
    elif mon <= 9:
        quarter = 2
    else:
        quarter = 3

    return quarter if lat > 0 else 3 - quarter


In [None]:
# Load every configured checkpoint. The list order is preserved, so
# model_config_list[0] is the network that predict_nets() weights by AP.
nets = [
        get_model(row['model_name'], row['checkpoint'].as_posix()) for row in model_config_list
]



# Blended ensemble scores, one tensor per soundscape file.
pred_probas = predict_nets(nets, test_data, names=False)
print('nets', len(nets))
print(len(pred_probas))

# Move the scores to host memory as numpy arrays for thresholding.
pred_probas = [x.cpu().numpy() for x in pred_probas]



In [None]:
print(pred_probas[0])

# Threshold each file's scores and translate class indices into label strings.
preds = [get_bird_names(get_thresh_preds(pred, thresh=THRESH)) for pred in pred_probas]

sub = preds_as_df(data, preds)
print(sub.shape)
print(sub['birds'].value_counts().shape)
# Scores against the labels at TARGET_PATH (set only in the train-soundscape fallback).
check_sub(sub)
# f1        0.726792
# f1        0.725556
# f1        0.726111


In [None]:
# Write the unfiltered ensemble predictions; the final cell of the notebook
# writes to the same file name again.
sub.to_csv("submission.csv", index=False)

In [None]:
# Training metadata (same CSV that df_train was read from), used to build the season rules.
df_meta = pd.read_csv('../input/birdclef-2021/train_metadata.csv')

In [None]:

import collections

# rules[season] -> labels recorded in that season bucket. Keys are the bucket
# numbers from get_season() converted to strings.
rules = collections.defaultdict(list)
for row in df_meta.itertuples(False):
    # The month is taken from the second "-"-separated field of the date string.
    bird, lat, mon = row.primary_label, float(row.latitude), int(row.date.split('-')[1])
#     if lat>0:
#         a = 'n'
#     else:
#         a = 's'
    
    s = get_season(mon, lat)    
    rules[str(s)].append(bird)
    
# Reduce each bucket to the labels that occur more than 3 times in it.
for key in rules:
    c = collections.Counter(rules[key])
    c = [x[0] for x in c.items() if x[1]>3]
#     c = [x[0] for x in c.items()]
    rules[key] = c
    print(key, len(c))

In [None]:
# Presumably the latitude of each recording site, keyed by site code — confirm.
# NOTE(review): not referenced by any cell visible in this notebook; the
# season filter below calls get_season() with a fixed latitude of 1.
lat_dict = {
    'COL':5.57,
    'COR':10.12,
    'SNE': 38.49,
    'SSW': 42.47
}

In [None]:
# Season filter: per file, keep only the predicted labels that appear in the
# rules list for the file's season bucket.
p_preds = []
for filename, pred in zip(data['filename'], preds):
    p_pred = []
    # The filename is "<id>_<site>_<date>"; characters 4-5 of the date are used as the month.
    _, site, dt = filename.split('_')
    month = dt[4:6]
    # The latitude is fixed to 1, so the lat > 0 branch of get_season() is always used.
    s = get_season(int(month), 1)    
    birds = rules[str(s)]
    for i in pred:
        tmp = []
        for j in i.split(' '):
            if j in birds or j=='nocall':
                tmp.append(j)
        # Everything was filtered out: report the window as "nocall".
        if len(tmp)==0:
            tmp.append('nocall')
        tmp = ' '.join(tmp)
        p_pred.append(tmp)
    p_preds.append(p_pred)

In [None]:
# Rebuild the submission frame from the season-filtered predictions.
# This rebinds `sub`, replacing the unfiltered frame.
sub = preds_as_df(data, p_preds)
print(sub.shape)
print(sub['birds'].value_counts().shape)
sub.head()

check_sub(sub)

# 0.724931
# 0.7242222222222232
# f1        0.723806
# 0.726417

In [None]:
# Rebinds df_meta to a pre-computed frame; the cells below read its 'r1' and
# 'primary_label' columns. Only unpickle files from a trusted source, since
# unpickling can execute arbitrary code.
df_meta = pd.read_pickle('../input/bird-models/df_meta.pkl')

In [None]:
# Region key per site code, at four levels (suffix _0 .. _3).
# NOTE(review): the values look like H3 cell indexes at resolutions 0-3 — confirm.
# Only site_id_1 is used by the visible cells; its keys are matched against
# the 'r1' column of df_meta.
site_id_0 = {'COL': '8067fffffffffff',
 'COR': '8067fffffffffff',
 'SNE': '8029fffffffffff',
 'SSW': '802bfffffffffff'}

site_id_1 = {'COL': '81663ffffffffff',
 'COR': '816d7ffffffffff',
 'SNE': '8129bffffffffff',
 'SSW': '812bbffffffffff'}

site_id_2 = {'COL': '82661ffffffffff',
 'COR': '826d6ffffffffff',
 'SNE': '82298ffffffffff',
 'SSW': '822aa7fffffffff'}

site_id_3 = {'COL': '83661bfffffffff',
 'COR': '836d69fffffffff',
 'SNE': '83298dfffffffff',
 'SSW': '832aa4fffffffff'}

In [None]:
# s1: value of the 'r1' column -> set of primary labels found with that value.
s1 = df_meta.groupby('r1')['primary_label'].agg(lambda x:set(x))
s1 = s1.to_dict()

In [None]:
# Apply the site filter on top of the season-filtered frame; `sub` itself is left unchanged.
sub1 = sub.copy()
ret = filter_site(sub, s1, site_id_1)
sub1['birds'] = ret
print(sub1['birds'].value_counts().shape)


In [None]:
# Score the season + site filtered predictions.
check_sub(sub1)
# f1        0.729764
# 0.729806
# 0.731389
# 0.733194
# f1        0.734083
# 0.185 f1        0.734500

# f1        0.733292


In [None]:
# Final submission: overwrites the submission.csv written earlier with the filtered predictions.
sub1.to_csv("submission.csv", index=False)