# Notes

In [None]:
!nvidia-smi

In [None]:
import random
import numpy as np
import librosa as lb
import soundfile as sf
import pandas as pd
import cv2
from pathlib import Path
import re

import torch
from torch import nn
from  torch.utils.data import Dataset, DataLoader


import sys
sys.path.append("../input/timm-pytorch-image-models/pytorch-image-models-master")
sys.path.append("../input/resnest50-fast-package/resnest-0.0.6b20200701/resnest")
sys.path.append("../input/audiomentations/audiomentations-0.15.0-py3-none-any.whl")
sys.path.append("../input/evaluations")
sys.path.append("../input/torchlibrosa/torchlibrosa-0.0.5-py3-none-any.whl")





from tqdm.notebook import tqdm

import time
from resnest.torch import resnest50
import timm


import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from albumentations.core.transforms_interface import ImageOnlyTransform
from audiomentations import Compose,ClippingDistortion


In [None]:
from evaluations.kaggle_2020 import *

# Configs

In [None]:
# Problem constants: number of target classes, the sample rate handed to the
# dataset, and the length (in seconds) of one prediction window.
NUM_CLASSES = 397
SR = 32_000
DURATION = 5

# Prefer the GPU when torch can see one, otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)

TEST_AUDIO_ROOT = Path("../input/birdclef-2021/test_soundscapes")

if any(TEST_AUDIO_ROOT.glob("*.ogg")):
    # Test soundscapes are present: predict on them and align the output
    # with the sample submission; there are no labels to validate against.
    SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
    TARGET_PATH = None
else:
    # No test audio: fall back to the labelled train soundscapes so the
    # pipeline can be validated locally.
    TEST_AUDIO_ROOT = Path("../input/birdclef-2021/train_soundscapes")
    SAMPLE_SUB_PATH = None
    TARGET_PATH = Path("../input/birdclef-2021/train_soundscape_labels.csv")

In [None]:
import timm

class CustomModel(nn.Module):
    """timm backbone with its classification layer swapped for a NUM_CLASSES head.

    Which attribute gets replaced depends on what the created model exposes:

    * ``head`` (labelled "rexnet family" by the original author): ``head.fc``
      becomes a 512-unit MLP.
    * ``fc``: becomes a two-hidden-layer (1024, 1024) MLP.
    * otherwise the first of ``_fc``, ``classifier``, ``last_linear`` that
      exists becomes a single linear layer.

    The ``head`` check is independent of the others, so a model exposing both
    ``head`` and one of the other attributes gets both replaced.
    """

    def __init__(self, model_name='efficientnet_b0', pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        backbone = self.model

        if hasattr(backbone, "head"):
            in_features = backbone.head.fc.in_features  # 1280
            backbone.head.fc = nn.Sequential(
                nn.Linear(in_features, 512),
                nn.SiLU(),
                nn.Dropout(0.3),
                nn.Linear(512, NUM_CLASSES),
            )

        if hasattr(backbone, "fc"):
            in_features = backbone.fc.in_features
            backbone.fc = nn.Sequential(
                nn.Linear(in_features, 1024), nn.ReLU(), nn.Dropout(p=0.2),
                nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
                nn.Linear(1024, NUM_CLASSES),
            )
        else:
            # Plain linear classifier on the first matching attribute.
            for attr in ("_fc", "classifier", "last_linear"):
                if hasattr(backbone, attr):
                    in_features = getattr(backbone, attr).in_features
                    setattr(backbone, attr, nn.Linear(in_features, NUM_CLASSES))
                    break

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs of the wrapped model."""
        return self.model(x)

# Data

In [None]:
class MelSpecComputer:
    """Callable that converts a waveform into a dB-scaled mel spectrogram.

    Args:
        sr: Sample rate forwarded to librosa.
        n_mels: Number of mel bands.
        fmin: Lowest frequency of the mel filter bank.
        fmax: Highest frequency of the mel filter bank.
        **kwargs: Extra keyword arguments forwarded to
            ``librosa.feature.melspectrogram``. ``n_fft`` defaults to
            ``sr // 10`` and ``hop_length`` to ``sr // 40`` when not given.
    """

    def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax
        kwargs.setdefault("n_fft", self.sr // 10)
        kwargs.setdefault("hop_length", self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, y):
        """Return the float32 power-to-dB mel spectrogram of waveform ``y``."""
        # The waveform is passed by keyword: recent librosa releases reject it
        # as a positional argument, and older releases accept the keyword too.
        melspec = lb.feature.melspectrogram(
            y=y, sr=self.sr, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax, **self.kwargs,
        )

        return lb.power_to_db(melspec).astype(np.float32)

In [None]:
# def mono_to_color(X, eps=1e-6, mean=None, std=None):
#     mean = mean or X.mean()
#     std = std or X.std()
#     X = (X - mean) / (std + eps)
    
#     _min, _max = X.min(), X.max()

#     if (_max - _min) > eps:
#         V = np.clip(X, _min, _max)
#         V = 255 * (V - _min) / (_max - _min)
#         V = V.astype(np.uint8)
#     else:
#         V = np.zeros_like(X, dtype=np.uint8)

#     return V

def crop_or_pad(y, length):
    """Return ``y`` cut or zero-padded at its end to exactly ``length`` samples.

    Args:
        y: 1-D sequence of samples.
        length: Desired number of samples.

    Returns:
        ``y`` unchanged when it already has ``length`` samples, its first
        ``length`` samples when it is longer, or ``y`` followed by zeros when
        it is shorter.
    """
    missing = length - len(y)
    if missing > 0:
        # Pad with `missing` zeros. The previous code appended
        # `length - np.zeros(len(y))`, i.e. len(y) copies of `length`.
        padding = np.zeros(missing, dtype=np.asarray(y).dtype)
        y = np.concatenate([y, padding])
    elif missing < 0:
        y = y[:length]
    return y

In [None]:
from torchvision import transforms


def mono_to_color(X: np.ndarray, height, width, mean=0.5, std=0.5, eps=1e-6):
    """Turn a single-channel array into a resized 3-channel tensor.

    ``X`` is replicated along a new last axis, multiplied by 255 and cast to
    uint8, then run through ToPILImage -> Resize([height, width]) -> ToTensor
    -> Normalize(0.5, 0.5). The final ``(t + 1) / 2`` undoes that
    normalisation.

    ``mean``, ``std`` and ``eps`` are accepted but not used by this function.
    """
    rgb = np.stack((X, X, X), axis=-1)
    rgb_u8 = (255 * rgb).astype(np.uint8)

    pipeline = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([height, width]),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])

    normalized = pipeline(rgb_u8)
    return (normalized + 1) / 2

In [None]:
def random_power(images, power = 1.5, c= 0.7):
    """Min-max scale ``images`` and raise them to a random exponent.

    The exponent is ``random.random() * power + c``, so it is drawn from
    ``[c, c + power)`` using the global ``random`` state.
    """
    shifted = images - images.min()
    scaled = shifted / (shifted.max() + 0.0000001)
    exponent = random.random() * power + c
    return scaled ** exponent


class BirdCLEFDataset(Dataset):
    """Dataset yielding, per audio file, a stack of spectrogram images.

    Each item is one whole file cut into consecutive windows of
    ``duration * sr`` samples (stride ``step``); every full-length window is
    converted to an image by ``audio_to_image``.

    Args:
        data: DataFrame with a ``filepath`` column, indexed so that
            ``data.loc[idx, "filepath"]`` resolves for ``idx`` in
            ``range(len(data))``.
        wh: ``(height, width)`` passed to ``mono_to_color``.
        sr: Target sample rate.
        n_mels, fmin, fmax: Mel filter bank settings; ``fmax`` defaults to
            ``sr // 2``.
        duration: Window length in seconds.
        step: Stride between windows in samples; defaults to one window.
        res_type: Resampling method forwarded to librosa.
        resample: Resample files whose native rate differs from ``sr``.
        tta: Apply ``ClippingDistortion`` to every window before imaging.
    """

    def __init__(self, data, wh=(128,201), sr=SR, n_mels=128, fmin=0, fmax=None, duration=DURATION, step=None, res_type="kaiser_fast", resample=True, tta=True):

        self.data = data

        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax or self.sr//2

        self.duration = duration
        self.audio_length = self.duration*self.sr
        self.step = step or self.audio_length

        self.res_type = res_type
        self.resample = resample
        self.wh = wh
        self.tta = tta

        self.mel_spec_computer = MelSpecComputer(sr=self.sr, n_mels=self.n_mels, fmin=self.fmin,
                                                 fmax=self.fmax)

    def __len__(self):
        return len(self.data)

    @staticmethod
    def normalize(image):
        """Scale a 0-255 image to [0, 1] and replicate it into 3 leading channels."""
        image = image.astype("float32", copy=False) / 255.0
        image = np.stack([image, image, image])
        return image

    def audio_to_image(self, audio):
        """Convert one audio window into the image fed to the models."""
        image = self.mel_spec_computer(audio)
        image = (image+80)/80

        # NOTE(review): random_power draws a random exponent, so the images
        # (and therefore the predictions) are not deterministic unless the
        # `random` module is seeded. The original author flagged this line
        # as "maybe wrong".
        image = random_power(image, power=2, c= 0.7)

        image = mono_to_color(image, height=self.wh[0], width=self.wh[1])

        return image

    def read_file(self, filepath):
        """Load ``filepath`` and return the stacked images of its full windows.

        Raises:
            ValueError: If the file holds less than one full window.
        """
        audio, orig_sr = sf.read(filepath, dtype="float32")

        if self.resample and orig_sr != self.sr:
            # Keyword form works on both old and new librosa releases.
            audio = lb.resample(audio, orig_sr=orig_sr, target_sr=self.sr, res_type=self.res_type)

        # Build the augmentation once per file instead of once per window.
        distort = None
        if self.tta:
            distort = ClippingDistortion(
                min_percentile_threshold = 25, max_percentile_threshold = 25, p = 1.0
            )

        audios = []
        for i in range(self.audio_length, len(audio) + self.step, self.step):
            start = max(0, i - self.audio_length)
            end = start + self.audio_length

            chunk = audio[start:end]
            if distort is not None:
                chunk = distort(samples = chunk, sample_rate = self.sr)
            audios.append(chunk)

        # Only the last window can be short; drop it rather than pad it.
        if audios and len(audios[-1]) < self.audio_length:
            audios = audios[:-1]

        if not audios:
            raise ValueError(
                f"{filepath}: audio is shorter than one {self.duration}s window"
            )

        images = [self.audio_to_image(audio) for audio in audios]
        images = np.stack(images)

        return images

    def __getitem__(self, idx):
        return self.read_file(self.data.loc[idx, "filepath"])

In [None]:
# One row per soundscape file. The file stem is split on "_" and must yield
# exactly three fields (id, site, date) to match the column list.
data = pd.DataFrame(
    data=[
        (ogg.stem, *ogg.stem.split("_"), ogg)
        for ogg in Path(TEST_AUDIO_ROOT).glob("*.ogg")
    ],
    columns=["filename", "id", "site", "date", "filepath"],
)
print(data.shape)
data.head()

In [None]:
df_train = pd.read_csv("../input/birdclef-2021/train_metadata.csv")

# Class index = position of the label in the sorted list of unique primary
# labels; INV_LABEL_IDS is the reverse (index -> label) mapping.
LABEL_IDS = {
    label: label_id
    for label_id, label in enumerate(sorted(df_train["primary_label"].unique()))
}
INV_LABEL_IDS = dict(zip(LABEL_IDS.values(), LABEL_IDS.keys()))

In [None]:
# LABEL_IDS = {label : label_id for label_id, label in enumerate(primary_labels)}
# LABEL_IDS_INV = {label_id : label for label, label_id in LABEL_IDS.items()}

# Inference

In [None]:
class RegnetModel(nn.Module):
    """timm backbone whose ``head.fc`` is replaced by a three-layer MLP.

    The created model must expose ``head.fc``; its input width feeds a
    1024 -> 1024 -> NUM_CLASSES classifier with ReLU and dropout (p=0.2)
    after each hidden layer.
    """

    def __init__(self, model_name='efficientnet_b0', pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)

        head = self.model.head
        in_features = head.fc.in_features
        hidden = 1024
        head.fc = nn.Sequential(
            nn.Linear(in_features, hidden), nn.ReLU(), nn.Dropout(p=0.2),
            nn.Linear(hidden, hidden), nn.ReLU(), nn.Dropout(p=0.2),
            nn.Linear(hidden, NUM_CLASSES),
        )

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs of the wrapped model."""
        return self.model(x)

    

def load_net(checkpoint_path, model_name,  num_classes=NUM_CLASSES, parallel=False):
    """Build a model, load its checkpoint and return it on DEVICE in eval mode.

    Args:
        checkpoint_path: Path of a saved ``state_dict``.
        model_name: timm model name; ``"regnetx_008"`` selects ``RegnetModel``,
            anything else ``CustomModel``.
        num_classes: Accepted for API compatibility; not used here.
        parallel: Wrap the model in ``nn.DataParallel`` before loading the
            weights (presumably for checkpoints saved from a DataParallel
            model - verify against the training code).
    """
    model_cls = RegnetModel if model_name == "regnetx_008" else CustomModel
    net = model_cls(model_name=model_name, pretrained=False)

    if parallel:
        net = nn.DataParallel(net)

    # Deserialize onto the CPU first; the model is moved to DEVICE afterwards.
    state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
    net.load_state_dict(state_dict)

    return net.to(DEVICE).eval()

In [None]:
@torch.no_grad()
def get_thresh_preds(out, thresh=None, use_pp=True):
    """Return, per row, the class indices scoring above ``thresh``.

    Args:
        out: 2-D scores of shape (rows, classes).
        thresh: Cut-off; the module-level ``THRESH`` is used when ``None``.
        use_pp: When false, each row's selection is moved to the CPU and
            converted through NumPy before being turned into a list.

    Returns:
        One list of class indices per row, ordered by decreasing score.
    """
    # `is None` rather than `or`, so an explicit threshold of 0 is honoured.
    if thresh is None:
        thresh = THRESH

    order = (-out).argsort(1)
    counts = (out > thresh).sum(1)

    preds = []
    for ranked, count in zip(order, counts):
        top = ranked[:count]
        if not use_pp:
            top = top.cpu().numpy()
        preds.append(top.tolist())
    return preds


def get_bird_names(preds):
    """Map each list of class ids to a space-separated label string.

    An empty list becomes ``"nocall"``; ids are looked up in
    ``INV_LABEL_IDS``.
    """
    return [
        " ".join([INV_LABEL_IDS[bird_id] for bird_id in pred]) if pred else "nocall"
        for pred in preds
    ]


def post_process(preds, threshold=0.5, maxpreds=3):
    """Smooth thresholded scores with the neighbouring windows.

    Scores below ``threshold`` are zeroed; each remaining row then gets half
    of the following and half of the preceding row added (zeros beyond the
    ends). ``maxpreds`` is accepted but currently unused.
    """
    kept = preds * (preds >= threshold)
    zero_row = np.zeros((1, kept.shape[-1]))

    following = np.concatenate([kept[1:], zero_row])   # window i+1 aligned to row i
    preceding = np.concatenate([zero_row, kept[:-1]])  # window i-1 aligned to row i

    return kept + 0.5 * following + 0.5 * preceding

In [None]:
def get_preds1(test_data, names=False): # 128, 201
    """Average the sigmoid outputs of ensemble 1 over every item of ``test_data``.

    The ensemble is six EfficientNet-B0, six DenseNet-121 and two ReXNet-100
    checkpoints. The original author annotated this function with "128, 201",
    presumably the spectrogram size these checkpoints expect.

    Args:
        test_data: Indexable dataset; ``test_data[idx]`` must be a NumPy
            array holding the batch of images for one file.
        names: When true, each file's averaged scores are thresholded with
            this ensemble's own threshold (0.3) and converted to label strings.

    Returns:
        One entry per file: the averaged score tensor, or a list of label
        strings when ``names`` is true.
    """
    # Previously assigned but never used: get_thresh_preds silently fell
    # back to the module-level THRESH instead.
    THRESH = 0.3

    # (model_name, checkpoint) pairs keep every path next to its architecture
    # rather than coupling two parallel lists by index.
    checkpoints = [
        # effnets
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_34_f1_val_04335_20210517183043.pth")),
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_39_f1_val_04217_20210517185903.pth")),
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_43_f1_val_04890_20210517024249.pth")),
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_45_f1_val_06436_20210517135849.pth")),
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_47_f1_val_06460_20210517140640.pth")),
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_49_f1_val_04890_20210517030949.pth")),

        # densenets, d5 - new
        ("densenet121", Path('../input/bird-models-ioa/densenet121_d5_focal_new/densenet121_fold0_epoch_76_f1_val_06552.pth')),
        ("densenet121", Path('../input/bird-models-ioa/densenet121_d5_focal_new/densenet121_fold1_epoch_78_f1_val_06542.pth')),
        ("densenet121", Path('../input/bird-models-ioa/densenet121_d5_focal_new/densenet121_fold2_epoch_76_f1_val_06565.pth')),
        ("densenet121", Path('../input/bird-models-ioa/densenet121_d5_focal_new/densenet121_fold3_epoch_63_f1_val_06540.pth')),
        ("densenet121", Path('../input/bird-models-ioa/densenet121_d5_focal_new/densenet121_fold4_epoch_72_f1_val_06515.pth')),

        # densenet, d7
        ("densenet121", Path('../input/bird-models-ioa/densenet121_d7_focal/densenet121_fold0_epoch_73_f1_val_06540.pth')),

        # rexnet_100
        ("rexnet_100", Path('../input/bird-models-ioa/rexnet_100_d7_bce/rexnet_100_fold0_epoch_68_f1_val_06533.pth')),
        ("rexnet_100", Path('../input/bird-models-ioa/rexnet_100_d7_bce/rexnet_100_fold0_epoch_69_f1_val_06516.pth')),
    ]

    nets = [
        load_net(ckp.as_posix(), model_name=model_name)
        for model_name, ckp in checkpoints
    ]

    preds = []
    with torch.no_grad():
        for idx in tqdm(range(len(test_data))):
            xb = torch.from_numpy(test_data[idx]).to(DEVICE)

            # Plain mean of the per-model sigmoid scores.
            pred = 0.
            for net in nets:
                pred += torch.sigmoid(net(xb))
            pred /= len(nets)

            if names:
                pred = get_bird_names(get_thresh_preds(pred, thresh=THRESH))

            preds.append(pred)

    return preds
    

In [None]:
def get_preds2(test_data, names=False):
    """Average the sigmoid outputs of ensemble 2 over every item of ``test_data``.

    The ensemble is two EfficientNet-B0, two RegNetX-008 and four
    TF-EfficientNet-B0-AP checkpoints, all loaded with ``parallel=True``.

    Args:
        test_data: Indexable dataset; ``test_data[idx]`` must be a NumPy
            array holding the batch of images for one file.
        names: When true, each file's averaged scores are thresholded with
            this ensemble's own threshold (0.4) and converted to label strings.

    Returns:
        One entry per file: the averaged score tensor, or a list of label
        strings when ``names`` is true.
    """
    # Previously assigned but never used: get_thresh_preds silently fell
    # back to the module-level THRESH instead.
    THRESH = 0.4

    # (model_name, checkpoint) pairs keep every path next to its architecture
    # rather than coupling two parallel lists by index.
    checkpoints = [
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_41_f1_val_06781_20210519042210.pth")),
        ("efficientnet_b0", Path("../input/clef-effb0/birdclef_efficientnet_b0_fold0_epoch_43_f1_val_06777_20210519044405.pth")),

        ("regnetx_008", Path("../input/birdcall-224-448/birdclef_regnetx_008_fold0_epoch_46_f1_val_06390_20210519205412.pth")),
        ("regnetx_008", Path("../input/birdcall-224-448/birdclef_regnetx_008_fold0_epoch_47_f1_val_06343_20210519210404.pth")),

        ("tf_efficientnet_b0_ap", Path("../input/birdcall-224-448/birdclef_tf_efficientnet_b0_ap_fold0_epoch_43_f1_val_06231_20210521112036.pth")),
        ("tf_efficientnet_b0_ap", Path("../input/birdcall-224-448/birdclef_tf_efficientnet_b0_ap_fold0_epoch_45_f1_val_06205_20210521115612.pth")),

        ("tf_efficientnet_b0_ap", Path("../input/birdcall-224-448/birdclef_tf_efficientnet_b0_ap_fold0_epoch_48_f1_val_06769_20210520200135.pth")),
        ("tf_efficientnet_b0_ap", Path("../input/birdcall-224-448/birdclef_tf_efficientnet_b0_ap_fold0_epoch_45_f1_val_06764_20210520192838.pth")),
    ]

    nets = [
        load_net(ckp.as_posix(), model_name=model_name, parallel=True)
        for model_name, ckp in checkpoints
    ]

    preds = []
    with torch.no_grad():
        for idx in tqdm(range(len(test_data))):
            xb = torch.from_numpy(test_data[idx]).to(DEVICE)

            # Plain mean of the per-model sigmoid scores.
            pred = 0.
            for net in nets:
                pred += torch.sigmoid(net(xb))
            pred /= len(nets)

            if names:
                pred = get_bird_names(get_thresh_preds(pred, thresh=THRESH))

            preds.append(pred)

    return preds

In [None]:
def get_preds3():
    """Score every file listed in the global ``data`` frame with the ResNeSt-50 model.

    This function carries its own preprocessing (mel spectrogram, min-max
    scaling to uint8, division by 255), which differs from the module-level
    dataset, so the helpers are defined locally. The local dataset is built
    with its default ``tta=True``, so every window goes through
    ``ClippingDistortion`` before being imaged.

    Returns:
        One averaged sigmoid score tensor per file.
    """

    class MelSpecComputer:
        """Callable converting a waveform into a float32 dB-scaled mel spectrogram."""

        def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
            self.sr = sr
            self.n_mels = n_mels
            self.fmin = fmin
            self.fmax = fmax
            kwargs.setdefault("n_fft", self.sr//10)
            kwargs.setdefault("hop_length", self.sr//(10*4))
            self.kwargs = kwargs

        def __call__(self, y):
            # Keyword `y=`: recent librosa releases reject a positional waveform.
            melspec = lb.feature.melspectrogram(
                y=y, sr=self.sr, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax, **self.kwargs,
            )

            return lb.power_to_db(melspec).astype(np.float32)

    def mono_to_color(X, eps=1e-6, mean=None, std=None):
        """Standardize ``X`` and min-max rescale it to a uint8 image."""
        mean = mean or X.mean()
        std = std or X.std()
        X = (X - mean) / (std + eps)

        _min, _max = X.min(), X.max()

        if (_max - _min) > eps:
            V = np.clip(X, _min, _max)
            V = 255 * (V - _min) / (_max - _min)
            V = V.astype(np.uint8)
        else:
            # Constant input: nothing to scale.
            V = np.zeros_like(X, dtype=np.uint8)

        return V

    def crop_or_pad(y, length):
        """Cut or zero-pad ``y`` at its end to exactly ``length`` samples."""
        missing = length - len(y)
        if missing > 0:
            # Pad with `missing` zeros (the old code appended
            # `length - np.zeros(len(y))`, i.e. the wrong values and count).
            y = np.concatenate([y, np.zeros(missing, dtype=np.asarray(y).dtype)])
        elif missing < 0:
            y = y[:length]
        return y

    class BirdCLEFDataset(Dataset):
        """Per-file dataset: each item is the stack of images of one file's windows."""

        def __init__(self, data, sr=SR, n_mels=128, fmin=0, fmax=None, duration=DURATION, step=None, res_type="kaiser_fast", resample=True, tta=True):

            self.data = data

            self.sr = sr
            self.n_mels = n_mels
            self.fmin = fmin
            self.fmax = fmax or self.sr//2

            self.duration = duration
            self.audio_length = self.duration*self.sr
            self.step = step or self.audio_length

            self.tta = tta

            self.res_type = res_type
            self.resample = resample

            self.mel_spec_computer = MelSpecComputer(sr=self.sr, n_mels=self.n_mels, fmin=self.fmin,
                                                     fmax=self.fmax)

        def __len__(self):
            return len(self.data)

        @staticmethod
        def normalize(image):
            """Scale a 0-255 image to [0, 1] and replicate it into 3 leading channels."""
            image = image.astype("float32", copy=False) / 255.0
            image = np.stack([image, image, image])
            return image

        def audio_to_image(self, audio):
            melspec = self.mel_spec_computer(audio)
            image = mono_to_color(melspec)
            image = self.normalize(image)
            return image

        def read_file(self, filepath):
            """Load ``filepath`` and return the stacked images of its full windows."""
            audio, orig_sr = sf.read(filepath, dtype="float32")

            if self.resample and orig_sr != self.sr:
                # Keyword form works on both old and new librosa releases.
                audio = lb.resample(audio, orig_sr=orig_sr, target_sr=self.sr, res_type=self.res_type)

            # Build the augmentation once per file instead of once per window.
            distort = None
            if self.tta:
                distort = ClippingDistortion(
                    min_percentile_threshold = 25, max_percentile_threshold = 25, p = 1.0
                )

            audios = []
            for i in range(self.audio_length, len(audio) + self.step, self.step):
                start = max(0, i - self.audio_length)
                end = start + self.audio_length

                chunk = audio[start:end]
                if distort is not None:
                    chunk = distort(samples = chunk, sample_rate = self.sr)
                audios.append(chunk)

            # Only the last window can be short; drop it rather than pad it.
            if audios and len(audios[-1]) < self.audio_length:
                audios = audios[:-1]

            if not audios:
                raise ValueError(
                    f"{filepath}: audio is shorter than one {self.duration}s window"
                )

            images = [self.audio_to_image(audio) for audio in audios]
            images = np.stack(images)

            return images

        def __getitem__(self, idx):
            return self.read_file(self.data.loc[idx, "filepath"])

    def load_net(checkpoint_path,model_name = "", num_classes=NUM_CLASSES):
        """Build the network, load ``checkpoint_path`` and return it on DEVICE in eval mode.

        ``"efficientnet_b0"`` selects ``CustomModel``; any other name builds a
        ResNeSt-50 with a ``num_classes``-wide final layer.
        """
        if model_name == "efficientnet_b0":
            net = CustomModel(model_name=model_name, pretrained=False)
        else:
            net = resnest50(pretrained=False)
            net.fc = nn.Linear(net.fc.in_features, num_classes)

        d = torch.load(checkpoint_path, map_location=torch.device("cpu"))

        if model_name != "efficientnet_b0":
            # Strip "model." from the checkpoint keys so they match the bare
            # ResNeSt module. Done in place to keep the loaded mapping object.
            for key in list(d.keys()):
                d[key.replace("model.", "")] = d.pop(key)

        net.load_state_dict(d)
        net = net.to(DEVICE)
        net = net.eval()
        return net

    test_data = BirdCLEFDataset(data=data)
    print(len(test_data), test_data[0].shape)

    ckps = [
        Path("../input/kkiller-birdclef-models-public/birdclef_resnest50_fold0_epoch_10_f1_val_06471_20210417161101.pth"),
    ]

    nets = [load_net(checkpoint_path.as_posix()) for checkpoint_path in ckps]

    preds = []
    with torch.no_grad():
        for idx in tqdm(range(len(test_data))):
            xb = torch.from_numpy(test_data[idx]).to(DEVICE)

            # Plain mean of the per-model sigmoid scores.
            pred = 0.
            for net in nets:
                pred += torch.sigmoid(net(xb))
            pred /= len(nets)

            preds.append(pred)

    return preds
    

In [None]:
# import os
# os.listdir("../input/birdclef-kkiler-train/efficientnet_b0_sr32000_d7_v1_v1")

In [None]:
# Two views of the same files, both with the clipping-distortion TTA turned
# off: the default 128x201 images and 224x448 images.
test_data = BirdCLEFDataset(data=data, tta=False)
test_data1 = BirdCLEFDataset(data=data, wh=(224,448), tta=False)

# Sanity check: number of files and the shape of the first file's image stack.
print(len(test_data), test_data[0].shape)
print(len(test_data1), test_data1[0].shape)

In [None]:
# Ensemble 2 is run on the 224x448 dataset.
probs2 = get_preds2(test_data1)

In [None]:
# Ensemble 1 is run on the default 128x201 dataset.
probs1 = get_preds1(test_data)

In [None]:
# Ensemble 3 builds its own dataset from the global `data` frame.
probs3 = get_preds3()

In [None]:
# len(list(((np.array(probs1) + np.array(probs2)) / 2)))

In [None]:
# pred_probas = list(0.7*np.array(probs1) + 0.3*np.array(probs2))

# pred_probas = list((0.7*np.array(probs1) + 0.3*np.array(probs2) + 0.2*(np.array(probs3))) / 1.2 )

# pred_probas = list((0.7*np.array(probs1) + 0.3*np.array(probs2) + 0.2*(np.array(probs3))) / 1.2 )

# Weighted blend of the three ensembles (weights 0.7 / 0.3 / 0.7), divided by
# the weight sum 1.7 so the result stays on the probability scale.
# NOTE(review): np.array(...) over the per-file predictions assumes every file
# yields the same number of windows - confirm for the target data.
pred_probas = list((0.7*np.array(probs1) + 0.3*np.array(probs2) + 0.7*(np.array(probs3))) / 1.7 )


In [None]:
# Global decision threshold applied to the blended scores; it is also the
# fallback that get_thresh_preds uses when no threshold is passed.
THRESH = 0.35

# Per file: a list with one label string (or "nocall") per window.
preds = [get_bird_names(get_thresh_preds(pred, thresh=THRESH)) for pred in pred_probas]
# preds[:2]

In [None]:
# Notebook display: shape of the first file's blended scores.
pred_probas[0].shape
# Output recorded by the author: torch.Size([120, 397])

In [None]:
def preds_as_df(data, preds):
    """Flatten per-file window predictions into a ``row_id`` / ``birds`` frame.

    Args:
        data: Frame with ``id`` and ``site`` columns, one row per file.
        preds: For each file, the list of label strings of its windows, in
            the same order as the rows of ``data``.

    Returns:
        A frame with one row per window. Window ``i`` (1-based) gets the
        row id ``"{id}_{site}_{5*i}"``. When ``SAMPLE_SUB_PATH`` is set, the
        result is left-joined onto the sample submission's ``row_id`` column
        and missing predictions are filled with ``"nocall"``.
    """
    row_ids, birds = [], []

    for row, pred in zip(data.itertuples(False), preds):
        row_ids.extend(f"{row.id}_{row.site}_{5*i}" for i in range(1, len(pred)+1))
        birds.extend(pred)

    sub = pd.DataFrame({"row_id": row_ids, "birds": birds})

    if not SAMPLE_SUB_PATH:
        return sub

    sample_sub = pd.read_csv(SAMPLE_SUB_PATH, usecols=["row_id"])
    sub = sample_sub.merge(sub, on="row_id", how="left")
    sub["birds"] = sub["birds"].fillna("nocall")
    return sub

In [None]:
# Build the submission frame from the thresholded predictions and show it.
sub1 = preds_as_df(data, preds)
print(sub1.shape)
sub1

In [None]:
# Write the submission without the DataFrame index column.
sub1.to_csv("submission1.csv", index=False)

# Small validation

In [None]:
def get_metrics(s_true, s_pred):
    """Compute set-based precision, recall and F1 for one row.

    Args:
        s_true: Space-separated ground-truth labels.
        s_pred: Space-separated predicted labels.

    Returns:
        Dict with ``f1``, ``prec``, ``rec`` and the counts ``n_true``,
        ``n_pred`` and ``n`` (size of the intersection). Precision is 0 when
        nothing was predicted and recall is 0 when there are no true labels,
        rather than raising ZeroDivisionError.
    """
    true_labels = set(s_true.split())
    pred_labels = set(s_pred.split())
    n = len(true_labels & pred_labels)
    n_true, n_pred = len(true_labels), len(pred_labels)

    prec = n / n_pred if n_pred else 0.0
    rec = n / n_true if n_true else 0.0
    f1 = 2*prec*rec/(prec + rec) if prec + rec else 0

    return {"f1": f1, "prec": prec, "rec": rec, "n_true": n_true, "n_pred": n_pred, "n": n}

In [None]:
# Local validation: only runs when ground-truth labels are available.
if TARGET_PATH:
    sub_target = pd.read_csv(TARGET_PATH)
    # Both frames have a `birds` column, so after the merge `birds_x` holds
    # the ground truth (left frame) and `birds_y` the predictions (right frame).
    sub_target = sub_target.merge(sub1, how="left", on="row_id")

    # Report both columns; this previously printed the `birds_x` count twice.
    print(sub_target["birds_x"].notnull().sum(), sub_target["birds_y"].notnull().sum())
    assert sub_target["birds_x"].notnull().all()
    assert sub_target["birds_y"].notnull().all()

    df_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_target.birds_x, sub_target.birds_y)])

    print(df_metrics.mean())

In [None]:
# `sub_target` only exists when ground-truth labels were loaded, so the
# leaderboard estimate is guarded the same way; without the guard this cell
# raises NameError when TARGET_PATH is None.
if TARGET_PATH:
    # Weight given to the score on "nocall" rows in the blended estimate.
    w_nocall = 0.56

    is_nocall = sub_target.birds_x == 'nocall'
    sc1 = row_wise_micro_averaged_f1_score(sub_target[is_nocall]['birds_x'], sub_target[is_nocall]['birds_y'])
    sc2 = row_wise_micro_averaged_f1_score(sub_target[~is_nocall]['birds_x'], sub_target[~is_nocall]['birds_y'])

    final_score = w_nocall*sc1 + (1-w_nocall)*sc2

    print(f"Your LB will be around {final_score} | Other Metrics {sc1}, {sc2}")

In [None]:
# 0.3529

# version 12: Your LB will be around 0.6958094119631681 | Other Metrics 0.9627207325048923, 0.35610409491006456


## Add ioannis full sub kernel here

In [None]:
!pip install ../input/torchlibrosa/torchlibrosa-0.0.9-py2.py3-none-any.whl -q


In [None]:
import os, sys, gc
import random
import time
import warnings
import logging
from tqdm import tqdm

import numpy as np
import pandas as pd
from contextlib import contextmanager
from pathlib import Path
from typing import Optional
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import torchvision
import torchvision.models as M
import timm

import cv2
import audioread
import soundfile as sf
import librosa
import librosa as lb
from torchlibrosa.stft import LogmelFilterBank, Spectrogram
from torchlibrosa.augmentation import SpecAugmentation
from albumentations.core.transforms_interface import ImageOnlyTransform

from evaluations.kaggle_2020 import *
import sklearn.metrics as metrics
from sklearn.metrics import precision_score, recall_score, fbeta_score

from resnest.torch import resnest50
from resnest.torch.resnest import ResNet, Bottleneck

# Log the library versions this run is using.
print('Torch version: ', torch.__version__)
print('TIMM version: ', timm.__version__)

In [None]:
def set_seed(seed: int=42):
    """Seed Python, NumPy and torch RNGs and request deterministic cuDNN.

    Args:
        seed: Value used for every random number generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Only affects interpreters started after this point; hashing in the
    # current process is already initialised.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # Benchmark mode lets cuDNN pick algorithms by timing them, which can vary
    # from run to run; it must be off for the deterministic flag to matter.
    torch.backends.cudnn.benchmark = False  # type: ignore
    

In [None]:
# Seed all RNGs, then pick the GPU when torch can see one.
set_seed(1213)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Static configuration namespace. Documented with comments rather than a
# docstring so that the class `__dict__` exposed through `cfg` is unchanged.
class CFG:

    # --- Model ---
    num_classes = 397    # NUM_CLASSES = 397
    in_channels = 1      # todo: try with 3

    # --- Spectrogram ---
    n_mels = 128
    n_fft = 2048
    hop_length = 512
    sample_rate = 32000
    fmin = 50
    fmax = 16000

    seed = 2020        # 1213
    img_size = None    # n_mels

    # Short alias for the sample rate.
    sr = sample_rate

    # --- Data ---
    train_csv = "../input/birdclef-2021/train_metadata.csv"
    train_soundscape = "../input/birdclef-2021/train_soundscape_labels.csv"

    # Transform names per phase.
    # todo: add transforms; candidates noted by the author:
    # ['RandomVolume', "PinkNoise", "GaussianNoise",  "PitchShift", "CosineVolume", "TimeStretch"]
    transforms = {
        "train": [
            {"name": "Normalize"},
            {"name": "PinkNoise"},
            {"name": "GaussianNoise"},
        ],
        "valid": [{"name": "Normalize"}],
        "test": [{"name": "Normalize"}],
    }


# Read-only mapping view of the class attributes.
cfg = vars(CFG)

In [None]:
df = pd.read_csv(CFG.train_csv)  # df_train

# index -> label over the sorted unique primary labels, and its inverse.
INV_BIRD_CODE = dict(enumerate(sorted(df.primary_label.unique())))   # INV_LABEL_IDS
BIRD_CODE = {code: idx for idx, code in INV_BIRD_CODE.items()}       # LABEL_IDS
len(BIRD_CODE)

In [None]:
# TEST is true when the test soundscape directory contains .ogg files;
# otherwise the train soundscapes are used instead.
TEST = any(Path("../input/birdclef-2021/test_soundscapes/").glob("*.ogg"))
DATADIR = Path(
    "../input/birdclef-2021/test_soundscapes/"  # TEST_AUDIO_ROOT
    if TEST
    else "../input/birdclef-2021/train_soundscapes/"
)

In [None]:
TEST_AUDIO_ROOT = Path("../input/birdclef-2021/test_soundscapes")  ## DATADIR

if any(TEST_AUDIO_ROOT.glob("*.ogg")):
    # Test soundscapes are present: align the output with the sample
    # submission; there are no labels to validate against.
    SAMPLE_SUB_PATH = "../input/birdclef-2021/sample_submission.csv"
    TARGET_PATH = None
else:
    # No test audio: use the labelled train soundscapes for validation.
    TEST_AUDIO_ROOT = Path("../input/birdclef-2021/train_soundscapes")
    SAMPLE_SUB_PATH = None
    TARGET_PATH = Path("../input/birdclef-2021/train_soundscape_labels.csv")

In [None]:
TARGET_SR = 32000  # SR

# Every soundscape file in DATADIR, plus an id made of the first two
# "_"-separated fields of its file name.
all_audios = list(DATADIR.glob("*.ogg"))
all_audio_ids = [
    "_".join(audio_path.name.split("_", 2)[:2])
    for audio_path in all_audios
]

In [None]:
#### -----------------------------------------------------
# **************    Dataset    **************
#### -----------------------------------------------------
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Turn a single-channel array into a 3-channel uint8 image.

    The input is replicated three times along a new last axis, standardized
    with ``mean``/``std`` and then min-max rescaled to the 0-255 range.

    Args:
        X: numeric array (e.g. a spectrogram).
        eps: added to ``std`` before dividing; also the minimum value range
            below which the output is all zeros.
        mean: value(s) subtracted before scaling. Computed from the stacked
            data when ``None``. May be a scalar or an array broadcastable to
            the stacked shape.
        std: value(s) used for scaling. Computed from the stacked data when
            ``None``. Same broadcasting rules as ``mean``.

    Returns:
        ``np.uint8`` array of shape ``X.shape + (3,)``.
    """
    X = np.stack([X, X, X], axis=-1)

    # Explicit None checks: `mean or X.mean()` silently discards a legitimate
    # 0 and raises "truth value is ambiguous" for array-valued statistics.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    # Rescale to [0, 255]; a (near-)constant input maps to all zeros.
    _min, _max = X.min(), X.max()
    if (_max - _min) > eps:
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        V = np.zeros_like(X, dtype=np.uint8)

    return V


def resize(image, size=None):
    """Resize a 3-D image to height ``size``, keeping the aspect ratio.

    Returns ``image`` untouched when ``size`` is ``None``; otherwise the new
    width is ``int(w * size / h)`` and the resize is delegated to cv2.
    """
    if size is None:
        return image

    height, width, _channels = image.shape
    target_width = int(width * size / height)
    # cv2.resize takes the destination size as (width, height).
    return cv2.resize(image, (target_width, size))


def normalize(image, mean=None, std=None):
    """Scale an image by 1/255, optionally standardize, and move axis 2 first.

    Standardization with ``mean``/``std`` is applied only when both are given.
    The result is returned as float32.
    """
    scaled = image / 255.0
    if not (mean is None or std is None):
        scaled = (scaled - mean) / std
    channels_first = np.moveaxis(scaled, 2, 0)
    return channels_first.astype(np.float32)


def load_audio(path, sr):
    """Load an audio file as a mono waveform resampled to ``sr``.

    Args:
        path: location of the audio file.
        sr: target sampling rate passed to librosa.

    Returns:
        The waveform returned by ``librosa.load`` (the sampling rate it also
        returns is dropped).
    """
    # librosa is imported as `lb` at the top of this file; the bare `librosa`
    # name is only bound by a later cell, so using it here could raise
    # NameError depending on execution order.
    clip, _ = lb.load(path, sr=sr, mono=True, res_type="kaiser_fast")
    return clip



def mono_to_color2(X, eps=1e-6, mean=None, std=None):
    """Standardize an array and rescale it to uint8 values in [0, 255].

    Unlike ``mono_to_color`` this keeps the input shape (no channel axis is
    added).

    Args:
        X: numeric array (e.g. a mel spectrogram).
        eps: added to ``std`` before dividing; also the minimum value range
            below which the output is all zeros.
        mean: value(s) subtracted before scaling; computed from ``X`` when
            ``None``. Scalars and broadcastable arrays are accepted.
        std: value(s) used for scaling; computed from ``X`` when ``None``.

    Returns:
        ``np.uint8`` array with the same shape as ``X``.
    """
    # Explicit None checks: `mean or X.mean()` silently discards a legitimate
    # 0 and raises "truth value is ambiguous" for array-valued statistics.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    # Rescale to [0, 255]; a (near-)constant input maps to all zeros.
    _min, _max = X.min(), X.max()
    if (_max - _min) > eps:
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        V = np.zeros_like(X, dtype=np.uint8)

    return V

def crop_or_pad(y, length):
    """Return ``y`` forced to exactly ``length`` samples.

    Shorter inputs are right-padded with zeros, longer inputs are truncated,
    and inputs that already have the requested length are returned unchanged.

    Args:
        y: 1-D sequence of samples.
        length: desired number of samples.

    Returns:
        A sequence of ``length`` samples.
    """
    n_samples = len(y)
    if n_samples < length:
        y = np.asarray(y)
        # The padding must hold the *missing* samples, all zero. The previous
        # expression `length - np.zeros(len(y))` appended len(y) samples each
        # equal to `length`.
        padding = np.zeros(length - n_samples, dtype=y.dtype)
        y = np.concatenate([y, padding])
    elif n_samples > length:
        y = y[:length]
    return y



class MelSpecComputer:
    """Callable that converts a waveform into a dB-scaled mel spectrogram.

    Args:
        sr: sampling rate of the audio passed to ``__call__``.
        n_mels: number of mel bands.
        fmin: lowest frequency of the mel filter bank.
        fmax: highest frequency of the mel filter bank.
        **kwargs: extra keyword arguments forwarded to
            ``librosa.feature.melspectrogram``. ``n_fft`` defaults to
            ``sr // 10`` and ``hop_length`` to ``sr // 40`` when not given.
    """

    def __init__(self, sr, n_mels, fmin, fmax, **kwargs):
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax
        kwargs.setdefault("n_fft", self.sr // 10)
        kwargs.setdefault("hop_length", self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, y):
        """Return the float32 mel spectrogram (in dB) of waveform ``y``."""
        # Pass the audio by keyword: recent librosa releases make the
        # arguments of melspectrogram keyword-only, and older releases accept
        # `y=` as well.
        melspec = lb.feature.melspectrogram(
            y=y, sr=self.sr, n_mels=self.n_mels, fmin=self.fmin,
            fmax=self.fmax, **self.kwargs)
        return lb.power_to_db(melspec).astype(np.float32)


    

class TestDataset(Dataset):
    """Dataset yielding 5-second waveform windows cut from one audio clip.

    Each row of ``df`` provides a ``row_id`` and the ``seconds`` value at which
    its window ends; the window covers the 5 seconds before that point,
    assuming a sampling rate of 32 kHz.
    """

    def __init__(self,
                 df: pd.DataFrame,
                 clip: np.ndarray,
                 waveform_transforms=None,
                 in_channels=1
                 ):
        self.df = df
        self.clip = clip
        self.waveform_transforms = waveform_transforms
        self.in_channels = in_channels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``(waveform, row_id)`` for the row labelled ``idx``."""
        sample_rate = 32000
        row = self.df.loc[idx, :]

        stop_second = int(row.seconds)
        begin_second = int(stop_second - 5)

        window = self.clip[sample_rate * begin_second:sample_rate * stop_second]
        wave = np.nan_to_num(window.astype(np.float32))

        if self.waveform_transforms:
            wave = self.waveform_transforms(wave)

        # Transforms may introduce NaN/inf values, so sanitize again.
        return np.nan_to_num(wave), row.row_id


    
######## ===========================================    
### resnest dataset
####### ============================================

# DURATION = 5
# THRESH = 0.25

class TestDataset2(Dataset):
    """Dataset that turns each audio file into a stack of mel-spectrogram images.

    Every item is the result of ``read_file`` for the ``filepath`` stored in
    the corresponding row of ``data``: the file is read, optionally resampled,
    cut into fixed-length windows and each window is converted to a
    3-channel float32 image.

    Args:
        data: DataFrame with a ``filepath`` column.
        sr: sampling rate the audio is processed at.
        n_mels: number of mel bands.
        fmin: lowest mel frequency.
        fmax: highest mel frequency; falls back to ``sr // 2`` when falsy.
        duration: window length in seconds.
        step: hop between windows in samples; defaults to the window length.
        res_type: resampling method passed to librosa.
        resample: whether to resample files whose rate differs from ``sr``.
    """

    def __init__(self,
                 data,
                 sr=TARGET_SR,
                 n_mels=128,
                 fmin=CFG.fmin,
                 fmax=CFG.fmax,
                 duration=5,
                 step=None,
                 res_type="kaiser_fast",
                 resample=True
                 ):

        self.data = data
        self.sr = sr
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax or self.sr // 2
        self.duration = duration
        self.audio_length = self.duration * self.sr
        self.step = step or self.audio_length
        self.res_type = res_type
        self.resample = resample

        self.mel_spec_computer = MelSpecComputer(
            sr=self.sr, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax)

    def __len__(self):
        return len(self.data)

    @staticmethod
    def normalize(image):
        """Scale a 2-D uint8 image to [0, 1] and replicate it into 3 channels."""
        image = image.astype("float32", copy=False) / 255.0
        image = np.stack([image, image, image])
        return image

    def audio_to_image(self, audio):
        """Convert one waveform window into a (3, H, W) float32 image."""
        melspec = self.mel_spec_computer(audio)
        image = mono_to_color2(melspec)
        image = self.normalize(image)
        return image

    def read_file(self, filepath):
        """Read ``filepath`` and return the stacked images of all full windows.

        Raises:
            ValueError: if the file does not contain a single full window.
        """
        audio, orig_sr = sf.read(filepath, dtype="float32")

        if self.resample and orig_sr != self.sr:
            # Keyword arguments: recent librosa releases make the sampling
            # rates keyword-only; the names are the same in older releases.
            audio = lb.resample(audio, orig_sr=orig_sr, target_sr=self.sr,
                                res_type=self.res_type)

        audios = []
        for i in range(self.audio_length, len(audio) + self.step, self.step):
            start = max(0, i - self.audio_length)
            end = start + self.audio_length
            audios.append(audio[start:end])

        # The trailing window may run past the end of the file; drop it.
        if audios and len(audios[-1]) < self.audio_length:
            audios = audios[:-1]

        if not audios:
            # Previously this surfaced as an IndexError (empty file) or as an
            # opaque np.stack error (file shorter than one window).
            raise ValueError(
                f"{filepath} is shorter than one window of "
                f"{self.audio_length} samples")

        images = [self.audio_to_image(audio) for audio in audios]
        images = np.stack(images)

        return images

    def __getitem__(self, idx):
        return self.read_file(self.data.loc[idx, "filepath"])
    
    



In [None]:
def get_transforms(phase: str):
    """Build the waveform transform pipeline configured for ``phase``.

    Reads ``CFG.transforms[phase]``, a list of ``{"name": ..., "params": ...}``
    entries, and instantiates every entry whose name resolves to a non-None
    module-level global. Entries with unknown names are skipped.

    Returns:
        A ``Compose`` of the instantiated transforms, or ``None`` when nothing
        is configured or no entry could be resolved.
    """
    configured = CFG.transforms
    if configured is None or configured[phase] is None:
        return None

    namespace = globals()
    built = []
    for entry in configured[phase]:
        name = entry["name"]
        params = entry.get("params")
        if params is None:
            params = {}
        transform_cls = namespace.get(name)
        if transform_cls is not None:
            built.append(transform_cls(**params))

    return Compose(built) if len(built) > 0 else None
        
        
class Normalize:
    """Peak-normalize a waveform so that its largest absolute value is 1."""

    def __call__(self, y: np.ndarray):
        """Return ``y`` divided by its peak absolute value, Fortran-ordered.

        An all-zero (silent) waveform is returned unscaled instead of being
        divided by zero, which would fill it with NaN.
        """
        max_vol = np.abs(y).max()
        if max_vol == 0:
            return np.asfortranarray(y)
        return np.asfortranarray(y / max_vol)



class NewNormalize:
    """Remove the mean of a waveform, then peak-normalize it."""

    def __call__(self, y: np.ndarray):
        """Return the zero-mean waveform scaled so its peak magnitude is 1.

        A constant waveform (zero peak after centering) is returned as the
        centered, all-zero signal rather than being divided by zero.
        """
        y_mm = y - y.mean()
        # numpy arrays have no `.abs()` method, so the previous
        # `y_mm.abs().max()` raised AttributeError; the builtin `abs` works
        # for any array type implementing `__abs__`.
        peak = abs(y_mm).max()
        if peak == 0:
            return y_mm
        return y_mm / peak


class Compose:
    """Chain several callables into one, applied in list order."""

    def __init__(self, transforms: list):
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        """Feed ``y`` through every transform and return the final result."""
        result = y
        for step in self.transforms:
            result = step(result)
        return result

In [None]:
## for Resnest50
## https://www.kaggle.com/kneroma/clean-fast-simple-bird-identifier-inference

def predict2(nets, test_data):
    """Average the sigmoid outputs of ``nets`` over the items of ``test_data``.

    Each item of ``test_data`` is converted to a tensor, moved to the global
    ``device`` and passed through every network; the sigmoid outputs are
    averaged. Gradients are disabled.

    Returns:
        The averaged prediction tensor of the LAST item only; predictions of
        earlier items are not kept.
    """
    with torch.no_grad():
        for item_idx in range(len(test_data)):
            batch = torch.from_numpy(test_data[item_idx]).to(device)
            pred = 0.
            for net in nets:
                pred += torch.sigmoid(net(batch))
            pred /= len(nets)

    return pred  # preds


In [None]:
## Jaideep helpers

# import torch.nn.functional as F
# import timm

def get_model_jaid(name, num_classes=397):
    """Instantiate the backbone called ``name`` with a ``num_classes`` output layer.

    The source of the backbone is chosen from the name (first match wins):
    resnest, WSL and torchvision models via their respective loaders, timm
    EfficientNet / MobileNetV3 variants, ``EfficientNet.from_pretrained`` for
    "efficientnet-b*" names, and ``pretrainedmodels`` for everything else.
    The first classifier attribute found among ``fc``, ``_fc``, ``classifier``
    and ``last_linear`` is replaced by a new ``nn.Linear``.
    """
    print(name)

    if "resnest" in name:
        model = getattr(resnest_torch, name)(pretrained=True)
    elif "wsl" in name:
        model = torch.hub.load("facebookresearch/WSL-Images", name)
    elif name.startswith(("resnext", "resnet")):
        model = torch.hub.load("pytorch/vision:v0.6.0", name, pretrained=True)
    elif name.startswith("tf_efficientnet_b"):
        model = getattr(timm.models.efficientnet, name)(pretrained=False)
        print('name')
    elif "efficientnet-b" in name:
        model = EfficientNet.from_pretrained(name)
    elif name.startswith("tf_mobile"):
        model = getattr(timm.models.mobilenetv3, name)(pretrained=False)
        print('name')
    else:
        model = pretrainedmodels.__dict__[name](pretrained='imagenet')

    # Swap the first classifier layer that exists for one of the right width.
    for head_attr in ("fc", "_fc", "classifier", "last_linear"):
        if hasattr(model, head_attr):
            nb_ft = getattr(model, head_attr).in_features
            setattr(model, head_attr, nn.Linear(nb_ft, num_classes))
            break

    return model



class MishFunction(torch.autograd.Function):
    """Mish activation, ``x * tanh(softplus(x))``, with a hand-written gradient."""

    @staticmethod
    def forward(ctx, x):
        """Compute Mish of ``x`` and save ``x`` for the backward pass."""
        ctx.save_for_backward(x)
        # Call softplus through `torch.nn.functional` explicitly: the
        # module-level name `F` is bound to `torchaudio.functional` at the top
        # of this file, which does not provide softplus.
        return x * torch.tanh(torch.nn.functional.softplus(x))   # x * tanh(ln(1 + exp(x)))

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient of Mish with respect to its input."""
        # `saved_tensors` is the supported accessor; `saved_variables` is a
        # deprecated alias.
        x = ctx.saved_tensors[0]
        sigmoid = torch.sigmoid(x)
        tanh_sp = torch.tanh(torch.nn.functional.softplus(x))
        # d/dx [x * tanh(sp(x))] = tanh(sp) + x * sigmoid(x) * (1 - tanh(sp)^2)
        return grad_output * (tanh_sp + x * sigmoid * (1 - tanh_sp * tanh_sp))

class Mish(nn.Module):
    """Module wrapper that applies ``MishFunction`` to its input."""

    def forward(self, x):
        activated = MishFunction.apply(x)
        return activated

def to_Mish(model):
    """Recursively replace every ``nn.ReLU`` child of ``model`` with ``Mish``.

    The model is modified in place; nothing is returned.
    """
    for child_name, child in model.named_children():
        if not isinstance(child, nn.ReLU):
            # Not a ReLU: descend into the child's own sub-modules.
            to_Mish(child)
            continue
        setattr(model, child_name, Mish())
            

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        leading = x.size(0)
        return x.view(leading, -1)


class AdaptiveConcatPool2d(nn.Module):
    """Concatenate adaptive max pooling and adaptive average pooling.

    The max-pooled features come first along dimension 1, followed by the
    average-pooled ones. ``size`` is the pooling output size and falls back to
    1 when not given (or falsy).
    """

    def __init__(self, size=None):
        super().__init__()
        self.size = size if size else 1
        self.ap = nn.AdaptiveAvgPool2d(self.size)
        self.mp = nn.AdaptiveMaxPool2d(self.size)

    def forward(self, x):
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        return torch.cat((pooled_max, pooled_avg), dim=1)
    
    

    
    
class custom_model_mbnet(nn.Module):
    """Backbone from ``get_model_jaid(name)`` with a fresh linear classifier.

    ``n`` is the number of output classes. The classifier input width is
    hard-coded to 1280, so ``name`` must be a backbone whose classifier takes
    1280 features. ``group_length`` is read from the module-level
    ``grp_length`` when the model is constructed.
    """
    def __init__(self,n=NUM_CLASSES,name=None):
        super().__init__()
        self.model=get_model_jaid(name)
        # Overrides the classifier that get_model_jaid already installed.
        self.model.classifier=nn.Linear(1280,n)
        self.group_length=grp_length
    
     
    def forward(self,x):
        if self.group_length !=1:
            # Grouped mode: `x` is a sequence of n tensors that are stacked,
            # run through the model together and re-assembled afterwards.
            # NOTE(review): the reshape below indexes shape[1..3] of the model
            # output, i.e. it expects a 4-D output — confirm this path is
            # usable with the linear classifier installed above.
            n=len(x)
            shape=x[0].shape
            x = torch.stack(x,1).view(-1,shape[1],shape[2],shape[3])
            x=self.model(x)
            shape = x.shape
            x = x.view(-1,n,shape[1],shape[2],shape[3]).permute(0,2,1,3,4).contiguous()\
            .view(-1,shape[1],shape[2]*n,shape[3])
        else:
            # Plain mode: a single forward pass through the model.
            x=self.model(x)
        
        return x
         

  

    
    
    
## EffB1 - Jai    
class custom_model_b1(nn.Module):
    """``tf_efficientnet_b1`` feature extractor with a concat-pool + Mish head.

    The feature extractor is every child of the backbone except the last two.
    The head pools the features with ``AdaptiveConcatPool2d`` (which doubles
    the channel count), then applies BatchNorm, a 512-unit linear layer, Mish,
    dropout and the final ``n``-way linear layer.

    Args:
        n: number of output classes.
    """

    def __init__(self, n=CFG.num_classes):
        super().__init__()
        # Build the backbone once and reuse its children; it used to be
        # instantiated a second time just to read one layer's channel count.
        layers = list(get_model_jaid('tf_efficientnet_b1').children())
        self.model = nn.Sequential(*layers[:-2])
        nc = layers[-5].out_channels
        self.head = nn.Sequential(
            AdaptiveConcatPool2d(), Flatten(), nn.BatchNorm1d(2 * nc),
            nn.Linear(2 * nc, 512), Mish(), nn.Dropout(0.3), nn.Linear(512, n))

    def forward(self, x):
        x = self.model(x)
        x = self.head(x)
        return x

    
# Number of inputs processed as one group by the custom models; 1 disables
# grouping. Read when a model is constructed.
grp_length=1 #2


class custom_model_b2(nn.Module):
    """``tf_efficientnet_b2_ns`` feature extractor with a concat-pool + SiLU head.

    The feature extractor is every child of the backbone except the last two.
    The head pools the features with ``AdaptiveConcatPool2d`` (which doubles
    the channel count), then applies BatchNorm, a 512-unit linear layer, SiLU,
    dropout and the final ``n``-way linear layer.

    Args:
        n: number of output classes.
        name: unused; kept for signature compatibility.
    """

    def __init__(self, n=CFG.num_classes, name=None):
        super().__init__()
        # Build the backbone once and reuse its children; it used to be
        # instantiated a second time just to read one layer's channel count.
        layers = list(get_model_jaid('tf_efficientnet_b2_ns').children())
        self.model = nn.Sequential(*layers[:-2])
        nc = layers[-5].out_channels
        self.head = nn.Sequential(
            AdaptiveConcatPool2d(), Flatten(), nn.BatchNorm1d(2 * nc),
            nn.Linear(2 * nc, 512), torch.nn.SiLU(inplace=True),
            nn.Dropout(0.3), nn.Linear(512, n))
        self.group_length = grp_length

    def forward(self, x):
        if self.group_length != 1:
            # Grouped mode: `x` is a sequence of n tensors; their feature maps
            # are concatenated along dimension 2. The head is NOT applied on
            # this path.
            n = len(x)
            shape = x[0].shape
            x = torch.stack(x, 1).view(-1, shape[1], shape[2], shape[3])
            x = self.model(x)
            shape = x.shape
            x = x.view(-1, n, shape[1], shape[2], shape[3]).permute(0, 2, 1, 3, 4).contiguous().view(-1, shape[1], shape[2] * n, shape[3])
        else:
            x = self.model(x)
            x = self.head(x)
        return x
    

    
class CustomModel_2(nn.Module):
    """timm model fed with an extra frequency-coordinate input channel.

    When ``coord`` is 1, a channel holding a linear ramp from -1 to 1 along
    dimension 2 of the input is appended before the input goes through 2-D
    dropout and the timm trunk. The trunk is created with ``3 + coord`` input
    channels and ``CFG.num_classes`` outputs.
    """

    def __init__(self, model_name='efficientnet_b0', pretrained=True, coord=1):
        super().__init__()
        self.coord = coord
        self.trunk = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=CFG.num_classes,
            in_chans=3 + coord,
        )
        self.do = nn.Dropout2d(0.2)

    def forward(self, x):
        batch, _, n_freq, n_time = x.size()
        ramp = torch.linspace(-1, 1, n_freq, dtype=x.dtype, device=x.device)
        freq_coord = ramp.view(1, 1, -1, 1).expand(batch, 1, -1, n_time)
        if self.coord == 1:
            x = torch.cat([x, freq_coord], dim=1)
        return self.trunk(self.do(x))
    


## load Resnest_v1 - Ioa
def load_model_weights_resnest(checkpoint_path, num_classes=CFG.num_classes):
    """Load a resnest50 checkpoint and return the model in eval mode.

    The checkpoint is read on CPU, every "model." substring is stripped from
    its keys, and the model is moved to CUDA when available (CPU otherwise).
    """
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = resnest50(pretrained=False)
    net.fc = nn.Linear(net.fc.in_features, num_classes)

    state = torch.load(checkpoint_path, map_location=torch.device("cpu"))
    # Rename in place so the checkpoint's mapping object itself is preserved.
    for old_key in list(state.keys()):
        state[old_key.replace("model.", "")] = state.pop(old_key)
    net.load_state_dict(state)

    return net.to(target_device).eval()



## load Resnest_v3 - Ioa
def load_model_weights_resnest_v3(checkpoint_path, num_classes=CFG.num_classes):
    """Load a timm ``resnest50d_1s4x24d`` checkpoint and return it in eval mode.

    The checkpoint is read on CPU, every "model." substring is stripped from
    its keys, and the model is moved to CUDA when available (CPU otherwise).

    Args:
        checkpoint_path: path of the saved state dict.
        num_classes: size of the replacement ``fc`` layer.
    """
    net = getattr(timm.models, 'resnest50d_1s4x24d')(pretrained=False)
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    dummydevice = torch.device("cpu")
    # Resolve the target device locally, as the other resnest loaders do,
    # instead of depending on a module-level `device` being defined.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    d = torch.load(checkpoint_path, map_location=dummydevice)
    for key in list(d.keys()):
        d[key.replace("model.", "")] = d.pop(key)
    net.load_state_dict(d)
    net = net.to(device)
    net = net.eval()
    return net



# Constructor keyword arguments for the ResNet variants built in this file,
# keyed by architecture name.
MODEL_CONFIGS = {
    "resnest50_fast_1s1x64d": dict(
        num_classes=264,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        radix=1,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=32,
        avg_down=True,
        avd=True,
        avd_first=True,
    ),
}


## load Resnest_v6 - Ioa
def load_model_weights_resnest_v6(checkpoint_path, num_classes=CFG.num_classes):
    """Load a ``resnest50_fast_1s1x64d`` checkpoint and return it in eval mode.

    The network is built from ``MODEL_CONFIGS``, its ``fc`` layer is replaced
    to output ``num_classes``, every "model." substring is stripped from the
    checkpoint keys, and the model is placed on CUDA when available.
    """
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = ResNet(**MODEL_CONFIGS["resnest50_fast_1s1x64d"])
    net.fc = nn.Linear(net.fc.in_features, num_classes)

    state = torch.load(checkpoint_path, map_location=target_device)
    # Rename in place so the checkpoint's mapping object itself is preserved.
    for old_key in list(state.keys()):
        state[old_key.replace("model.", "")] = state.pop(old_key)
    net.load_state_dict(state)

    return net.to(target_device).eval()



## load EffB1 - Jai
def load_net_b1(checkpoint_path, num_classes=CFG.num_classes):
    """Load a ``custom_model_b1`` checkpoint and return the model in eval mode.

    Args:
        checkpoint_path: path of the saved state dict.
        num_classes: number of output classes of the model head.

    The weights are mapped to, and the model moved to, the module-level
    ``device``.
    """
    # Forward num_classes to the model; it used to be accepted but ignored.
    net = custom_model_b1(n=num_classes)
    net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    net = net.to(device)
    net = net.eval()
    return net



## load EffB2 - Jai
def load_net_b2(checkpoint_path, num_classes=CFG.num_classes):
    """Load a ``custom_model_b2`` checkpoint and return the model in eval mode.

    Args:
        checkpoint_path: path of the saved state dict.
        num_classes: number of output classes of the model head.

    The weights are mapped to, and the model moved to, the module-level
    ``device``.
    """
    # Forward num_classes to the model; it used to be accepted but ignored.
    net = custom_model_b2(n=num_classes)
    net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    net = net.to(device)
    net = net.eval()
    return net


def load_net_mbnet(checkpoint_path, num_classes=CFG.num_classes):
    """Load a ``tf_mobilenetv3_large_100`` checkpoint and return it in eval mode.

    ``num_classes`` is accepted but not used. Weights are mapped to, and the
    model moved to, the module-level ``device``.
    """
    model = custom_model_mbnet(name="tf_mobilenetv3_large_100")
    weights = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(weights)
    return model.to(device).eval()



## load Densenet121 - Ioa
def load_net2(checkpoint_path, num_classes=CFG.num_classes):
    """Load a densenet121 ``CustomModel`` checkpoint and return it in eval mode.

    ``num_classes`` is accepted but not used. Weights are mapped to, and the
    model moved to, the module-level ``device``.
    """
    model = CustomModel(model_name='densenet121', pretrained=False)
    weights = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(weights)
    return model.to(device).eval()

## load Rexnet - Ioa
def load_net3(checkpoint_path, num_classes=CFG.num_classes):
    """Load a rexnet_100 ``CustomModel`` checkpoint and return it in eval mode.

    ``num_classes`` is accepted but not used. Weights are mapped to, and the
    model moved to, the module-level ``device``.
    """
    model = CustomModel(model_name="rexnet_100", pretrained=False)
    weights = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(weights)
    return model.to(device).eval()


## load Effnet - Ioa
def load_net4(checkpoint_path, num_classes=CFG.num_classes):
    """Load a tf_efficientnet_b3_ns ``CustomModel_2`` checkpoint in eval mode.

    ``num_classes`` is accepted but not used. Weights are mapped to, and the
    model moved to, the module-level ``device``.
    """
    model = CustomModel_2(model_name="tf_efficientnet_b3_ns", pretrained=False)
    weights = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(weights)
    return model.to(device).eval()

In [None]:
## Resnest50_v1 - ioa (kkiller data)

# Public baseline checkpoint followed by the resnest50_d7_v1 checkpoints.
ckp_paths = [
    Path("../input/kkiller-birdclef-models-public/birdclef_resnest50_fold0_epoch_10_f1_val_06471_20210417161101.pth"),
    ####
    *(
        Path("../input/bird-models/resnest50_d7_v1") / file_name
        for file_name in (
            "resnest50_fold0_epoch_27_f1_07675.pth",
            "resnest50_fold0_epoch_29_f1_07666.pth",
            "resnest50_fold1_epoch_29_f1_07628.pth",
            "resnest50_fold2_epoch_28_f1_07727.pth",
            "resnest50_fold3_epoch_29_f1_07682.pth",
            "resnest50_fold4_epoch_20_f1_07703.pth",
        )
    ),
]


## Resnest50_v3 - ioa (kkiller data)
ckp_paths_v3 = [
    Path("../input/bird-models/resnest50d_d7_v3") / file_name
    for file_name in (
        "resnest50d_1s4x24d_fold0_epoch_28_f1_07400.pth",
        "resnest50d_1s4x24d_fold0_epoch_29_f1_07409.pth",
    )
]

## Resnest50_v4 - ioa (kkiller data)
ckp_paths_v4 = [
    Path("../input/bird-models/resnest50_d7_v4") / file_name
    for file_name in (
        "resnest50_fold0_epoch_26_f1_07411.pth",
        "resnest50_fold0_epoch_28_f1_07430.pth",
    )
]


## Resnest50_v6 - ioa (kkiller data)
ckp_paths_v6 = [
    Path("../input/bird-models/resnest2020_d7_v6") / file_name
    for file_name in (
        "resnest2020_fold0_epoch_17_f1_07340.pth",
        "resnest2020_fold0_epoch_18_f1_07316.pth",
        "resnest2020_fold0_epoch_21_f1_07344.pth",
    )
]


## Densenet121 - ioa (rohit data)
ckp_paths_dense = [
    ## d5 - new
    *(
        Path('../input/bird-models-ioa/densenet121_d5_focal_new') / file_name
        for file_name in (
            'densenet121_fold0_epoch_72_f1_val_06520.pth',
            'densenet121_fold0_epoch_76_f1_val_06552.pth',
            'densenet121_fold1_epoch_77_f1_val_06515.pth',
            'densenet121_fold1_epoch_78_f1_val_06542.pth',
            'densenet121_fold2_epoch_73_f1_val_06557.pth',
            'densenet121_fold2_epoch_76_f1_val_06565.pth',
            'densenet121_fold3_epoch_63_f1_val_06540.pth',
            'densenet121_fold3_epoch_79_f1_val_06526.pth',
            'densenet121_fold4_epoch_72_f1_val_06515.pth',
            'densenet121_fold4_epoch_76_f1_val_06486.pth',
        )
    ),
    ## d7
    *(
        Path('../input/bird-models-ioa/densenet121_d7_focal') / file_name
        for file_name in (
            'densenet121_fold0_epoch_73_f1_val_06540.pth',
            'densenet121_fold0_epoch_77_f1_val_06538.pth',
        )
    ),
]



## Rexnet100 - ioa (rohit data)
ckp_paths_rex = [
    Path('../input/bird-models-ioa/rexnet_100_d7_bce') / file_name
    for file_name in (
        'rexnet_100_fold0_epoch_68_f1_val_06533.pth',
        'rexnet_100_fold0_epoch_69_f1_val_06516.pth',
    )
]


## EffB3 - ioa (rohit data)
ckp_paths_eff = [
    Path('../input/bird-models-ioa/tf_efficientnet_b3_ns_d5_bce_224') / file_name
    for file_name in (
        'tf_efficientnet_b3_ns_fold0_epoch_71_f1_val_06342.pth',
        'tf_efficientnet_b3_ns_fold0_epoch_77_f1_val_06338.pth',
        'tf_efficientnet_b3_ns_fold0_epoch_79_f1_val_06340.pth',
    )
]



## EffB1 - Jai
# EfficientNet-B1 checkpoint (loaded with load_net_b1).
ckp_paths_jaid_b1 = [
    Path('../input/bird-clf-models/b1_concat_samp_0.8438136045980652_0_12'),
]

# EfficientNet-B2 checkpoints (loaded with load_net_b2).
ckp_paths_jaid_b2 = [
    Path('../input/clean-fast-simple-bird-identifier-training-colab/birdclef_b2_effnet_fold0_epoch_16_f1_val_05515_20210519164405.pth'),
    Path('../input/clean-fast-simple-bird-identifier-training-colab/birdclef_b2_effnet_fold1_epoch_39_f1_val_06363_20210520165108.pth'),
]


# MobileNetV3 checkpoints (loaded with load_net_mbnet).
ckp_paths_jaid_mbnet = [
    Path('../input/jaid-mbnet/mobilenet_v3_sr32000_d5_v1_v1/birdclef_mobilenet_v3_fold2_epoch_37_f1_val_04512_20210524112151.pth'),
    Path('../input/jaid-mbnet1/mobilenet_v3_d5_v1/birdclef_mobilenet_v3_fold0_epoch_59_f1_val_04431_20210524201737.pth'),
#     Path('../input/mbnet-fold3/mobilenet_v3_d5_v1/birdclef_mobilenet_v3_fold1_epoch_51_f1_val_04556_20210525192949.pth')
    # This path used to start with '..../input', which is not a valid
    # relative prefix and made loading the checkpoint fail.
    Path('../input/rank2bird-identifier-training/mobilenet_v3_sr32000_d5_v1_v1/birdclef_mobilenet_v3_fold3_epoch_49_f1_val_04422_20210525233716.pth')
]


#### ===================================================================================================

## Resnest Ioann
# models2 = [ load_model_weights_resnest(c.as_posix()) for c in ckp_paths ]

# Ensemble members, in order: EfficientNet-B1, EfficientNet-B2, MobileNetV3.
## Eff Jaideep
models2 = [
    *(load_net_b1(ckpt.as_posix()) for ckpt in ckp_paths_jaid_b1),
    *(load_net_b2(ckpt.as_posix()) for ckpt in ckp_paths_jaid_b2),
    *(load_net_mbnet(ckpt.as_posix()) for ckpt in ckp_paths_jaid_mbnet),
]

# ## Densenet Ioa
# models2 += [load_net2(checkpoint_path.as_posix()) for checkpoint_path in ckp_paths_dense ]

## Rexnet Ioa
# models2 += [load_net3(checkpoint_path.as_posix()) for checkpoint_path in ckp_paths_rex ]


## EffB3 Ioa
# models2 += [load_net4(checkpoint_path.as_posix()) for checkpoint_path in ckp_paths_eff ]


In [None]:
len(models2)


In [None]:
def inference(test_audio, models, threshold=0.5, device='cpu'):
    """Run the model ensemble on every audio file and collect raw predictions.

    For each file a one-row ``TestDataset2`` is built (the file stem is split
    on "_" into id, site and date), the models are applied with ``predict2``
    and the result is stored as a numpy array.

    Args:
        test_audio: sequence of ``pathlib.Path`` objects pointing to audio
            files.
        models: list of networks passed to ``predict2``; when ``None`` no
            model is run for the file.
        threshold: unused; kept for signature compatibility.
        device: unused; kept for signature compatibility.

    Returns:
        A list with one prediction array per input file, in input order.
    """
    # The per-file row-id/seconds bookkeeping that used to be built here was
    # never read and has been removed.
    raw_preds = []
    for i, audio_id in enumerate(test_audio):
        test_df = pd.DataFrame(
            [(audio_id.stem, *audio_id.stem.split("_"), audio_id)],
            columns=["filename", "id", "site", "date", "filepath"],
        )
        dataset = TestDataset2(data=test_df)

        print(f'\n [{i+1}/{len(test_audio)}] Making predictions for audio: {audio_id}  ')  # in {site}

        preds = []
        if models is not None:
            pred2 = predict2(models, dataset)
            preds.append(pred2.detach().cpu().numpy())

        # Mean over the (currently single) group of model predictions.
        preds = np.mean(preds, 0)
        raw_preds.append(preds)

    return raw_preds


In [None]:


# Decision threshold handed to inference(); inference() accepts it but does
# not apply it, so raw_preds holds un-thresholded probabilities.
THRES = 0.5       # 0.704208

# sub2, raw_preds = inference(all_audios, models=None, configs=configs, params=CFG, models2=models2, threshold=THRES, device=device)

# One prediction array per file in all_audios, produced by the models2 ensemble.
raw_preds = inference(all_audios, models=models2, threshold=THRES, device=device)


In [None]:
## Add Rahul's SED model here

In [None]:
import cv2
import audioread
import logging
import os
import random
import time
import warnings

import librosa
import numpy as np
import pandas as pd
import soundfile as sf
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

from contextlib import contextmanager
from pathlib import Path
from typing import Optional

from albumentations.core.transforms_interface import ImageOnlyTransform
from torchlibrosa.stft import LogmelFilterBank, Spectrogram
from torchlibrosa.augmentation import SpecAugmentation
from tqdm import tqdm

import timm
from timm.models.efficientnet import tf_efficientnet_b2_ns , tf_efficientnet_b0

In [None]:
def get_preds_sed():
    import cv2
    import audioread
    import logging
    import os
    import random
    import time
    import warnings

    import librosa
    import numpy as np
    import pandas as pd
    import soundfile as sf
    import timm
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.utils.data as torchdata

    from contextlib import contextmanager
    from pathlib import Path
    from typing import Optional

    from albumentations.core.transforms_interface import ImageOnlyTransform
    from torchlibrosa.stft import LogmelFilterBank, Spectrogram
    from torchlibrosa.augmentation import SpecAugmentation
    from tqdm import tqdm

    import timm
    from timm.models.efficientnet import tf_efficientnet_b2_ns , tf_efficientnet_b0
    
    
    def set_seed(seed: int = 42):
        """Seed Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

        Args:
            seed: Value used for every random number generator.
        """
        random.seed(seed)
        np.random.seed(seed)
        os.environ["PYTHONHASHSEED"] = str(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)  # type: ignore
        torch.backends.cudnn.deterministic = True  # type: ignore
        # Auto-tuning may pick a different convolution algorithm from run to run,
        # which defeats the purpose of seeding; keep it off for reproducibility.
        torch.backends.cudnn.benchmark = False  # type: ignore
    
    
    def get_logger(out_file=None):
        """Reconfigure the root logger at INFO level and return it.

        Any handlers already attached to the root logger are discarded. A stream
        handler is always installed; a file handler is added when ``out_file``
        is given.

        Args:
            out_file: Optional path of a log file.

        Returns:
            The root ``logging.Logger``.
        """
        fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        root = logging.getLogger()
        root.handlers = []
        root.setLevel(logging.INFO)

        def _attach(sink):
            # Every sink shares the same format and level.
            sink.setFormatter(fmt)
            sink.setLevel(logging.INFO)
            root.addHandler(sink)

        _attach(logging.StreamHandler())
        if out_file is not None:
            _attach(logging.FileHandler(out_file))

        root.info("logger set up")
        return root


    @contextmanager
    def timer(name: str, logger: Optional[logging.Logger] = None):
        """Context manager that reports when a named step starts and how long it took.

        Args:
            name: Label placed in square brackets in both messages.
            logger: Destination for the messages (``logger.info``); when ``None``
                the messages are printed instead.
        """
        emit = print if logger is None else logger.info

        started = time.time()
        emit(f"[{name}] start")
        yield

        # Only reached when the managed body finishes without raising.
        emit(f"[{name}] done in {time.time() - started:.2f} s")
    
    
    # Log to the console and to "main.log" (this logger is handed to timer() inside
    # prediction() below), then seed all RNGs with a fixed value.
    logger = get_logger("main.log")
    set_seed(1213)
    
    
    class CFG:
        """Configuration namespace for the CNNSED ensemble in this function.

        In the visible code only these attributes are read: ``n_fft``,
        ``hop_length``, ``sample_rate``, ``n_mels``, ``fmin``, ``fmax`` (CNNSED
        front end), ``base_model_name``, ``num_classes``, ``in_channels``
        (model construction in ``prediction``), ``transforms``
        (``get_transforms``) and ``target_columns`` (index -> label mapping in
        ``prediction_for_clip``). The remaining training-related fields are not
        read anywhere in the visible part of ``get_preds_sed``.
        """
        ######################
        # Globals #
        ######################
        seed = 1213
        epochs = 35
        train = True
        folds = [0]
        img_size = 224
        main_metric = "epoch_f1_at_05"
        minimize_metric = False

        ######################
        # Data #
        ######################
        train_datadir = Path("../input/birdclef-2021/train_short_audio")
        train_csv = "../input/birdclef-2021/train_metadata.csv"
        train_soundscape = "../input/birdclef-2021/train_soundscape_labels.csv"

        ######################
        # Dataset #
        ######################
        # Per-phase transform specs; get_transforms() resolves each "name" via globals().
        transforms = {
            "train": [{"name": "Normalize"}],
            "valid": [{"name": "Normalize"}],
            "test": [{"name": "Normalize"}]
        }
        period = 10
        # Front-end parameters consumed by CNNSED (Spectrogram / LogmelFilterBank).
        n_mels = 256
        fmin = 40
        fmax = 16000
        n_fft = 1024
        hop_length = 320
        sample_rate = 32000
        # NOTE(review): differs from n_mels/fmin above and is not read in the
        # visible code -- looks like a leftover; confirm before relying on it.
        melspectrogram_parameters = {
            "n_mels": 224,
            "fmin": 20,
            "fmax": 16000
        }

        # Class labels; position in this list is the class index used when
        # turning thresholded probabilities into label strings.
        target_columns = [
            'acafly', 'acowoo', 'aldfly', 'ameavo', 'amecro',
            'amegfi', 'amekes', 'amepip', 'amered', 'amerob',
            'amewig', 'amtspa', 'andsol1', 'annhum', 'astfly',
            'azaspi1', 'babwar', 'baleag', 'balori', 'banana',
            'banswa', 'banwre1', 'barant1', 'barswa', 'batpig1',
            'bawswa1', 'bawwar', 'baywre1', 'bbwduc', 'bcnher',
            'belkin1', 'belvir', 'bewwre', 'bkbmag1', 'bkbplo',
            'bkbwar', 'bkcchi', 'bkhgro', 'bkmtou1', 'bknsti', 'blbgra1',
            'blbthr1', 'blcjay1', 'blctan1', 'blhpar1', 'blkpho',
            'blsspa1', 'blugrb1', 'blujay', 'bncfly', 'bnhcow', 'bobfly1',
            'bongul', 'botgra', 'brbmot1', 'brbsol1', 'brcvir1', 'brebla',
            'brncre', 'brnjay', 'brnthr', 'brratt1', 'brwhaw', 'brwpar1',
            'btbwar', 'btnwar', 'btywar', 'bucmot2', 'buggna', 'bugtan',
            'buhvir', 'bulori', 'burwar1', 'bushti', 'butsal1', 'buwtea',
            'cacgoo1', 'cacwre', 'calqua', 'caltow', 'cangoo', 'canwar',
            'carchi', 'carwre', 'casfin', 'caskin', 'caster1', 'casvir',
            'categr', 'ccbfin', 'cedwax', 'chbant1', 'chbchi', 'chbwre1',
            'chcant2', 'chispa', 'chswar', 'cinfly2', 'clanut', 'clcrob',
            'cliswa', 'cobtan1', 'cocwoo1', 'cogdov', 'colcha1', 'coltro1',
            'comgol', 'comgra', 'comloo', 'commer', 'compau', 'compot1',
            'comrav', 'comyel', 'coohaw', 'cotfly1', 'cowscj1', 'cregua1',
            'creoro1', 'crfpar', 'cubthr', 'daejun', 'dowwoo', 'ducfly', 'dusfly',
            'easblu', 'easkin', 'easmea', 'easpho', 'eastow', 'eawpew', 'eletro',
            'eucdov', 'eursta', 'fepowl', 'fiespa', 'flrtan1', 'foxspa', 'gadwal',
            'gamqua', 'gartro1', 'gbbgul', 'gbwwre1', 'gcrwar', 'gilwoo',
            'gnttow', 'gnwtea', 'gocfly1', 'gockin', 'gocspa', 'goftyr1',
            'gohque1', 'goowoo1', 'grasal1', 'grbani', 'grbher3', 'grcfly',
            'greegr', 'grekis', 'grepew', 'grethr1', 'gretin1', 'greyel',
            'grhcha1', 'grhowl', 'grnher', 'grnjay', 'grtgra', 'grycat',
            'gryhaw2', 'gwfgoo', 'haiwoo', 'heptan', 'hergul', 'herthr',
            'herwar', 'higmot1', 'hofwoo1', 'houfin', 'houspa', 'houwre',
            'hutvir', 'incdov', 'indbun', 'kebtou1', 'killde', 'labwoo', 'larspa',
            'laufal1', 'laugul', 'lazbun', 'leafly', 'leasan', 'lesgol', 'lesgre1',
            'lesvio1', 'linspa', 'linwoo1', 'littin1', 'lobdow', 'lobgna5', 'logshr',
            'lotduc', 'lotman1', 'lucwar', 'macwar', 'magwar', 'mallar3', 'marwre',
            'mastro1', 'meapar', 'melbla1', 'monoro1', 'mouchi', 'moudov', 'mouela1',
            'mouqua', 'mouwar', 'mutswa', 'naswar', 'norcar', 'norfli', 'normoc', 'norpar',
            'norsho', 'norwat', 'nrwswa', 'nutwoo', 'oaktit', 'obnthr1', 'ocbfly1',
            'oliwoo1', 'olsfly', 'orbeup1', 'orbspa1', 'orcpar', 'orcwar', 'orfpar',
            'osprey', 'ovenbi1', 'pabspi1', 'paltan1', 'palwar', 'pasfly', 'pavpig2',
            'phivir', 'pibgre', 'pilwoo', 'pinsis', 'pirfly1', 'plawre1', 'plaxen1',
            'plsvir', 'plupig2', 'prowar', 'purfin', 'purgal2', 'putfru1', 'pygnut',
            'rawwre1', 'rcatan1', 'rebnut', 'rebsap', 'rebwoo', 'redcro', 'reevir1',
            'rehbar1', 'relpar', 'reshaw', 'rethaw', 'rewbla', 'ribgul', 'rinkin1',
            'roahaw', 'robgro', 'rocpig', 'rotbec', 'royter1', 'rthhum', 'rtlhum',
            'ruboro1', 'rubpep1', 'rubrob', 'rubwre1', 'ruckin', 'rucspa1', 'rucwar',
            'rucwar1', 'rudpig', 'rudtur', 'rufhum', 'rugdov', 'rumfly1', 'runwre1',
            'rutjac1', 'saffin', 'sancra', 'sander', 'savspa', 'saypho', 'scamac1',
            'scatan', 'scbwre1', 'scptyr1', 'scrtan1', 'semplo', 'shicow', 'sibtan2',
            'sinwre1', 'sltred', 'smbani', 'snogoo', 'sobtyr1', 'socfly1', 'solsan',
            'sonspa', 'soulap1', 'sposan', 'spotow', 'spvear1', 'squcuc1', 'stbori',
            'stejay', 'sthant1', 'sthwoo1', 'strcuc1', 'strfly1', 'strsal1', 'stvhum2',
            'subfly', 'sumtan', 'swaspa', 'swathr', 'tenwar', 'thbeup1', 'thbkin',
            'thswar1', 'towsol', 'treswa', 'trogna1', 'trokin', 'tromoc', 'tropar',
            'tropew1', 'tuftit', 'tunswa', 'veery', 'verdin', 'vigswa', 'warvir',
            'wbwwre1', 'webwoo1', 'wegspa1', 'wesant1', 'wesblu', 'weskin', 'wesmea',
            'westan', 'wewpew', 'whbman1', 'whbnut', 'whcpar', 'whcsee1', 'whcspa',
            'whevir', 'whfpar1', 'whimbr', 'whiwre1', 'whtdov', 'whtspa', 'whwbec1',
            'whwdov', 'wilfly', 'willet1', 'wilsni1', 'wiltur', 'wlswar', 'wooduc',
            'woothr', 'wrenti', 'y00475', 'yebcha', 'yebela1', 'yebfly', 'yebori1',
            'yebsap', 'yebsee1', 'yefgra1', 'yegvir', 'yehbla', 'yehcar1', 'yelgro',
            'yelwar', 'yeofly1', 'yerwar', 'yeteup1', 'yetvir']

        ######################
        # Loaders #
        ######################
        # Not read in the visible code: prediction_for_clip() builds its own
        # DataLoader with batch_size=1.
        loader_params = {
            "train": {
                "batch_size": 64,
                "num_workers": 20,
                "shuffle": True
            },
            "valid": {
                "batch_size": 64,
                "num_workers": 20,
                "shuffle": False
            },
            "test": {
                "batch_size": 64,
                "num_workers": 20,
                "shuffle": False
            }
        }

        ######################
        # Split #
        ######################
        split = "StratifiedKFold"
        split_params = {
            "n_splits": 5,
            "shuffle": True,
            "random_state": 1213
        }

        ######################
        # Model #
        ######################
        # Passed to the model constructors as base_model_name; the visible
        # constructors do not read that argument.
        base_model_name = "tf_efficientnet_b0_ns"
        pooling = "max"
        pretrained = True
        num_classes = 397
        in_channels = 1

        ######################
        # Criterion #
        ######################
        loss_name = "BCEFocal2WayLoss"
        loss_params: dict = {}

        ######################
        # Optimizer #
        ######################
        optimizer_name = "Adam"
        base_optimizer = "Adam"
        optimizer_params = {
            "lr": 0.001
        }
        # For SAM optimizer
        # (re-assigns base_optimizer to the same value it already has above)
        base_optimizer = "Adam"

        ######################
        # Scheduler #
        ######################
        scheduler_name = "CosineAnnealingLR"
        scheduler_params = {
            "T_max": 10
        }
      
    class CFG_2:
        """Front-end settings read by TimmSED (Spectrogram / LogmelFilterBank)."""
        # Audio
        sample_rate = 32000
        period = 30
        # STFT
        n_fft = 2048
        hop_length = 256
        # Mel filter bank
        n_mels = 128
        fmin = 40
        fmax = 16000
        
    # Not read anywhere else in the visible part of this function.
    TARGET_SR = 32000
    # Use the hidden test soundscapes when present, otherwise fall back to the
    # train soundscapes.
    TEST = (len(list(Path("../input/birdclef-2021/test_soundscapes/").glob("*.ogg"))) != 0)
    if TEST:
        DATADIR = Path("../input/birdclef-2021/test_soundscapes/")
    else:
        DATADIR = Path("../input/birdclef-2021/train_soundscapes/")   
        
        
    # Local list that shadows the notebook-level `all_audios` inside this function.
    # NOTE(review): Path.glob() does not sort; the row order of the returned
    # probabilities follows this listing -- confirm it matches the file order
    # used by the other models this output is combined with.
    all_audios = list(DATADIR.glob("*.ogg"))
    # First two "_"-separated fields of each file name.
    all_audio_ids = ["_".join(audio_id.name.split("_")[:2]) for audio_id in all_audios]
    submission_df = pd.DataFrame({
        "row_id": all_audio_ids
    })
    # Bare expression: a notebook display leftover, it has no effect inside a function.
    submission_df
    
    from functools import partial
    # Encoder factory used by TimmSED.
    # NOTE(review): the key says "b1_ns" but the factory builds tf_efficientnet_b0;
    # TimmSED looks the entry up by this exact key, so do not rename it in isolation.
    encoder_params = {
        "tf_efficientnet_b1_ns": {
            # Used as in_features of TimmSED.fc1.
            "features": 1280,
            "init_op": partial(tf_efficientnet_b0, pretrained=False, drop_path_rate=0.2)
        }
    }
    
    
    def init_layer(layer):
        """Xavier-uniform initialise ``layer.weight`` and zero its bias, if it has one."""
        nn.init.xavier_uniform_(layer.weight)

        bias = getattr(layer, "bias", None)
        if bias is not None:
            bias.data.fill_(0.)


    def init_bn(bn):
        """Reset a batch-norm layer's affine parameters to scale 1 and shift 0."""
        bn.weight.data.fill_(1.0)
        bn.bias.data.fill_(0.)


    def init_weights(model):
        """Initialise a single module in place, dispatching on its class name.

        * ``*Conv2d*``: Xavier-uniform weight with gain sqrt(2), zero bias.
        * ``*BatchNorm*``: weight ~ N(1, 0.02), zero bias.
        * ``*GRU*``: orthogonal init for every parameter with more than one dimension.
        * ``*Linear*``: weight ~ N(0, 0.01), zero bias.

        Modules of any other class are left untouched. Layers built without a
        bias (or without affine parameters) are handled without error.

        Args:
            model: The module to initialise.
        """
        classname = model.__class__.__name__
        if "Conv2d" in classname:
            nn.init.xavier_uniform_(model.weight, gain=np.sqrt(2))
            if model.bias is not None:
                model.bias.data.fill_(0)
        elif "BatchNorm" in classname:
            # affine=False leaves weight and bias as None.
            if model.weight is not None:
                model.weight.data.normal_(1.0, 0.02)
            if model.bias is not None:
                model.bias.data.fill_(0)
        elif "GRU" in classname:
            for weight in model.parameters():
                if len(weight.size()) > 1:
                    # The original spelled this `orghogonal_`, which does not exist.
                    nn.init.orthogonal_(weight.data)
        elif "Linear" in classname:
            model.weight.data.normal_(0, 0.01)
            if model.bias is not None:
                model.bias.data.zero_()


    def do_mixup(x: torch.Tensor, mixup_lambda: torch.Tensor):
        """Mixup x of even indexes (0, 2, 4, ...) with x of odd indexes
        (1, 3, 5, ...).
        Args:
          x: (batch_size * 2, ...)
          mixup_lambda: (batch_size * 2,)
        Returns:
          out: (batch_size, ...)
        """
        # Move the batch axis last so the per-sample lambdas broadcast over it,
        # then move it back.
        out = (x[0::2].transpose(0, -1) * mixup_lambda[0::2] +
               x[1::2].transpose(0, -1) * mixup_lambda[1::2]).transpose(0, -1)
        return out


    # This class used to be indented inside do_mixup() after its `return`, which
    # made it unreachable and left the name `Mixup` undefined.
    class Mixup(object):
        def __init__(self, mixup_alpha, random_seed=1234):
            """Mixup coefficient generator.

            Args:
              mixup_alpha: both shape parameters of the Beta distribution
              random_seed: seed of the private NumPy RandomState
            """
            self.mixup_alpha = mixup_alpha
            self.random_state = np.random.RandomState(random_seed)

        def get_lambda(self, batch_size):
            """Get mixup random coefficients.

            Coefficients are produced in pairs (lam, 1 - lam), one pair per two
            samples, so an odd batch_size yields batch_size + 1 values.

            Args:
              batch_size: int
            Returns:
              mixup_lambdas: (batch_size,) float32 tensor
            """
            mixup_lambdas = []
            for _ in range(0, batch_size, 2):
                lam = self.random_state.beta(
                    self.mixup_alpha, self.mixup_alpha, 1)[0]
                mixup_lambdas.append(lam)
                mixup_lambdas.append(1. - lam)

            return torch.from_numpy(np.array(mixup_lambdas, dtype=np.float32))



    def interpolate(x: torch.Tensor, ratio: int):
        """Upsample along the time axis by repeating every step ``ratio`` times.

        Used to undo the temporal down-sampling of the CNN encoder.

        Args:
          x: (batch_size, time_steps, classes_num)
          ratio: int, number of copies of each time step
        Returns:
          (batch_size, time_steps * ratio, classes_num)
        """
        n_batch, n_steps, n_classes = x.shape
        # Insert a length-1 axis after time, tile it, then fold it into time.
        tiled = x[:, :, None, :].repeat(1, 1, ratio, 1)
        return tiled.reshape(n_batch, n_steps * ratio, n_classes)


    def pad_framewise_output(framewise_output: torch.Tensor, frames_num: int):
        """Extend the time axis to ``frames_num`` by repeating the last frame.

        Args:
          framewise_output: (batch_size, frames, classes_num)
          frames_num: int, target number of frames
        Returns:
          (batch_size, frames_num, classes_num)
        """
        missing = frames_num - framewise_output.shape[1]
        last_frame = framewise_output[:, -1:, :]
        tail = last_frame.repeat(1, missing, 1)
        return torch.cat((framewise_output, tail), dim=1)


    def gem(x: torch.Tensor, p=3, eps=1e-6):
        """Generalised-mean pooling over the last two axes of ``x``.

        Values are clamped to at least ``eps``, raised to ``p``, averaged over
        the full (H, W) window and taken to the power ``1 / p``.
        """
        window = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, window)
        return pooled.pow(1. / p)


    class GeM(nn.Module):
        """Module wrapper around ``gem`` whose exponent ``p`` is a learnable parameter."""

        def __init__(self, p=3, eps=1e-6):
            super().__init__()
            self.eps = eps
            # One-element parameter so the exponent can be trained.
            self.p = nn.Parameter(torch.ones(1) * p)

        def forward(self, x):
            return gem(x, p=self.p, eps=self.eps)

        def __repr__(self):
            exponent = self.p.data.tolist()[0]
            return type(self).__name__ + f"(p={exponent:.4f}, eps={self.eps})"





    class AttBlock(nn.Module):
        """Attention pooling head: per-class attention over time times per-class scores.

        Args:
            in_features: Number of input channels.
            out_features: Number of classes.
            activation: ``"linear"`` or ``"sigmoid"``; applied to the class scores.
            temperature: Stored on the instance; not used by ``forward``.
        """

        def __init__(self,
                     in_features: int,
                     out_features: int,
                     activation="linear",
                     temperature=1.0):
            super().__init__()

            self.activation = activation
            self.temperature = temperature
            # 1x1 convolutions producing attention logits and class scores.
            self.att = nn.Conv1d(
                in_channels=in_features,
                out_channels=out_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True)
            self.cla = nn.Conv1d(
                in_channels=in_features,
                out_channels=out_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True)

            # Not used in forward(); kept so the module's state_dict keys are unchanged.
            self.bn_att = nn.BatchNorm1d(out_features)
            self.init_weights()

        def init_weights(self):
            init_layer(self.att)
            init_layer(self.cla)
            init_bn(self.bn_att)

        def forward(self, x):
            """Pool ``x`` over its last axis.

            Args:
                x: (n_samples, n_in, n_time)

            Returns:
                Tuple ``(pooled, norm_att, cla)``: the attention-weighted sum over
                time, the attention weights (softmax over time of the attention
                logits clamped to [-10, 10]) and the activated class scores.
            """
            norm_att = torch.softmax(torch.clamp(self.att(x), -10, 10), dim=-1)
            cla = self.nonlinear_transform(self.cla(x))
            x = torch.sum(norm_att * cla, dim=2)
            return x, norm_att, cla

        def nonlinear_transform(self, x):
            """Apply the configured activation.

            Raises:
                ValueError: If ``self.activation`` is neither ``"linear"`` nor
                    ``"sigmoid"`` (previously this silently returned ``None``).
            """
            if self.activation == 'linear':
                return x
            if self.activation == 'sigmoid':
                return torch.sigmoid(x)
            raise ValueError(f"Unsupported activation: {self.activation!r}")

    class TimmSED(nn.Module):
        """Sound-event-detection model whose front end is configured from ``CFG_2``.

        The waveform is converted to a log-mel spectrogram inside the model,
        encoded with the factory stored in
        ``encoder_params["tf_efficientnet_b1_ns"]``, averaged over the last
        feature axis and pooled over time by an attention block.

        Args:
            base_model_name: Accepted for interface compatibility; not read here.
            pretrained: Not read here.
            num_classes: Number of classes produced by the attention block.
            in_channels: Not read here.
        """

        def __init__(self, base_model_name: str, pretrained=False, num_classes=24, in_channels=1):
            super().__init__()
            # Spectrogram extractor
            self.spectrogram_extractor = Spectrogram(n_fft=CFG_2.n_fft, hop_length=CFG_2.hop_length,
                                                     win_length=CFG_2.n_fft, window="hann", center=True, pad_mode="reflect",
                                                     freeze_parameters=True)

            # Logmel feature extractor
            self.logmel_extractor = LogmelFilterBank(sr=CFG_2.sample_rate, n_fft=CFG_2.n_fft,
                                                     n_mels=CFG_2.n_mels, fmin=CFG_2.fmin, fmax=CFG_2.fmax, ref=1.0, amin=1e-10, top_db=None,
                                                     freeze_parameters=True)

            # Spec augmenter (only applied in training mode)
            self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
                                                   freq_drop_width=8, freq_stripes_num=2)

            self.bn0 = nn.BatchNorm2d(CFG_2.n_mels)

            self.encoder = encoder_params["tf_efficientnet_b1_ns"]["init_op"]()
            self.fc1 = nn.Linear(encoder_params["tf_efficientnet_b1_ns"]["features"], 2048, bias=True)

            self.att_block = AttBlock(
                2048, num_classes, activation="sigmoid")

            self.init_weight()

        def init_weight(self):
            init_layer(self.fc1)
            init_bn(self.bn0)

        def preprocess(self, input, mixup_lambda=None):
            """Turn a waveform batch into a batch-normalised log-mel spectrogram.

            Args:
                input: Waveform batch passed to the spectrogram extractor.
                mixup_lambda: Accepted but not used.

            Returns:
                Tensor laid out as (batch_size, 1, time_steps, mel_bins).
            """
            x = self.spectrogram_extractor(input)  # (batch_size, 1, time_steps, freq_bins)
            x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)

            # BatchNorm2d normalises axis 1, so move the mel axis there and back.
            x = x.transpose(1, 3)
            x = self.bn0(x)
            x = x.transpose(1, 3)

            if self.training:
                x = self.spec_augmenter(x)

            return x

        def forward(self, input):
            """Run the model on a waveform batch.

            Returns:
                Dict with ``framewise_output``, ``segmentwise_output``, ``logit``,
                ``framewise_logit`` and ``clipwise_output``.
            """
            # Same front end as preprocess(); previously duplicated inline here.
            x = self.preprocess(input)  # (batch_size, 1, time_steps, mel_bins)
            frames_num = x.shape[2]

            # The encoder expects 3 input channels: replicate the single channel.
            x = x.expand(x.shape[0], 3, x.shape[2], x.shape[3])
            x = self.encoder.forward_features(x)

            # (batch_size, channels, frames)
            x = torch.mean(x, dim=3)

            # Smooth along the last (frame) axis with max + average pooling.
            x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
            x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
            x = x1 + x2

            x = F.dropout(x, p=0.5, training=self.training)
            x = x.transpose(1, 2)
            x = F.relu_(self.fc1(x))
            x = x.transpose(1, 2)
            x = F.dropout(x, p=0.5, training=self.training)
            (clipwise_output, norm_att, segmentwise_output) = self.att_block(x)

            # Raw (pre-activation) class scores: computed once and reused for both
            # logit outputs instead of re-running the convolution for each.
            segment_logits = self.att_block.cla(x)
            logit = torch.sum(norm_att * segment_logits, dim=2)
            segmentwise_logit = segment_logits.transpose(1, 2)
            segmentwise_output = segmentwise_output.transpose(1, 2)

            interpolate_ratio = frames_num // segmentwise_output.size(1)

            # Get framewise output
            framewise_output = interpolate(segmentwise_output,
                                           interpolate_ratio)
            framewise_output = pad_framewise_output(framewise_output, frames_num)

            framewise_logit = interpolate(segmentwise_logit, interpolate_ratio)
            framewise_logit = pad_framewise_output(framewise_logit, frames_num)

            output_dict = {
                "framewise_output": framewise_output,
                "segmentwise_output": segmentwise_output,
                "logit": logit,
                "framewise_logit": framewise_logit,
                "clipwise_output": clipwise_output
            }

            return output_dict

    class AttBlock_cnn(nn.Module):
        """Attention pooling head with the same layers and forward pass as ``AttBlock``.

        NOTE(review): nothing in the visible code instantiates this class
        (``CNNSED`` builds an ``AttBlock``) -- confirm whether it is still needed.

        Args:
            in_features: Number of input channels.
            out_features: Number of classes.
            activation: ``"linear"`` or ``"sigmoid"``; applied to the class scores.
            temperature: Stored on the instance; not used by ``forward``.
        """

        def __init__(self,
                     in_features: int,
                     out_features: int,
                     activation="linear",
                     temperature=1.0):
            super().__init__()

            self.activation = activation
            self.temperature = temperature
            # 1x1 convolutions producing attention logits and class scores.
            self.att = nn.Conv1d(
                in_channels=in_features,
                out_channels=out_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True)
            self.cla = nn.Conv1d(
                in_channels=in_features,
                out_channels=out_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True)

            # Not used in forward(); kept so the module's state_dict keys are unchanged.
            self.bn_att = nn.BatchNorm1d(out_features)
            self.init_weights()

        def init_weights(self):
            init_layer(self.att)
            init_layer(self.cla)
            init_bn(self.bn_att)

        def forward(self, x):
            """Pool ``x`` of shape (n_samples, n_in, n_time) over its last axis.

            Returns:
                Tuple ``(pooled, norm_att, cla)``.
            """
            norm_att = torch.softmax(torch.clamp(self.att(x), -10, 10), dim=-1)
            cla = self.nonlinear_transform(self.cla(x))
            x = torch.sum(norm_att * cla, dim=2)
            return x, norm_att, cla

        def nonlinear_transform(self, x):
            """Apply the configured activation.

            Raises:
                ValueError: If ``self.activation`` is neither ``"linear"`` nor
                    ``"sigmoid"`` (previously this silently returned ``None``).
            """
            if self.activation == 'linear':
                return x
            if self.activation == 'sigmoid':
                return torch.sigmoid(x)
            raise ValueError(f"Unsupported activation: {self.activation!r}")

    class CNNSED(nn.Module):
        """Sound-event-detection model whose front end is configured from ``CFG``.

        The waveform is converted to a log-mel spectrogram inside the model,
        encoded with the factory stored in
        ``encoder_params_2["tf_efficientnet_b1_ns"]``, averaged over the last
        feature axis and pooled over time by an attention block.

        ``encoder_params_2`` is assigned after this class statement in the
        enclosing function; it is only looked up when an instance is created.

        Args:
            base_model_name: Accepted for interface compatibility; not read here.
            pretrained: Not read here.
            num_classes: Number of classes produced by the attention block.
            in_channels: Not read here.
        """

        def __init__(self, base_model_name: str, pretrained=False, num_classes=24, in_channels=1):
            super().__init__()
            # Spectrogram extractor
            self.spectrogram_extractor = Spectrogram(n_fft=CFG.n_fft, hop_length=CFG.hop_length,
                                                     win_length=CFG.n_fft, window="hann", center=True, pad_mode="reflect",
                                                     freeze_parameters=True)

            # Logmel feature extractor
            self.logmel_extractor = LogmelFilterBank(sr=CFG.sample_rate, n_fft=CFG.n_fft,
                                                     n_mels=CFG.n_mels, fmin=CFG.fmin, fmax=CFG.fmax, ref=1.0, amin=1e-10, top_db=None,
                                                     freeze_parameters=True)

            # Spec augmenter (only applied in training mode)
            self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
                                                   freq_drop_width=8, freq_stripes_num=2)

            self.bn0 = nn.BatchNorm2d(CFG.n_mels)

            self.encoder = encoder_params_2["tf_efficientnet_b1_ns"]["init_op"]()
            self.fc1 = nn.Linear(encoder_params_2["tf_efficientnet_b1_ns"]["features"], 2048, bias=True)

            self.att_block = AttBlock(
                2048, num_classes, activation="sigmoid")

            self.init_weight()

        def init_weight(self):
            init_layer(self.fc1)
            init_bn(self.bn0)

        def preprocess(self, input, mixup_lambda=None):
            """Turn a waveform batch into a batch-normalised log-mel spectrogram.

            Args:
                input: Waveform batch passed to the spectrogram extractor.
                mixup_lambda: Accepted but not used.

            Returns:
                Tensor laid out as (batch_size, 1, time_steps, mel_bins).
            """
            x = self.spectrogram_extractor(input)  # (batch_size, 1, time_steps, freq_bins)
            x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)

            # BatchNorm2d normalises axis 1, so move the mel axis there and back.
            x = x.transpose(1, 3)
            x = self.bn0(x)
            x = x.transpose(1, 3)

            if self.training:
                x = self.spec_augmenter(x)

            return x

        def forward(self, input):
            """Run the model on a waveform batch.

            Returns:
                Dict with ``framewise_output``, ``segmentwise_output``, ``logit``,
                ``framewise_logit`` and ``clipwise_output``.
            """
            # Same front end as preprocess(); previously duplicated inline here.
            x = self.preprocess(input)  # (batch_size, 1, time_steps, mel_bins)
            frames_num = x.shape[2]

            # The encoder expects 3 input channels: replicate the single channel.
            x = x.expand(x.shape[0], 3, x.shape[2], x.shape[3])
            x = self.encoder.forward_features(x)

            # (batch_size, channels, frames)
            x = torch.mean(x, dim=3)

            # Smooth along the last (frame) axis with max + average pooling.
            x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
            x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
            x = x1 + x2

            x = F.dropout(x, p=0.5, training=self.training)
            x = x.transpose(1, 2)
            x = F.relu_(self.fc1(x))
            x = x.transpose(1, 2)
            x = F.dropout(x, p=0.5, training=self.training)
            (clipwise_output, norm_att, segmentwise_output) = self.att_block(x)

            # Raw (pre-activation) class scores: computed once and reused for both
            # logit outputs instead of re-running the convolution for each.
            segment_logits = self.att_block.cla(x)
            logit = torch.sum(norm_att * segment_logits, dim=2)
            segmentwise_logit = segment_logits.transpose(1, 2)
            segmentwise_output = segmentwise_output.transpose(1, 2)

            interpolate_ratio = frames_num // segmentwise_output.size(1)

            # Get framewise output
            framewise_output = interpolate(segmentwise_output,
                                           interpolate_ratio)
            framewise_output = pad_framewise_output(framewise_output, frames_num)

            framewise_logit = interpolate(segmentwise_logit, interpolate_ratio)
            framewise_logit = pad_framewise_output(framewise_logit, frames_num)

            output_dict = {
                "framewise_output": framewise_output,
                "segmentwise_output": segmentwise_output,
                "logit": logit,
                "framewise_logit": framewise_logit,
                "clipwise_output": clipwise_output
            }

            return output_dict




    # Encoder factory used by CNNSED (looked up when a CNNSED instance is created).
    # NOTE(review): the key says "b1_ns" but the factory builds tf_efficientnet_b2_ns;
    # CNNSED looks the entry up by this exact key, so do not rename it in isolation.
    encoder_params_2 = {
        "tf_efficientnet_b1_ns": {
            # Used as in_features of CNNSED.fc1.
            "features": 1408,
            "init_op": partial(tf_efficientnet_b2_ns, pretrained=False, drop_path_rate=0.2)
        }
    }    
    
    
    class TestDataset(torchdata.Dataset):
        """Serves 5-second windows of one audio clip, one window per row of ``df``.

        Args:
            df: Frame with ``row_id`` and ``seconds`` columns; ``seconds`` is the
                end time of the window.
            clip: Samples of the whole recording (indexed at 32000 samples/second).
            waveform_transforms: Optional callable applied to each window.
        """

        def __init__(self, df: pd.DataFrame, clip: np.ndarray,
                     waveform_transforms=None):
            self.df = df
            self.clip = clip
            self.waveform_transforms=waveform_transforms

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx: int):
            """Return ``(waveform, row_id)`` for row ``idx``; NaNs are replaced via ``np.nan_to_num``."""
            SR = 32000
            record = self.df.loc[idx, :]

            stop_sec = int(record.seconds)
            start_sec = max(0, int(stop_sec - 5))

            window = self.clip[SR * start_sec:SR * stop_sec]
            wave = np.nan_to_num(window.astype(np.float32))

            if self.waveform_transforms:
                wave = self.waveform_transforms(wave)

            # Sanitise again in case the transform introduced NaNs.
            return np.nan_to_num(wave), record.row_id
        
        
    def get_transforms(phase: str):
        """Build the transform pipeline configured in ``CFG.transforms`` for ``phase``.

        Each configured entry is resolved by name through ``globals()``; names
        that are not found there are skipped.

        Returns:
            A ``Compose`` of the resolved transforms, or ``None`` when nothing is
            configured or none of the names could be resolved.
        """
        phase_table = CFG.transforms
        if phase_table is None:
            return None
        if phase_table[phase] is None:
            return None

        built = []
        for spec in phase_table[phase]:
            kwargs = spec.get("params")
            if kwargs is None:
                kwargs = {}
            factory = globals().get(spec["name"])
            if factory is not None:
                built.append(factory(**kwargs))

        if not built:
            return None
        return Compose(built)


    def get_waveform_transforms(config: dict, phase: str):
        """Return the waveform transform pipeline for ``phase``.

        Args:
            config: Unused; kept so existing call sites keep working. The
                transform table is read from ``CFG`` by ``get_transforms``.
            phase: Key into the transform table, e.g. ``"test"``.
        """
        # get_transforms() takes only the phase; passing `config` as well raised
        # a TypeError on every call.
        return get_transforms(phase)


    def get_spectrogram_transforms(config: dict, phase: str):
        """Build the spectrogram transform pipeline described in ``config``.

        Args:
            config: Mapping that may contain ``'spectrogram_transforms'``, a dict
                from phase name to a list of ``{"name": ..., "params": ...}``.
            phase: Phase whose transforms should be built.

        Returns:
            An albumentations ``Compose`` (``p=1.0``) of the resolved transforms,
            or ``None`` when nothing is configured or nothing could be resolved.
            Names are looked up in albumentations first, then in ``globals()``.
        """
        transforms = config.get('spectrogram_transforms')
        if transforms is None:
            return None
        phase_confs = transforms[phase]
        if phase_confs is None or len(phase_confs) == 0:
            return None

        # No `A` alias is bound in the visible part of this file, so the original
        # body would fail with a NameError here; import it where it is needed.
        import albumentations as A

        trns_list = []
        for trns_conf in phase_confs:
            trns_name = trns_conf["name"]
            trns_params = {} if trns_conf.get("params") is None else \
                trns_conf["params"]
            if hasattr(A, trns_name):
                trns_cls = getattr(A, trns_name)
            else:
                trns_cls = globals().get(trns_name)
            if trns_cls is not None:
                trns_list.append(trns_cls(**trns_params))

        if trns_list:
            return A.Compose(trns_list, p=1.0)
        return None
            
            
    def prepare_model_for_inference(model, path: Path):
        """Load ``ckpt["model_state_dict"]`` from ``path`` into ``model`` and switch it to eval mode.

        The checkpoint is mapped to the CPU when CUDA is unavailable.

        Returns:
            The same ``model`` instance.
        """
        # map_location=None is torch.load's default behaviour.
        target = None if torch.cuda.is_available() else "cpu"
        checkpoint = torch.load(path, map_location=target)
        model.load_state_dict(checkpoint["model_state_dict"])
        model.eval()
        return model
    
    
    def prediction_for_clip(test_df: pd.DataFrame, 
                        clip: np.ndarray, 
                        model,model_2,model_3,model_4,model_5,
                        threshold=0.5):
        """Score every window of one clip with the equally weighted five-model ensemble.

        Args:
            test_df: Frame with ``row_id`` and ``seconds`` columns (see TestDataset).
            clip: Samples of the whole recording.
            model, model_2, model_3, model_4, model_5: Models whose
                ``"clipwise_output"`` is averaged with weight 0.2 each.
            threshold: A class is reported when its averaged probability is
                greater than or equal to this value.

        Returns:
            ``(prediction_dict, probas)``: ``prediction_dict`` maps each row id to
            a space-separated label string (or ``"nocall"``); ``probas`` is the
            list of flattened probability vectors in row order.
        """
        windows = TestDataset(df=test_df,
                              clip=clip,
                              waveform_transforms=get_transforms(phase="test"))
        loader = torchdata.DataLoader(windows, batch_size=1, shuffle=False)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        ensemble = (model, model_2, model_3, model_4, model_5)
        for member in ensemble:
            member.eval()

        prediction_dict = {}
        probas = []
        for image, row_id in tqdm(loader):
            # batch_size is 1, so unwrap the single row id of the batch.
            row_id = row_id[0]
            image = image.to(device)

            with torch.no_grad():
                blended = None
                for member in ensemble:
                    weighted = 0.2 * member(image)["clipwise_output"]
                    blended = weighted if blended is None else blended + weighted

                proba = blended.detach().cpu().numpy().reshape(-1)

            probas.append(proba)

            hit_indices = np.argwhere(proba >= threshold).reshape(-1).tolist()
            if len(hit_indices) == 0:
                prediction_dict[row_id] = "nocall"
            else:
                prediction_dict[row_id] = " ".join(
                    [CFG.target_columns[i] for i in hit_indices])
        return prediction_dict, probas
    
    
    def prediction(test_audios,
               weights_path: Path,
               threshold=0.5):
        """Run the five-model SED ensemble over every audio file.

        Four ``CNNSED`` folds and one ``TimmSED`` model are loaded; each file is
        scored in 5-second windows ending at 5, 10, ..., 600 seconds.

        Args:
            test_audios: Iterable of ``Path`` objects pointing at audio files.
            weights_path: Checkpoint of the first ``CNNSED`` fold. The other four
                checkpoints are fixed paths inside this function.
            threshold: Probability threshold forwarded to ``prediction_for_clip``.

        Returns:
            ``(prediction_df, probs)``: a frame with ``row_id`` and ``birds``
            columns covering all files, and an array holding one row of class
            probabilities per window, stacked in the same file/window order.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        def _load(model_cls, checkpoint):
            # Build on the CPU, load the weights (mapped to the CPU when CUDA is
            # missing) and only then move to `device`. The previous unconditional
            # `.cuda()` / bare `torch.load` calls failed on CPU-only hosts.
            net = model_cls(base_model_name=CFG.base_model_name,
                            pretrained=False,
                            num_classes=CFG.num_classes,
                            in_channels=CFG.in_channels)
            return prepare_model_for_inference(net, checkpoint).to(device)

        model = _load(CNNSED, weights_path)
        model_2 = _load(CNNSED, "../input/birdfold2/best.pth")
        model_3 = _load(CNNSED, "../input/b2-fold3/checkpoints/best.pth")
        model_4 = _load(CNNSED, "../input/b2-fold4/checkpoints/best.pth")
        model_5 = _load(TimmSED, "../input/bird-30s-effb0/checkpoints/best.pth")

        warnings.filterwarnings("ignore")
        prediction_dfs = []
        probs = []
        for audio_path in test_audios:
            with timer(f"Loading {str(audio_path)}", logger):
                clip, _ = sf.read(audio_path)

            # One row per 5-second window; the row id is the first two "_"-separated
            # fields of the file name plus the window's end second.
            prefix = "_".join(audio_path.name.split("_")[:2])
            seconds = list(range(5, 605, 5))
            row_ids = [f"{prefix}_{second}" for second in seconds]

            test_df = pd.DataFrame({
                "row_id": row_ids,
                "seconds": seconds
            })
            with timer(f"Prediction on {audio_path}", logger):
                prediction_dict, events = prediction_for_clip(test_df,
                                                      clip=clip,
                                                      model=model,model_2=model_2,model_3=model_3,model_4=model_4,model_5=model_5,
                                                      threshold=threshold)

            prediction_dfs.append(pd.DataFrame({
                "row_id": list(prediction_dict.keys()),
                "birds": list(prediction_dict.values())
            }))
            probs.append(events)

        prediction_df = pd.concat(prediction_dfs, axis=0, sort=False).reset_index(drop=True)
        # Stack each file's window probabilities, then join the files end to end.
        return prediction_df, np.concatenate([np.asarray(p) for p in probs], axis=0)
    
    
    # Checkpoint of the first CNNSED fold; the remaining checkpoints are fixed
    # paths inside prediction().
    weights_path = Path("../input/b2-fold1/checkpoints/best.pth")
    # `submission` (the thresholded label strings) is not used after this call;
    # only the raw probabilities are returned to the caller.
    submission, preds_raw = prediction(test_audios=all_audios,
                            weights_path=weights_path,
                            threshold=0.6)

    
    return preds_raw

In [None]:
# Raw per-window class probabilities from the SED ensemble (get_preds_sed returns
# the concatenated probability array, not the thresholded labels).
probs_sed = get_preds_sed()

In [None]:
## ioannis exp 21: CV 0.8


### global vars timer, logger, all_audios

## Effnet-B0-ap 1x128x854 BCE Folds 0 - Ioannis
def get_preds_exp21():

    ##
    ## conda env: 192.168.2.200 dev_ime_fastai (Work server)

    import os, random, gc
    import re, time, json
    from ast import literal_eval
    import numpy as np
    import pandas as pd
#     import torchsummary
    from matplotlib import pyplot as plt
    from  sklearn.model_selection  import StratifiedKFold
    from sklearn.metrics import label_ranking_average_precision_score
    from tqdm import tqdm
    import joblib
    import librosa as lb
    import librosa.display as lbd
    import soundfile as sf
    from soundfile import SoundFile
    from pathlib import Path
    import torch
    from torch import nn, optim
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader
    import timm
    import warnings

    #%% Inference helpers from SED


    def set_seed(seed: int = 42):
        """Seed every RNG used here so that repeated runs give the same numbers.

        Seeds Python's ``random``, NumPy and PyTorch (CPU and the current CUDA
        device), sets the ``PYTHONHASHSEED`` environment variable and switches
        cuDNN to deterministic mode.
        """
        random.seed(seed)
        np.random.seed(seed)
        os.environ["PYTHONHASHSEED"] = str(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)  # type: ignore
        torch.backends.cudnn.deterministic = True  # type: ignore
        # Autotuning (benchmark=True) may select a different convolution
        # algorithm from run to run, which defeats the purpose of seeding.
        torch.backends.cudnn.benchmark = False  # type: ignore
    
    
    def get_logger(out_file=None):
        """Reset the root logger to INFO level and return it.

        Any handlers already attached are dropped. A stream handler is always
        added; when ``out_file`` is given a file handler writing to that path is
        added as well. Both use the same timestamped format.
        """
        root = logging.getLogger()
        fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        root.handlers = []
        root.setLevel(logging.INFO)

        def _attach(handler):
            # Same formatter and level for every sink.
            handler.setFormatter(fmt)
            handler.setLevel(logging.INFO)
            root.addHandler(handler)

        _attach(logging.StreamHandler())
        if out_file is not None:
            _attach(logging.FileHandler(out_file))

        root.info("logger set up")
        return root


    @contextmanager
    def timer(name: str, logger: Optional[logging.Logger] = None):
        """Context manager that reports when a named section starts and how long it took.

        Messages go to ``logger.info`` when a logger is supplied and to ``print``
        otherwise. The closing message is only emitted when the body finishes
        without raising.
        """
        emit = print if logger is None else logger.info
        started = time.time()
        emit(f"[{name}] start")
        yield
        emit(f"[{name}] done in {time.time() - started:.2f} s")
    
    
    # Log to the console and to main.log in the working directory.
    logger = get_logger("main.log")
    # Seed the RNGs before any model is built.
    set_seed(1213)
    
    
    class CFG:
        """Experiment settings, mostly training-time values.

        In the visible part of this function only ``target_columns`` is read
        (by ``prediction_for_clip``, to turn output indices into label names).
        """
        ######################
        # Globals #
        ######################
        seed = 1213
        epochs = 35
        train = True
        folds = [0]
        img_size = 224
        main_metric = "epoch_f1_at_05"
        minimize_metric = False

        ######################
        # Data #
        ######################
        train_datadir = Path("../input/birdclef-2021/train_short_audio")
        train_csv = "../input/birdclef-2021/train_metadata.csv"
        train_soundscape = "../input/birdclef-2021/train_soundscape_labels.csv"

        ######################
        # Dataset #
        ######################
        transforms = {
            "train": [{"name": "Normalize"}],
            "valid": [{"name": "Normalize"}],
            "test": [{"name": "Normalize"}]
        }
        period = 10
        # NOTE(review): n_mels/fmin here (256 / 40) differ from
        # melspectrogram_parameters below (224 / 20), and from the N_MELS/FMIN
        # constants the model in this function is built with — confirm which apply.
        n_mels = 256
        fmin = 40
        fmax = 16000
        n_fft = 1024
        hop_length = 320
        sample_rate = 32000
        melspectrogram_parameters = {
            "n_mels": 224,
            "fmin": 20,
            "fmax": 16000
        }

        # Label names; prediction_for_clip uses position i as the name of output i.
        target_columns = [
            'acafly', 'acowoo', 'aldfly', 'ameavo', 'amecro',
            'amegfi', 'amekes', 'amepip', 'amered', 'amerob',
            'amewig', 'amtspa', 'andsol1', 'annhum', 'astfly',
            'azaspi1', 'babwar', 'baleag', 'balori', 'banana',
            'banswa', 'banwre1', 'barant1', 'barswa', 'batpig1',
            'bawswa1', 'bawwar', 'baywre1', 'bbwduc', 'bcnher',
            'belkin1', 'belvir', 'bewwre', 'bkbmag1', 'bkbplo',
            'bkbwar', 'bkcchi', 'bkhgro', 'bkmtou1', 'bknsti', 'blbgra1',
            'blbthr1', 'blcjay1', 'blctan1', 'blhpar1', 'blkpho',
            'blsspa1', 'blugrb1', 'blujay', 'bncfly', 'bnhcow', 'bobfly1',
            'bongul', 'botgra', 'brbmot1', 'brbsol1', 'brcvir1', 'brebla',
            'brncre', 'brnjay', 'brnthr', 'brratt1', 'brwhaw', 'brwpar1',
            'btbwar', 'btnwar', 'btywar', 'bucmot2', 'buggna', 'bugtan',
            'buhvir', 'bulori', 'burwar1', 'bushti', 'butsal1', 'buwtea',
            'cacgoo1', 'cacwre', 'calqua', 'caltow', 'cangoo', 'canwar',
            'carchi', 'carwre', 'casfin', 'caskin', 'caster1', 'casvir',
            'categr', 'ccbfin', 'cedwax', 'chbant1', 'chbchi', 'chbwre1',
            'chcant2', 'chispa', 'chswar', 'cinfly2', 'clanut', 'clcrob',
            'cliswa', 'cobtan1', 'cocwoo1', 'cogdov', 'colcha1', 'coltro1',
            'comgol', 'comgra', 'comloo', 'commer', 'compau', 'compot1',
            'comrav', 'comyel', 'coohaw', 'cotfly1', 'cowscj1', 'cregua1',
            'creoro1', 'crfpar', 'cubthr', 'daejun', 'dowwoo', 'ducfly', 'dusfly',
            'easblu', 'easkin', 'easmea', 'easpho', 'eastow', 'eawpew', 'eletro',
            'eucdov', 'eursta', 'fepowl', 'fiespa', 'flrtan1', 'foxspa', 'gadwal',
            'gamqua', 'gartro1', 'gbbgul', 'gbwwre1', 'gcrwar', 'gilwoo',
            'gnttow', 'gnwtea', 'gocfly1', 'gockin', 'gocspa', 'goftyr1',
            'gohque1', 'goowoo1', 'grasal1', 'grbani', 'grbher3', 'grcfly',
            'greegr', 'grekis', 'grepew', 'grethr1', 'gretin1', 'greyel',
            'grhcha1', 'grhowl', 'grnher', 'grnjay', 'grtgra', 'grycat',
            'gryhaw2', 'gwfgoo', 'haiwoo', 'heptan', 'hergul', 'herthr',
            'herwar', 'higmot1', 'hofwoo1', 'houfin', 'houspa', 'houwre',
            'hutvir', 'incdov', 'indbun', 'kebtou1', 'killde', 'labwoo', 'larspa',
            'laufal1', 'laugul', 'lazbun', 'leafly', 'leasan', 'lesgol', 'lesgre1',
            'lesvio1', 'linspa', 'linwoo1', 'littin1', 'lobdow', 'lobgna5', 'logshr',
            'lotduc', 'lotman1', 'lucwar', 'macwar', 'magwar', 'mallar3', 'marwre',
            'mastro1', 'meapar', 'melbla1', 'monoro1', 'mouchi', 'moudov', 'mouela1',
            'mouqua', 'mouwar', 'mutswa', 'naswar', 'norcar', 'norfli', 'normoc', 'norpar',
            'norsho', 'norwat', 'nrwswa', 'nutwoo', 'oaktit', 'obnthr1', 'ocbfly1',
            'oliwoo1', 'olsfly', 'orbeup1', 'orbspa1', 'orcpar', 'orcwar', 'orfpar',
            'osprey', 'ovenbi1', 'pabspi1', 'paltan1', 'palwar', 'pasfly', 'pavpig2',
            'phivir', 'pibgre', 'pilwoo', 'pinsis', 'pirfly1', 'plawre1', 'plaxen1',
            'plsvir', 'plupig2', 'prowar', 'purfin', 'purgal2', 'putfru1', 'pygnut',
            'rawwre1', 'rcatan1', 'rebnut', 'rebsap', 'rebwoo', 'redcro', 'reevir1',
            'rehbar1', 'relpar', 'reshaw', 'rethaw', 'rewbla', 'ribgul', 'rinkin1',
            'roahaw', 'robgro', 'rocpig', 'rotbec', 'royter1', 'rthhum', 'rtlhum',
            'ruboro1', 'rubpep1', 'rubrob', 'rubwre1', 'ruckin', 'rucspa1', 'rucwar',
            'rucwar1', 'rudpig', 'rudtur', 'rufhum', 'rugdov', 'rumfly1', 'runwre1',
            'rutjac1', 'saffin', 'sancra', 'sander', 'savspa', 'saypho', 'scamac1',
            'scatan', 'scbwre1', 'scptyr1', 'scrtan1', 'semplo', 'shicow', 'sibtan2',
            'sinwre1', 'sltred', 'smbani', 'snogoo', 'sobtyr1', 'socfly1', 'solsan',
            'sonspa', 'soulap1', 'sposan', 'spotow', 'spvear1', 'squcuc1', 'stbori',
            'stejay', 'sthant1', 'sthwoo1', 'strcuc1', 'strfly1', 'strsal1', 'stvhum2',
            'subfly', 'sumtan', 'swaspa', 'swathr', 'tenwar', 'thbeup1', 'thbkin',
            'thswar1', 'towsol', 'treswa', 'trogna1', 'trokin', 'tromoc', 'tropar',
            'tropew1', 'tuftit', 'tunswa', 'veery', 'verdin', 'vigswa', 'warvir',
            'wbwwre1', 'webwoo1', 'wegspa1', 'wesant1', 'wesblu', 'weskin', 'wesmea',
            'westan', 'wewpew', 'whbman1', 'whbnut', 'whcpar', 'whcsee1', 'whcspa',
            'whevir', 'whfpar1', 'whimbr', 'whiwre1', 'whtdov', 'whtspa', 'whwbec1',
            'whwdov', 'wilfly', 'willet1', 'wilsni1', 'wiltur', 'wlswar', 'wooduc',
            'woothr', 'wrenti', 'y00475', 'yebcha', 'yebela1', 'yebfly', 'yebori1',
            'yebsap', 'yebsee1', 'yefgra1', 'yegvir', 'yehbla', 'yehcar1', 'yelgro',
            'yelwar', 'yeofly1',  'yerwar', 'yeteup1', 'yetvir']

        ######################
        # Loaders #
        ######################
        loader_params = {
            "train": {
                "batch_size": 64,
                "num_workers": 20,
                "shuffle": True
            },
            "valid": {
                "batch_size": 64,
                "num_workers": 20,
                "shuffle": False
            },
            "test": {
                "batch_size": 64,
                "num_workers": 20,
                "shuffle": False
            }
        }

        ######################
        # Split #
        ######################
        split = "StratifiedKFold"
        split_params = {
            "n_splits": 5,
            "shuffle": True,
            "random_state": 1213
        }

        ######################
        # Model #
        ######################
        base_model_name = "tf_efficientnet_b0_ns"
        pooling = "max"
        pretrained = True
        num_classes = 397
        in_channels = 1

        ######################
        # Criterion #
        ######################
        loss_name = "BCEFocal2WayLoss"
        loss_params: dict = {}

        ######################
        # Optimizer #
        ######################
        optimizer_name = "Adam"
        # NOTE(review): base_optimizer is assigned again a few lines below with the same value.
        base_optimizer = "Adam"
        optimizer_params = {
            "lr": 0.001
        }
        # For SAM optimizer
        base_optimizer = "Adam"

        ######################
        # Scheduler #
        ######################
        scheduler_name = "CosineAnnealingLR"
        scheduler_params = {
            "T_max": 10
        }
      
    # Alternative spectrogram settings (30-second period, 128 mel bins).
    # NOTE(review): not referenced anywhere in the visible part of this function.
    class CFG_2:
            period = 30
            n_mels = 128
            fmin = 40
            fmax = 16000
            n_fft = 2048
            hop_length = 256
            sample_rate = 32000
    
    
    class TestDataset(Dataset):
        """Serves the 5-second windows of a single recording.

        ``df`` must provide ``row_id`` and ``seconds`` columns, where ``seconds``
        marks the END of a window. ``clip`` is the full waveform, sliced at a
        fixed 32 kHz. An optional ``waveform_transforms`` callable is applied to
        each window.
        """

        def __init__(self, df: pd.DataFrame, clip: np.ndarray, waveform_transforms=None):
            self.df, self.clip = df, clip
            self.waveform_transforms = waveform_transforms

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx: int):
            sample_rate = 32000
            row = self.df.loc[idx, :]
            row_id = row.row_id
            window_end = int(row.seconds)
            window_start = max(0, int(window_end - 5))
            segment = self.clip[sample_rate * window_start:sample_rate * window_end]
            # NaNs are zeroed both before and after the optional transform.
            segment = np.nan_to_num(segment.astype(np.float32))
            if self.waveform_transforms:
                segment = self.waveform_transforms(segment)
            return np.nan_to_num(segment), row_id



    def prediction_for_clip(test_df: pd.DataFrame, clip: np.ndarray, model, threshold=0.5):
        """Score every window listed in ``test_df`` with ``model``.

        Returns ``(prediction_dict, probas)``. The dict maps each row_id to the
        space-joined names of the classes whose score is >= ``threshold``, or to
        "nocall" when there are none. ``probas`` holds the flattened score
        vector of each window, in loader order.
        """
        # NOTE(review): the model output is thresholded as-is; confirm it is
        # already a probability (no activation is applied in this function).
        windows = TestDataset(df=test_df, clip=clip, waveform_transforms=get_transforms(phase="test"))
        loader = DataLoader(windows, batch_size=1, shuffle=False)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.eval()

        prediction_dict = {}
        probas = []
        for batch, ids in tqdm(loader):
            key = ids[0]  # batch_size is 1, so there is exactly one id per batch
            with torch.no_grad():
                scores = model(batch.to(device)).detach().cpu().numpy().reshape(-1)
            hits = np.argwhere(scores >= threshold).reshape(-1).tolist()
            probas.append(scores)
            if hits:
                prediction_dict[key] = " ".join(CFG.target_columns[k] for k in hits)
            else:
                prediction_dict[key] = "nocall"
        return prediction_dict, probas


    #%%

    NUM_CLASSES = 397
    SR = 32_000
    # Mel settings read by CustomAudioModel's LogmelFilterBank below.
    N_MELS = 128
    FMIN, FMAX = 100, 16000
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    
    TARGET_SR = 32000
    # Use the test soundscapes when any are present, otherwise fall back to the train soundscapes.
    TEST = (len(list(Path("../input/birdclef-2021/test_soundscapes/").glob("*.ogg"))) != 0)
    if TEST:
        DATADIR = Path("../input/birdclef-2021/test_soundscapes/")
    else:
        DATADIR = Path("../input/birdclef-2021/train_soundscapes/")   
    
    
        
    # Local name: hides any module-level all_audios for the rest of this function.
    # NOTE(review): file order comes from this glob — confirm it matches the order
    # used by the other models before blending their outputs.
    all_audios = list(DATADIR.glob("*.ogg"))
    all_audio_ids = ["_".join(audio_id.name.split("_")[:2]) for audio_id in all_audios]
    # NOTE(review): submission_df is not used again in the visible part of this function.
    submission_df = pd.DataFrame({
        "row_id": all_audio_ids
    })
    submission_df
    
    
    # NOTE(review): THRESH is not read below; prediction() is called with threshold=0.5.
    THRESH = 0.4

    # Checkpoints to load, one network each.
    ckps = [
#         Path("../input/bird-models-ioa/tf_efficientnet_b0_ap_d10_audio/tf_efficientnet_b0_ap_fold0_epoch_44_f1_val_07993.pth"),
        Path("../input/exp21-effnet-ioa-temp/tf_efficientnet_b0_ap_fold0_epoch_58_f1_val_08005.pth")
    ]

    ### ------------------------------------------
    ### Model Loader
    ### ------------------------------------------

    from torchlibrosa.stft import LogmelFilterBank, Spectrogram
    from torchlibrosa.augmentation import SpecAugmentation

    def init_bn(bn):
        """Reset a batch-norm layer's affine parameters to bias 0 and weight 1."""
        for param, value in ((bn.bias, 0.), (bn.weight, 1.0)):
            param.data.fill_(value)


    class CustomAudioModel(nn.Module):
        """Waveform classifier: STFT -> log-mel -> batch norm over mel bins -> timm encoder.

        The classification layer of the timm backbone is replaced by a small MLP
        ending in ``num_classes`` outputs. Which attribute gets replaced depends
        on what the backbone exposes (``head.fc``, ``fc`` or ``classifier``).
        ``forward`` returns the encoder output unchanged; no activation is
        applied in this class.
        """

        def __init__(self, base_model_name: str, pretrained=False, num_classes=397, in_channels=3):
            super().__init__()

            # Spectrogram extractor (n_fft=1024 gives 513 frequency bins)
            self.spectrogram_extractor = Spectrogram(n_fft=1024,
                                                     hop_length=375,
                                                     win_length=892,
                                                     window="hann", center=True, pad_mode="reflect",
                                                     freeze_parameters=True)

            # Logmel feature extractor (reduces the frequency axis to N_MELS bins)
            self.logmel_extractor = LogmelFilterBank(sr=SR, n_fft=1024,
                                                     n_mels=N_MELS, fmin=FMIN, fmax=FMAX,
                                                     ref=1.0, amin=1e-10, top_db=None,
                                                     freeze_parameters=True)

            # Spec augmenter (only applied in training mode, see forward)
            self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
                                                   freq_drop_width=8, freq_stripes_num=2)
            self.bn0 = nn.BatchNorm2d(N_MELS)
            self.encoder = timm.create_model(base_model_name, pretrained=pretrained, in_chans=in_channels)

            # The head replacements below assign the new module directly; the
            # former intermediate nn.Identity() stores were overwritten at once.

            ## rexnet family
            if hasattr(self.encoder, "head"):
                nb_ft = self.encoder.head.fc.in_features  # 1280
                self.encoder.head.fc = nn.Sequential(nn.Linear(nb_ft, 1024),
                                                     nn.SiLU(),  # nn.ELU()
                                                     nn.Dropout(0.2),
                                                     nn.Linear(1024, num_classes))

            ## effnets/..
            if hasattr(self.encoder, "fc"):
                nb_ft = self.encoder.fc.in_features
                self.encoder.fc = nn.Sequential(
                    nn.Linear(nb_ft, 1024), nn.ELU(), nn.Dropout(0.2), # todo: change to 1280 for effB0
                    nn.Linear(1024, 1024), nn.ELU(), nn.Dropout(0.2),
                    nn.Linear(1024, num_classes))

            ## densenet
            if hasattr(self.encoder, "classifier"):
                nb_ft = self.encoder.classifier.in_features  # 1024
                self.encoder.classifier = nn.Sequential(
                    nn.Linear(nb_ft, 1024), nn.ELU(), nn.Dropout(0.2),
                    # nn.Linear(1024, 1024), nn.ELU(), nn.Dropout(0.2),
                    nn.Linear(1024, num_classes))

            self.init_weight()

        def init_weight(self):
            """Reset the input batch norm to weight 1 / bias 0."""
            init_bn(self.bn0)

        def forward(self, input):
            """Return the encoder output for a batch of waveforms.

            Shape notes below were recorded with 320000-sample clips.
            """
            s1 = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins) // torch.Size([1, 1, 854, 513])
            x = self.logmel_extractor(s1)            # (batch_size, 1, time_steps, mel_bins) // torch.Size([1, 1, 854, 128])

            # bn0 normalises dim 1, so move the mel axis there and back.
            x = x.transpose(1, 3)
            x = self.bn0(x)
            x = x.transpose(1, 3)
            if self.training:
                x = self.spec_augmenter(x)

            x = x.transpose(2, 3)
            # (batch_size, channels, freq, frames)
            out = self.encoder(x)
            return out
        


    def load_net(checkpoint_path, model_name="", num_classes=NUM_CLASSES):
        """Build a single-channel CustomAudioModel, load a checkpoint and return it ready for inference.

        Args:
            checkpoint_path: path of a state dict loadable with ``torch.load``.
            model_name: timm backbone name passed to ``CustomAudioModel``.
            num_classes: size of the output layer; now forwarded to the model
                (it was previously accepted but ignored).

        Returns:
            The ``nn.DataParallel``-wrapped model on ``DEVICE``, in eval mode.
        """
        net = CustomAudioModel(model_name, pretrained=False, num_classes=num_classes, in_channels=1)
        # Wrapped before loading so the state-dict keys line up — presumably the
        # checkpoint was saved from a DataParallel model; verify.
        net = nn.DataParallel(net)
        # Load on CPU first, then move the whole model to DEVICE.
        state = torch.load(checkpoint_path, map_location=torch.device("cpu"))
        net.load_state_dict(state)
        net = net.to(DEVICE)
        return net.eval()

    ### ------------------------------------------
    ### Select nets
    ### ------------------------------------------

    # One network per checkpoint in ckps, each built on the tf_efficientnet_b0_ap backbone.
    nets = [
        load_net(c.as_posix(), model_name='tf_efficientnet_b0_ap') for c in ckps
    ]


    ### use SED inference pipeline
    def prediction(test_audios, weights_path: Path=None, threshold=0.5):
        """Run the first loaded network over every file in ``test_audios``.

        Each file is split into 120 windows ending at 5, 10, ..., 600 seconds.
        ``weights_path`` is accepted for interface compatibility but not used.

        Returns ``(prediction_df, probs)``: a frame with ``row_id`` / ``birds``
        columns for all files, and the per-window score vectors of all files
        concatenated along the first axis.
        """
        model = nets[0]
        warnings.filterwarnings("ignore")

        per_file_frames = []
        probs = []
        for audio_path in test_audios:
            with timer(f"Loading {str(audio_path)}", logger):
                clip, _ = sf.read(audio_path)

            # row_id = first two "_"-separated parts of the file name + window end second
            prefix = "_".join(audio_path.name.split("_")[:2])
            seconds = list(range(5, 605, 5))
            test_df = pd.DataFrame({
                "row_id": [f"{prefix}_{second}" for second in seconds],
                "seconds": seconds
            })

            with timer(f"Prediction on {audio_path}", logger):
                prediction_dict, events = prediction_for_clip(test_df,
                                                              clip=clip,
                                                              model=model,
                                                              threshold=threshold)

            per_file_frames.append(pd.DataFrame({
                "row_id": list(prediction_dict.keys()),
                "birds": list(prediction_dict.values())
            }))
            probs.append(events)

        prediction_df = pd.concat(per_file_frames, axis=0, sort=False).reset_index(drop=True)
        return prediction_df, np.concatenate(np.array(probs), axis=0)


    ### Run inference
    # Only the raw scores are returned to the caller; the thresholded frame (subm) is discarded.
    subm, preds_raw = prediction(test_audios=all_audios,
                            weights_path=None,
                            threshold=0.5)    # 0.6

    return preds_raw

#%%



In [None]:
# Raw per-window outputs of the exp21 EfficientNet-B0 model (all files concatenated).
probs_exp21 = get_preds_exp21()


In [None]:
# First ensemble of 3 models: weighted mean of probs1 / probs2 / probs3.
# The divisor 1.8 is the sum of the weights (0.85 + 0.15 + 0.8).
pred_probas = list((0.85*np.array(probs1) + 0.15*np.array(probs2) + 0.8*(np.array(probs3))) / 1.8 )
# 20*397

In [None]:
# raw_preds[0]
# raw_preds[0]


# Move every ensemble entry to host memory as a NumPy array.
ps1 = [val.cpu().numpy() for val in pred_probas]


In [None]:
# Bring the outputs of the two extra models to the layout of the base ensemble (ps1).
probs_sed = np.array(probs_sed).reshape(np.array(ps1).shape)

# Fixed: this line used to reshape probs_sed a second time, silently replacing
# the exp21 predictions with a copy of the SED ones.
# NOTE(review): with the fix, the exp21 outputs enter the blend for the first
# time — re-check the validation score and that they are on a probability scale.
probs_exp21 = np.array(probs_exp21).reshape(np.array(ps1).shape)

In [None]:
# # concatenate all predictions to have same shape

# pred1 = np.concatenate(np.array(ps1), axis = 0)
# pred2 = np.concatenate(np.array(raw_preds), axis = 0)
# # pred3 = np.concatenate(np.array(probs_sed), axis = 0)

# pred3 = np.array(probs_sed)

# print(pred1.shape), print(pred2.shape), print(pred3.shape)

In [None]:
# final_probs =  list( (0.85* np.array(ps1)  + 0.15* np.array(raw_preds) + 0.6*  np.array(probs_sed))/ 1.6)

# final_probs =  list( (0.85* np.array(ps1)  + 0.15* np.array(raw_preds) + 0.6* np.array(probs_sed))/ 1.6)

# Final blend of the four sources; the divisor 1.69 is the sum of the weights
# (0.85 + 0.15 + 0.6 + 0.09). All four arrays must share the same shape.
final_probs =  list( (0.85* np.array(ps1)  + 0.15* np.array(raw_preds) + 0.6* np.array(probs_sed) +  0.09*np.array(probs_exp21))/ 1.69)


# final_probs = list(0.9*np.array(final_probs) + 0.1*np.array(probs_exp21))


# final_probs = np.array(probs_exp21)


In [None]:
# pred1.shape

In [None]:


# final_probs.shape

In [None]:

# Post-process each entry of the blended predictions (threshold 0.27; 0.22 was tried before).
ps = [post_process(val, threshold=0.27) for val in final_probs]

# final_probs =  list(np.array(ps1))


In [None]:
# ps = []
# for val in final_probs:
#     preds_pp = post_process(val, threshold=0.25)  # 0.23
#     ps.append(preds_pp)

In [None]:
# Add post processing here

def get_threshold(threshold_dict, pred_probas, clip_len=120):
    """Build a per-row, per-class threshold matrix for stacked clip predictions.

    ``pred_probas`` is processed in consecutive groups of ``clip_len`` rows
    (default 120, the value that used to be hard-coded). Within each group:

    * every cell starts at ``threshold_dict['median']``;
    * a class whose score exceeds ``threshold_dict['high']`` in at least one
      row is "confident", and its whole column drops to ``threshold_dict['low']``;
    * in rows whose top-scoring class is a confident one, that single cell is
      set to ``threshold_dict['bottom'] * (1 - (10 - rank) / 10)``, which
      evaluates to 0 for the only rank used (0).

    Args:
        threshold_dict: mapping with the keys 'high', 'median', 'low', 'bottom'.
        pred_probas: 2-D array of scores (rows x classes).
        clip_len: number of consecutive rows forming one group.

    Returns:
        Array shaped like ``pred_probas`` holding the threshold for each cell.
    """
    threshold_matrix = np.ones_like(pred_probas) * threshold_dict['median']
    for start in range(0, pred_probas.shape[0], clip_len):
        group = pred_probas[start : start + clip_len]
        # Basic slicing yields a view, so writes below land in threshold_matrix.
        group_thr = threshold_matrix[start : start + clip_len]

        is_confident = (group > threshold_dict['high']).any(axis=0)
        group_thr[:, is_confident] = threshold_dict['low']
        code_confident = np.where(is_confident)[0]

        for rank in range(1):
            # Column holding each row's (rank+1)-th largest score.
            col_max = np.argsort(group, axis=1)[:, -rank-1]
            row_max = np.isin(col_max, code_confident)
            group_thr[row_max, col_max[row_max]] = threshold_dict['bottom'] * (1 - (10 - rank) / 10)
    return threshold_matrix


In [None]:
# Stack the entries of final_probs along the first axis into one array.
pred_exp = np.concatenate(np.array(final_probs), axis = 0)

# pred_exp = np.concatenate(np.array(ps), axis = 0)

In [None]:
# pred_exp = final_probs

In [None]:
# x.shape  #(2400, 397)

# row_ids = []
# for name in filenames:
#     for seconds in range(0, 600, 5):
#         row_ids.append(name[0] + f"_{seconds + 5}")
# print(len(row_ids))

In [None]:
def get_preds(probas, threshold):
    """Turn a 2-D score matrix into one space-separated label string per row.

    A class is selected where ``probas > threshold``; ``threshold`` may be a
    scalar or an array that broadcasts against ``probas``. Rows without any
    selected class become "nocall". Label names are looked up in the
    module-level ``INV_LABEL_IDS``.

    Each row is handled once through its boolean mask, instead of re-scanning
    the full list of hits for every single hit.
    """
    # Side effect kept from the original: register the sentinel id -1 as "nocall".
    INV_LABEL_IDS[-1] = "nocall"
    selected = np.asarray(probas) > threshold
    submit_preds = []
    for row_mask in selected:
        class_ids = np.flatnonzero(row_mask)
        if class_ids.size == 0:
            class_ids = [-1]
        submit_preds.append(' '.join(INV_LABEL_IDS[c] for c in class_ids))
    return submit_preds

In [None]:
# 0.51 | 0.45
# 0.51
# 2400 2400
# f1        0.803819
# prec      0.815125
# rec       0.805778
# n_true    1.130000
# n_pred    1.117917
# n         0.882917
# dtype: float64
# Your LB will be around 0.7846548562214767 | Other Metrics 0.8940483976455086, 0.6454267125908906

In [None]:
pred_exp.shape

In [None]:
def get_metrics(s_true, s_pred):
    """Compare two space-separated label strings as sets.

    Returns a dict with f1, prec, rec and the sizes n_true, n_pred and n
    (intersection). A ratio whose denominator would be 0 is reported as 0.
    """
    s_true = set(s_true.split())
    s_pred = set(s_pred.split())
    n, n_true, n_pred = len(s_true.intersection(s_pred)), len(s_true), len(s_pred)

    prec = n/n_pred if n_pred else 0
    rec = n/n_true if n_true else 0
    f1 = 2*prec*rec/(prec + rec) if prec + rec else 0

    return {"f1": f1, "prec": prec, "rec": rec, "n_true": n_true, "n_pred": n_pred, "n": n}


# Sweep the 'high' cut-off and report the validation score for each value.
for high in [0.1, .2, .3, .4, 0.49,  .5, .51, .52, .6, .7, .8, .9]:

    threshold_dict = {
        'high': high, # 0.7
        'median': 0.45,
        'low' : 0.1,
        'bottom': 0.05
    }

    print(high)

    kk_threshold = get_threshold(threshold_dict, pred_exp)
    kk_preds = get_preds(pred_exp, kk_threshold)

    row_ids = []
    for row, pred in zip(data.itertuples(False), final_probs):
        row_id = [f"{row.id}_{row.site}_{5*i}" for i in range(1, len(pred)+1)]
        row_ids.extend(row_id)

    sub2 = pd.DataFrame({"row_id" : row_ids, "birds" : kk_preds})
    # Assign back instead of a chained inplace fillna on a column selection.
    sub2["birds"] = sub2["birds"].fillna("nocall")
    # NOTE(review): submission.csv is rewritten on every iteration of the sweep.
    sub2.to_csv('submission.csv', index=False)

    # Ground truth is only available when TARGET_PATH is set; without it there
    # is nothing to score (this used to fail with a NameError on sub_target).
    if not TARGET_PATH:
        continue

    sub_target = pd.read_csv(TARGET_PATH)
    # After the merge birds_x comes from the target file and birds_y from sub2.
    sub_target = sub_target.merge(sub2, how="left", on="row_id")

    print(sub_target["birds_x"].notnull().sum(), sub_target["birds_y"].notnull().sum())
    assert sub_target["birds_x"].notnull().all()
    assert sub_target["birds_y"].notnull().all()

    df_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_target.birds_x, sub_target.birds_y)])

    print(df_metrics.mean())

    # Blend of the score on rows labelled 'nocall' and the score on all other rows.
    w_nocall = 0.56
    sc1 = row_wise_micro_averaged_f1_score(sub_target[sub_target.birds_x=='nocall']['birds_x'], sub_target[sub_target.birds_x=='nocall']['birds_y'])
    sc2 = row_wise_micro_averaged_f1_score(sub_target[sub_target.birds_x!='nocall']['birds_x'], sub_target[sub_target.birds_x!='nocall']['birds_y'])

    final_score = w_nocall*sc1 + (1-w_nocall)*sc2

    print(f"Your LB will be around {final_score} | Other Metrics {sc1}, {sc2}")

In [None]:
# Threshold settings used for the submission ('high' fixed at 0.51).
threshold_dict = {
    'high': 0.51,
    'median': 0.45,
    'low' : 0.1,
    'bottom': 0.05
}

# Per-cell thresholds with the same shape as pred_exp.
kk_threshold = get_threshold(threshold_dict, pred_exp)

# kk_preds = get_preds(final_probs, kk_threshold)

In [None]:
# kk_threshold[1000]

In [None]:
# One label string per row of pred_exp ("nocall" when nothing passes its threshold).
kk_preds = get_preds(pred_exp, kk_threshold)

In [None]:
# kk_preds 

In [None]:
# Build "<id>_<site>_<end second>" for every 5-second window of every file.
row_ids = [
    f"{row.id}_{row.site}_{5*i}"
    for row, pred in zip(data.itertuples(False), final_probs)
    for i in range(1, len(pred)+1)
]

In [None]:
len(row_ids), len(kk_preds)

In [None]:
# len(row_ids)

In [None]:
# One submission row per 5-second window.
sub2 = pd.DataFrame({"row_id" : row_ids, "birds" : kk_preds})
# Assign the result back: fillna(inplace=True) on a column selection is chained
# assignment, which newer pandas deprecates and which does not update the
# frame under Copy-on-Write.
sub2["birds"] = sub2["birds"].fillna("nocall")

sub2

In [None]:
# ps = []
# for val in final_probs:
#     preds_pp = post_process(val, threshold=0.23)  # 0.23
#     ps.append(preds_pp)

In [None]:
# THRESH = 0.28 # 0.28

# # preds4 = [get_bird_names(get_thresh_preds(pred, thresh=THRESH, use_pp=True)) for pred in final_probs]
# preds4 = [get_bird_names(get_thresh_preds(pred, thresh=THRESH, use_pp=True)) for pred in ps]



In [None]:
# # preds4

# sub2 = preds_as_df(data, preds4)
# print(sub2.shape)
# sub2

In [None]:



# Write the submission file without the index column.
sub2.to_csv('submission.csv', index=False)

In [None]:
# sub2.birds.value_counts()[:10]

In [None]:


def get_metrics(s_true, s_pred):
    """Compare two space-separated label strings as sets.

    Args:
        s_true: ground-truth labels, separated by whitespace.
        s_pred: predicted labels, separated by whitespace.

    Returns:
        Dict with ``f1``, ``prec``, ``rec`` and the sizes ``n_true``,
        ``n_pred`` and ``n`` (intersection). A ratio whose denominator would
        be 0 (empty label string) is reported as 0 instead of raising.
    """
    true_labels = set(s_true.split())
    pred_labels = set(s_pred.split())
    n = len(true_labels & pred_labels)
    n_true, n_pred = len(true_labels), len(pred_labels)

    prec = n/n_pred if n_pred else 0
    rec = n/n_true if n_true else 0
    f1 = 2*prec*rec/(prec + rec) if prec + rec else 0

    return {"f1": f1, "prec": prec, "rec": rec, "n_true": n_true, "n_pred": n_pred, "n": n}


# Scoring needs ground truth, which is only available when TARGET_PATH is set.
# Everything that reads sub_target now lives inside this branch; the blended
# score used to run unconditionally and failed with a NameError otherwise.
if TARGET_PATH:
    sub_target = pd.read_csv(TARGET_PATH)
    # After the merge birds_x comes from the target file and birds_y from sub2.
    sub_target = sub_target.merge(sub2, how="left", on="row_id")

    # Rows with a label vs. rows with a prediction (the first count used to be printed twice).
    print(sub_target["birds_x"].notnull().sum(), sub_target["birds_y"].notnull().sum())
    assert sub_target["birds_x"].notnull().all()
    assert sub_target["birds_y"].notnull().all()

    df_metrics = pd.DataFrame([get_metrics(s_true, s_pred) for s_true, s_pred in zip(sub_target.birds_x, sub_target.birds_y)])

    print(df_metrics.mean())

    # Blend of the score on rows labelled 'nocall' and the score on all other rows.
    w_nocall = 0.56
    sc1 = row_wise_micro_averaged_f1_score(sub_target[sub_target.birds_x=='nocall']['birds_x'], sub_target[sub_target.birds_x=='nocall']['birds_y'])
    sc2 = row_wise_micro_averaged_f1_score(sub_target[sub_target.birds_x!='nocall']['birds_x'], sub_target[sub_target.birds_x!='nocall']['birds_y'])

    final_score = w_nocall*sc1 + (1-w_nocall)*sc2

    print(f"Your LB will be around {final_score} | Other Metrics {sc1}, {sc2}")

In [None]:
# TARGET_PATH, CFG.train_soundscape

# Your LB will be around 0.6951567092844804 | Other Metrics 0.9437540876389673, 0.37876004592422424

In [None]:
# sub1.birds.value_counts()

In [None]:
def get_scores(preds_th, ):
    """Print validation metrics for thresholded predictions.

    ``preds_th`` is merged on ``row_id`` with the labels read from
    ``CFG.train_soundscape``; it is expected to carry exactly the columns
    ``row_id`` and ``birds``. Nothing is returned.
    """
    ground_truth = pd.read_csv(CFG.train_soundscape)
    train_preds = preds_th.merge(ground_truth[['row_id', 'birds']], on='row_id')
    # After renaming: 'birds' holds the predictions, 'label' the ground truth.
    train_preds.columns = ['row_id', 'birds', 'label']
    y_true, y_pred = train_preds.label, train_preds.birds

    print(f'Competition Metric - Validation score on {len(all_audios)} Train soundscape clips\n')
    print()
    print(f'Row-wise F1 [th={THRES}]:', np.round(row_wise_micro_averaged_f1_score(y_true, y_pred), 6) )
    print(f'Fbeta-sklearn [th={THRES}]:', np.round(fbeta_score(y_true, y_pred, average='micro', beta=1), 6) )
    print('-'*30)
    for mode in ('macro', 'micro', 'weighted'):
        print(f'{mode}-precision:', np.round(precision_score(y_true, y_pred, average=mode), 6) )
    print()
    for mode in ('macro', 'micro', 'weighted'):
        print(f'{mode}-recall:', np.round(recall_score(y_true, y_pred, average=mode), 6) )
    print()
    print('ACC:', np.round(metrics.accuracy_score(y_true, y_pred), 6) )

In [None]:
import warnings
# Silence library warnings for the metric report below.
warnings.filterwarnings('ignore')

# NOTE(review): THRES is not defined in the visible cells — confirm it exists (THRESH is the name used elsewhere).
print(f'CV Scores for Threshold: {THRES} \n')
get_scores(sub2)

# print('Model 2')
# get_scores(train_preds2_th)