In [None]:
## setup: install the packages that are attached as local wheels / source trees under ../input
## TODO: move all wheels under birds2022wheels
!pip install ../input/birds-inference-pip-wheels/torchaudio-0.8.1-cp37-cp37m-manylinux1_x86_64.whl ../input/birds-inference-pip-wheels/torch-1.8.1-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/birds-inference-pip-wheels/audiomentations-0.16.0-py3-none-any.whl --no-index --no-deps
!pip install ../input/birds-inference-pip-wheels/torchlibrosa-0.0.9-py3-none-any.whl --no-index --no-deps
!pip install ../input/birds2022wheels/nnAudio-0.3.1-py3-none-any.whl
## timm is installed from a source tree: copy it into ../working, install from the copy, then delete the copy
## (presumably because ../input is read-only -- TODO confirm)
!cp -r ../input/timmlatest ../working/timmlatest
!pip install -U ../working/timmlatest
!rm -rf ../working/timmlatest

In [None]:
## https://github.com/Selimonder/birdclef2022/
import os, sys, glob, math

# Set the MKL / OpenMP thread-count environment variables to 1
# (done here, before the numeric libraries are imported in the next cell).
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

# Put the competition repo (attached as a dataset) on the import path;
# the `zoo` and `training` imports below presumably resolve from here -- TODO confirm.
github_folder = "/kaggle/input/birdclef2022-dev/birdclef2022-cls_exp/birdclef2022-cls_exp/"
sys.path.append(github_folder)

In [None]:
import cv2
import gc
import torch
import librosa
import argparse, warnings

import numpy as np
import pandas as pd
import IPython.display as ipd

import zoo
from training.config import load_config
from training.datasets import BirdDatasetOOF


# Turn off OpenCV's OpenCL usage and pass 0 as its thread count
# (presumably to avoid CPU thread oversubscription -- TODO confirm intent).
cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)

from tqdm import tqdm
from torch.utils.data import DataLoader

# Silence all Python warnings for the rest of the notebook.
warnings.simplefilter("ignore")

## models

In [None]:
def load_model(conf_path: str, weights_path: str, prefix: str, suffix: str, fold: int, to_device: bool = True):
    """
    Build a network from a config file and restore the checkpoint of one fold.

    The checkpoint file name is assembled as
    ``{prefix}{network}_{encoder}_{fold}_{suffix}`` and looked up inside
    ``weights_path``.

    Args:
        conf_path: path of the config handed to ``load_config``.
        weights_path: directory that contains the checkpoint files.
        prefix: leading part of the checkpoint file name.
        suffix: trailing part of the checkpoint file name (e.g. "lb").
        fold: fold index embedded in the checkpoint file name.
        to_device: when True (default) the model is moved to the GPU with
            ``.cuda()``. When False no explicit ``.cuda()`` call is made, so
            loading also works on a CPU-only host. NOTE(review):
            ``DataParallel`` may still place the module on a GPU by itself
            when one is visible -- confirm against the PyTorch docs.

    Returns:
        The network wrapped in ``torch.nn.DataParallel``, switched to eval mode.
    """
    conf = load_config(conf_path)
    # The checkpoint supplies the weights, so request a non-pretrained encoder.
    conf['encoder_params']['pretrained'] = False

    snapshot_name = "{}{}_{}_{}_{}".format(prefix, conf["network"], conf["encoder_params"]["encoder"], fold, suffix)
    weights_path = os.path.join(weights_path, snapshot_name)
    print(weights_path)
    model = zoo.__dict__[conf["network"]](**conf["encoder_params"])
    # The state dict is loaded into the DataParallel wrapper (as before), so the
    # wrapper is kept; only the unconditional `.cuda()` that used to follow it
    # was dropped, because it made `to_device` a no-op.
    model = torch.nn.DataParallel(model)
    print("=> loading checkpoint '{}''".format(weights_path))
    checkpoint = torch.load(weights_path, map_location="cpu")
    print("epoch", checkpoint["epoch"])
    model.load_state_dict(checkpoint["state_dict"])
    model.eval()
    if to_device:
        model.cuda()
    return model

## Ensemble members: one checkpoint per (fold, suffix) pair.
## Only the "lb" snapshot is used for now ("f1_score" / "last" are other candidates).
folds    = 5
suffixes = ["lb"]
models   = []

for fold_idx in range(folds):
    for ckpt_suffix in suffixes:
        try:
            models.append(
                load_model(
                    conf_path    = f"{github_folder}/configs/cls_nf0_v3.json",
                    weights_path = "/kaggle/input/nfnet-baseline-bs16",
                    prefix       = "baseline_submit_bs16",
                    suffix       = ckpt_suffix,
                    fold         = fold_idx,
                )
            )
        except Exception as e:
            # Best effort: a checkpoint that cannot be loaded is reported and skipped.
            print("model not found", e)

In [None]:
# Sanity check: number of checkpoints that were loaded (failed loads are skipped above).
len(models)

## infer

In [None]:

def prepare_clip(fpath, frame_length, sample_rate):
    """
    Load an audio file and cut it into fixed-length segments for inference.

    Args:
        fpath: path of the audio file handed to ``librosa.load``.
        frame_length: length of one segment in seconds.
        sample_rate: sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        batch: dict with
            "wav_tensors" - tensor of shape (n_parts, 1, samples_per_segment),
                            moved to the GPU with ``.cuda()``;
            "end_times"   - end time (seconds) of every segment.
        n_parts: number of segments.
        n_samples: length of the loaded waveform in samples.
    """
    waveform, sample_rate = librosa.load(fpath, sr=sample_rate, mono=True)

    # Samples per segment, computed once; int() keeps slice bounds integral
    # even when frame_length is fractional.
    frame_samples = int(frame_length * sample_rate)
    n_parts = math.ceil(len(waveform) / frame_samples)

    batch = {"wav_tensors": [], "end_times": []}
    for seg_idx in range(n_parts):
        start = seg_idx * frame_samples
        wav_tensor = torch.from_numpy(waveform[start:start + frame_samples])

        # A slice can never be longer than frame_samples, so the only special
        # case is the final, shorter segment: zero-pad it on the right.
        missing = frame_samples - len(wav_tensor)
        if missing > 0:
            wav_tensor = torch.nn.functional.pad(wav_tensor, (0, missing))

        batch["wav_tensors"].append(wav_tensor.unsqueeze(0))
        batch["end_times"].append((seg_idx + 1) * frame_length)

    batch["wav_tensors"] = torch.stack(batch["wav_tensors"]).cuda()
    return batch, n_parts, len(waveform)

@torch.no_grad()
def predict_clip(models, batch, n_parts, frame_length):
    """
    Run every model on the prepared segments and keep the per-class maximum.

    Args:
        models: non-empty sequence of callables; each one is invoked as
            ``model(wav_tensors, is_test=True)`` and must return a mapping
            that holds the raw scores under "logit".
        batch: dict holding the stacked segment tensor under "wav_tensors".
        n_parts: number of segments, i.e. rows of the prediction buffer.
        frame_length: unused; kept so existing callers keep working.

    Returns:
        float64 array of shape (n_parts, n_classes): element-wise maximum over
        all models of the sigmoid probabilities.

    Raises:
        ValueError: if ``models`` is empty.
    """
    if len(models) == 0:
        raise ValueError("predict_clip needs at least one model")

    preds = None
    for m_idx, model in enumerate(models):
        with torch.cuda.amp.autocast():
            probs = model(batch["wav_tensors"], is_test=True)["logit"].sigmoid().cpu().numpy()
        if preds is None:
            # Size the buffer from the first output instead of hard-coding 21 classes.
            preds = np.zeros([len(models), n_parts, probs.shape[-1]])
        preds[m_idx] = probs
    return preds.max(0) ## max by model
    

In [None]:
# Repeat of the earlier check: how many models will take part in the ensemble.
len(models)

In [None]:
%%time

@torch.no_grad()
def generate_preds():
    """
    Predict every test soundscape with the loaded ensemble.

    Each ``*.ogg`` file in the test folder is cut into 5-second segments at
    32 kHz and scored by all entries of the global ``models`` list.

    Returns:
        prob_array: ``np.array`` built from the per-file prediction arrays, in
            sorted file order. NOTE(review): a regular 3-D array is only
            obtained when every file yields the same number of segments --
            confirm for the test set.
        scored_bird_name2idx: dict mapping each scored species code to its
            column index.
    """
    test_files   = sorted(glob.glob("/kaggle/input/birdclef-2022/test_soundscapes/*.ogg"))
    sample_rate  = 32000
    frame_length = 5

    scored_birds = np.array(['akiapo', 'aniani', 'apapan', 'barpet', 'crehon', 'elepai', 'ercfra',
                              'hawama', 'hawcre', 'hawgoo', 'hawhaw', 'hawpet1', 'houfin', 'iiwi',
                              'jabwar', 'maupar', 'omao', 'puaioh', 'skylar', 'warwhe1', 'yefcan'])

    preds = []
    for fpath in test_files:
        batch, n_parts, _ = prepare_clip(fpath, frame_length, sample_rate)
        preds.append(predict_clip(models, batch, n_parts, frame_length))

    prob_array = np.array(preds)
    scored_bird_name2idx = {name: idx for idx, name in enumerate(scored_birds)}

    return prob_array, scored_bird_name2idx

prob_array, scored_bird_name2idx = generate_preds()
# Optional clean-up, left disabled: drop the models and release cached GPU memory.
# del models
# gc.collect()
# torch.cuda.empty_cache()

In [None]:
# Shape of the stacked predictions returned by generate_preds().
prob_array.shape

----

In [None]:
# Work from the BirdNET checkout so that relative paths in the following cells resolve against it
# (the local imports `config`, `audio`, `model`, `analyze` presumably rely on this too -- TODO confirm).
%cd /kaggle/input/birdnetgit

In [None]:
import os
import sys
import json
import math
import glob
import librosa
import operator
import argparse
import datetime
import traceback

from multiprocessing import Pool, freeze_support

import numpy as np

import config as cfg
import audio
import model
from analyze import *

import pandas as pd
import numpy as np

In [None]:
# Taxonomy table used to translate common names into eBird species codes.
df = pd.read_csv("../birdclef-2022/eBird_Taxonomy_v2021.csv")


def s2id(x):
    """Return the first SPECIES_CODE whose PRIMARY_COM_NAME equals `x`.

    Raises IndexError when no row matches.
    """
    matching_codes = df.loc[df['PRIMARY_COM_NAME'] == x, 'SPECIES_CODE']
    return matching_codes.tolist()[0]


# BirdNET V2.1 checkpoint, metadata model, label file and translated-label folder.
cfg.MODEL_PATH = '/kaggle/input/birdnetgit/checkpoints/V2.1/BirdNET_GLOBAL_2K_V2.1_Model_FP32.tflite'
cfg.MDATA_MODEL_PATH = '/kaggle/input/birdnetgit/checkpoints/V2.1/BirdNET_GLOBAL_2K_V2.1_MData_Model_FP32.tflite'
cfg.LABELS_FILE = '/kaggle/input/birdnetgit/checkpoints/V2.1/BirdNET_GLOBAL_2K_V2.1_Labels.txt'
cfg.TRANSLATED_LABELS_PATH = '/kaggle/input/birdnetgit/labels/V2.1'

# Signal settings: SIG_LENGTH is the chunk length in seconds used when cutting clips;
# overlap and minimum length presumably use the same unit -- TODO confirm.
cfg.SIG_LENGTH = 5
cfg.SIG_OVERLAP = 0
cfg.SIG_MINLEN = 5

# Load eBird codes, labels

def loadCodes(CODEC_FILE):
    """Parse the JSON file at `CODEC_FILE` and return the decoded object."""
    with open(CODEC_FILE, 'r') as handle:
        return json.load(handle)

# Load the eBird taxonomy codes; the path is relative to the current working directory.
cfg.CODES = loadCodes("./eBird_taxonomy_codes_2021E.json")
def loadLabels(labels_file):
    """Read `labels_file` and return its lines with the newline characters removed."""
    with open(labels_file, 'r') as lfile:
        return [raw_line.replace('\n', '') for raw_line in lfile]

def prepare_clip(fpath):
    """
    Load an audio file at 48 kHz and cut it into fixed-length chunks.

    NOTE: this redefines the ``prepare_clip(fpath, frame_length, sample_rate)``
    helper defined earlier in the notebook. Once this cell has run, the earlier
    inference cell cannot be re-run without re-executing that definition.

    Args:
        fpath: path of the audio file handed to ``librosa.load``.

    Returns:
        List of 1-D numpy arrays, each ``cfg.SIG_LENGTH * 48000`` samples long.
        The last chunk is zero-padded on the right when the audio does not
        fill it.
    """
    frame_length = cfg.SIG_LENGTH

    # The rate was previously read from cfg.SAMPLE_RATE and then overwritten
    # straight away; 48000 is the value that has always been in effect.
    waveform, sample_rate = librosa.load(fpath, sr=48000, mono=True, res_type='kaiser_fast')

    chunk_samples = int(frame_length * sample_rate)
    n_parts = math.ceil(len(waveform) / chunk_samples)

    chunks = []
    for seg_idx in range(n_parts):
        start = seg_idx * chunk_samples
        chunk = waveform[start:start + chunk_samples]
        # A slice is never longer than chunk_samples; only a short tail needs padding.
        if len(chunk) < chunk_samples:
            chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
        chunks.append(chunk)

    return chunks


# Species codes that are written to the submission, in alphabetical order.
scored_birds = np.array(['akiapo', 'aniani', 'apapan', 'barpet', 'crehon', 'elepai', 'ercfra',
                          'hawama', 'hawcre', 'hawgoo', 'hawhaw', 'hawpet1', 'houfin', 'iiwi',
                          'jabwar', 'maupar', 'omao', 'puaioh', 'skylar', 'warwhe1', 'yefcan'])

# Read the BirdNET label list from the label file configured on cfg.
cfg.LABELS = loadLabels(cfg.LABELS_FILE)

# Load the BirdNET model (`model` is the module imported from the BirdNET checkout;
# presumably it reads the cfg paths set earlier -- TODO confirm).
model.loadModel()



In [None]:
# Map each scored species code to the position of its label in cfg.LABELS.
bird_dict = {}
for label_idx, label in enumerate(cfg.LABELS):
    try:
        # The text after the last "_" of a label is looked up as a common name.
        species_code = s2id(label.split("_")[-1])
        if species_code in scored_birds:
            bird_dict[species_code] = label_idx
    except Exception:
        # Best effort: labels whose lookup fails are skipped.
        continue

# Force an entry for "aniani" that points at label 0 (presumably a placeholder because
# no label matched it; that column is replaced by the other model's output later).
bird_dict["aniani"] = 0

# Column indices ordered by species code.
sorted_bd = dict(sorted(bird_dict.items(), key=operator.itemgetter(0)))
get_idx = [label_idx for label_idx in sorted_bd.values()]

In [None]:
# def predict_single(test_path)

def generate_preds_bn():
    """
    Score every test soundscape with BirdNET.

    Returns:
        preds: ``np.array`` over the per-file outputs of ``predict``, restricted
            to the columns listed in the global ``get_idx``.
        file_ids: file names without the ".ogg" extension, in the same
            (sorted) order as ``preds``.
    """
    soundscape_paths = sorted(glob.glob("/kaggle/input/birdclef-2022/test_soundscapes/*.ogg"))

    file_ids = [os.path.basename(path).replace(".ogg", "") for path in soundscape_paths]
    preds = np.array([predict(prepare_clip(path))[:, get_idx] for path in soundscape_paths])

    return preds, file_ids
        

In [None]:
%%time
# Run BirdNET over all test soundscapes; file_ids follows the sorted file order.
preds_bn, file_ids = generate_preds_bn()


In [None]:
## fix aniani: its BirdNET column is only a placeholder, so take that column
## from the first model's predictions instead.
aniani_col = scored_bird_name2idx["aniani"]
preds_bn[:, :, aniani_col] = prob_array[:, :, aniani_col]

In [None]:
## Weighted blend: BirdNET predictions count three times as much as the first model's.
## The "aniani" column (index 1) of preds_bn was overwritten with prob_array in the
## previous cell, so for that species the blend equals prob_array.
avg_preds = (3 * preds_bn + prob_array) / 4

avg_preds.shape

In [None]:
# Seconds covered by one prediction row and the probability cut-off for a positive call.
infer_frame_length = 5
threshold = 0.1

# One row per (file, frame, scored species); target is True when the blended
# probability exceeds the threshold.
submission = []
for clip_preds, file_id in zip(avg_preds, file_ids):
    for frame_no, pred in enumerate(clip_preds, start=1):
        end_time = frame_no * infer_frame_length
        submission.extend(
            {
                "row_id": f"{file_id}_{bird}_{end_time}",
                "target": pred[bi] > threshold,
            }
            for bi, bird in enumerate(scored_birds)
        )

In [None]:
# Return to /kaggle/working so that submission.csv (written with a relative path below) lands there.
%cd /kaggle/working

In [None]:
# Use row_id as the index so it is written as the first column, then save into the current directory.
df_submission = pd.DataFrame(submission).set_index("row_id")
df_submission.to_csv("submission.csv")

In [None]:
# Summary of the submission: total number of rows and how many of them are flagged True.
print(f"""
total rows : {len(df_submission)}
activated  : {len(df_submission[(df_submission.target==True)])}
""")

# Preview of the first positive rows.
df_submission[(df_submission.target==True)].head()