In [None]:
# Replace any previous copy at /tmp/birdclef2022 with a fresh copy of the
# project source shipped as a Kaggle input dataset.
!rm -rf /tmp/birdclef2022
!cp -r "/kaggle/input/birdclef22-clone-source-code-repository/birdclef2022" /tmp/birdclef2022

In [None]:
# List the copied tree two levels deep to confirm the copy succeeded.
! tree -L 2 /tmp/birdclef2022

In [None]:
%%bash
# Write /tmp/run.bash, a helper that installs the pre-downloaded wheels from
# the Kaggle input dataset without touching the network (--no-index) and
# without resolving dependencies (--no-deps).
# The heredoc delimiter is quoted, so nothing is expanded while writing.
cat << 'EOF' > /tmp/run.bash
#!/usr/bin/env bash
# Explicit interpreter: the script is executed directly, not via `bash file`.
set -euo pipefail
PIP_DEP_PATH='/kaggle/input/birdclef22-create-build-environment/pip_deps'
echo "${PIP_DEP_PATH}"
# The glob stays outside the quotes so it still expands to every wheel.
pip install "${PIP_DEP_PATH}"/* -f ./ --no-index --no-deps --find-links="${PIP_DEP_PATH}"
EOF
chmod +x /tmp/run.bash

In [None]:
# Run the installer script written by the previous cell.
!/tmp/run.bash

In [None]:
# Import timm and print its version as a quick check that the offline
# install above made it importable.
import timm

print(f"timm version: {timm.__version__}")

In [None]:
# Work from the binary_classifier directory of the copied source; presumably
# this is what makes train_util and the cfg_* modules importable by name — confirm.
%cd /tmp/birdclef2022/binary_classifier

In [None]:
%%bash
# Write a .env file in the current directory that exports DATA_DIR.
# The heredoc delimiter is quoted, so the content is written verbatim.

(cat << 'EOF'
export DATA_DIR="/kaggle/input/birdclef-2022"
EOF
) > .env

In [None]:
import importlib
import json
import os
from itertools import product
from os.path import basename

import glob
import numpy as np
import multiprocessing as mp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch

from copy import copy
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

from train_util import set_module_path, auto_set_config_param
from torch.optim.swa_utils import AveragedModel

# Use the ggplot matplotlib style for every figure in this notebook.
plt.style.use("ggplot")

# Load the IPython extensions used below.
%load_ext dotenv
%load_ext lab_black
%load_ext autoreload
# %dotenv without arguments: presumably loads the .env file written earlier
# into os.environ (the next cell reads DATA_DIR) — confirm.
%dotenv
# Mode 2: reload all modules before each cell execution.
%autoreload 2

In [None]:
# Sanity check: raises KeyError if DATA_DIR is not set in the environment.
os.environ["DATA_DIR"]

In [None]:
# Helper imported from train_util; presumably extends sys.path so the config,
# dataset and model modules can be imported by name — confirm in train_util.
set_module_path()

In [None]:
def create_test_df(test_soundscapes):
    """Build the test metadata frame from a collection of soundscape paths.

    Args:
        test_soundscapes: Iterable of file paths. The last four characters of
            each base name are dropped to form the file id, so the paths are
            expected to end in a four-character suffix such as ``.ogg``.

    Returns:
        DataFrame with one row per path and two columns: ``file_id`` (base
        name without its last four characters) and ``filename``
        (``"<file_id>.ogg"``).
    """
    stems = []
    ogg_names = []
    for soundscape_path in test_soundscapes:
        stem = basename(soundscape_path)[:-4]
        stems.append(stem)
        ogg_names.append(f"{stem}.ogg")

    return pd.DataFrame({"file_id": stems, "filename": ogg_names})

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order. Sort them so
# the row order of test_df (and of everything derived from it: predictions,
# submission rows, the file shown in the visualisation) is reproducible.
test_soundscapes = sorted(
    glob.glob("/kaggle/input/birdclef-2022/test_soundscapes/soundscape_*.ogg")
)


test_df = create_test_df(test_soundscapes)

In [None]:
# Display the test metadata (file_id, filename).
test_df

In [None]:
# Competition data location and the folder holding the test soundscapes.
COMP_FOLDER = "/kaggle/input/birdclef-2022"
TEST_AUDIO_ROOT = f"{COMP_FOLDER}/test_soundscapes"


sample_submission = pd.read_csv(f"{COMP_FOLDER}/sample_submission.csv")
# Used as num_workers for every DataLoader below.
N_CORES = mp.cpu_count()
# NOTE(review): sample_submission, PUBLIC_RUN, RAM_CHECK and MIXED_PRECISION
# are not referenced by any other cell shown in this notebook.
PUBLIC_RUN = False

RAM_CHECK = False
MIXED_PRECISION = False
# Device each batch is moved to in the inference loops.
DEVICE = "cuda"

In [None]:
# File names of every .ogg in the test folder.
# NOTE(review): test_fns is not used by any other cell shown here.
test_fns = [item for item in os.listdir(TEST_AUDIO_ROOT) if item.endswith(".ogg")]

In [None]:
# Persist the two-column metadata (file_id, filename); the placeholder columns
# are only added to test_df in the next cell, so they are not in this CSV.
test_df.to_csv("/kaggle/working/test_metadata.csv", index=False)

In [None]:
# Add constant placeholder columns to test_df in place; presumably
# CustomDataset expects the training-metadata schema even in test mode — confirm.
test_df["rating"] = 5
test_df["target"] = 0
test_df["secondary_labels"] = "[]"
# A string of 152 space-separated zeros.
test_df["pseudo_labels"] = " ".join(["0"] * 152)
# presumably 60 seconds of audio at 32 kHz, in samples — TODO confirm
test_df["length"] = 32_000 * 60
test_df["fold"] = -1

In [None]:
# Model 1 of 8 (cfg_passt_1_v3): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_1.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_passt_1_v3")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None
cfg.infer = True

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # NOTE(review): never read in this cell
filepaths = [
    "/kaggle/input/birdclef2022-model-checkpoints/swa-model-passt_1_v3-fold-1-seed970058:v3/checkpoint_swa_model_seed970058.pth",
]
for filepath in filepaths:
    state_dicts.append(filepath)

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_1 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_1 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_1 = np.array(preds_1)

In [None]:
# Model 2 of 8 (cfg_panns_2_v5): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_2.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_panns_2_v5")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    # sd = {k.replace("module.", ""): v for k, v in sd.items()}
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # backbone name for each checkpoint, index-aligned with state_dicts
checkpoint_glob = "/kaggle/input/birdclef2022-model-checkpoints/swa-model-panns_2_v5-fold-1-seed781952:v3/checkpoint_swa_model_seed781952.pth"
for filepath in glob.iglob(checkpoint_glob):
    state_dicts.append(filepath)
    backbones.append("resnet34")

# Fail fast: an unmatched glob would otherwise load zero models and produce an
# empty prediction array that only breaks later, in the ensembling concatenate.
if not state_dicts:
    raise FileNotFoundError(f"No checkpoint found for {checkpoint_glob}")

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_2 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_2 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_2 = np.array(preds_2)

In [None]:
# Model 3 of 8 (cfg_panns_2_v6): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_3.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_panns_2_v6")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    # sd = {k.replace("module.", ""): v for k, v in sd.items()}
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # backbone name for each checkpoint, index-aligned with state_dicts
checkpoint_glob = "/kaggle/input/birdclef2022-model-checkpoints/swa-model-panns_2_v6-fold-1-seed58594:v3/checkpoint_swa_model_seed58594.pth"
for filepath in glob.iglob(checkpoint_glob):
    state_dicts.append(filepath)
    backbones.append("resnet34")

# Fail fast: an unmatched glob would otherwise load zero models and produce an
# empty prediction array that only breaks later, in the ensembling concatenate.
if not state_dicts:
    raise FileNotFoundError(f"No checkpoint found for {checkpoint_glob}")

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_3 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_3 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_3 = np.array(preds_3)

In [None]:
# Model 4 of 8 (cfg_panns_2_v7): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_4.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_panns_2_v7")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    # sd = {k.replace("module.", ""): v for k, v in sd.items()}
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # backbone name for each checkpoint, index-aligned with state_dicts
checkpoint_glob = "/kaggle/input/birdclef2022-model-checkpoints/swa-model-panns_2_v7-fold-1-seed285669:v3/checkpoint_swa_model_seed285669.pth"
for filepath in glob.iglob(checkpoint_glob):
    state_dicts.append(filepath)
    backbones.append("eca_nfnet_l0")

# Fail fast: an unmatched glob would otherwise load zero models and produce an
# empty prediction array that only breaks later, in the ensembling concatenate.
if not state_dicts:
    raise FileNotFoundError(f"No checkpoint found for {checkpoint_glob}")

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_4 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_4 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_4 = np.array(preds_4)

In [None]:
# Model 5 of 8 (cfg_panns_2_v8): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_5.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_panns_2_v8")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    # sd = {k.replace("module.", ""): v for k, v in sd.items()}
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # backbone name for each checkpoint, index-aligned with state_dicts
checkpoint_glob = "/kaggle/input/birdclef2022-model-checkpoints/swa-model-panns_2_v8-fold-1-seed174663:v3/checkpoint_swa_model_seed174663.pth"
for filepath in glob.iglob(checkpoint_glob):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnet_b0_ns")

# Fail fast: an unmatched glob would otherwise load zero models and produce an
# empty prediction array that only breaks later, in the ensembling concatenate.
if not state_dicts:
    raise FileNotFoundError(f"No checkpoint found for {checkpoint_glob}")

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_5 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_5 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_5 = np.array(preds_5)

In [None]:
# Model 6 of 8 (cfg_passt_1_v5): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_6.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_passt_1_v5")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None
cfg.infer = True

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # NOTE(review): never read in this cell
filepaths = [
    "/kaggle/input/birdclef2022-model-checkpoints/swa-model-passt_1_v5-fold-1-seed167728:v3/checkpoint_swa_model_seed167728.pth",
]
for filepath in filepaths:
    state_dicts.append(filepath)

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_6 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_6 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_6 = np.array(preds_6)

In [None]:
# Model 7 of 8 (cfg_passt_1_v6): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_7.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_passt_1_v6")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None
cfg.infer = True

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # NOTE(review): never read in this cell
filepaths = [
    "/kaggle/input/birdclef2022-model-checkpoints/swa-model-passt_1_v6-fold-1-seed280349:v3/checkpoint_swa_model_seed280349.pth",
]
for filepath in filepaths:
    state_dicts.append(filepath)

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_7 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_7 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_7 = np.array(preds_7)

In [None]:
# Model 8 of 8 (cfg_passt_1_v7): build the test loader, restore the SWA
# checkpoint and collect per-batch predictions into preds_8.
# The cfg left in scope by this cell is the one later cells (submission,
# visualisation) read.
# Both config modules are reloaded so on-disk edits are picked up; the cell
# then works on a shallow copy of the experiment's cfg object.
cfg = importlib.import_module("default_config")
importlib.reload(cfg)
cfg = importlib.import_module("cfg_passt_1_v7")
importlib.reload(cfg)
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_bins)

# Inference-time overrides, applied before auto_set_config_param.
cfg.test_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False
cfg.pretrained_weights = None
cfg.infer = True

auto_set_config_param(cfg)

# Resolve the dataset module named by the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

# One dataset item per batch.
cfg.batch_size = 1

aug = None  # no augmentation object is passed for the test set
test_ds = CustomDataset(test_df, cfg, aug, mode="test")
test_dl = DataLoader(
    test_ds, shuffle=False, batch_size=cfg.batch_size, num_workers=N_CORES
)

# Resolve the model module named by the config.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net


def get_state_dict(sd_fp):
    """Load the checkpoint at ``sd_fp`` onto the CPU and return its ``"model"`` entry."""
    state_dict = torch.load(sd_fp, map_location="cpu")
    sd = state_dict["model"]
    return sd


state_dicts = []  # despite the name, this holds checkpoint file paths
backbones = []  # NOTE(review): never read in this cell
filepaths = [
    "/kaggle/input/birdclef2022-model-checkpoints/swa-model-passt_1_v7-fold-1-seed572879:v3/checkpoint_swa_model_seed572879.pth",
]
for filepath in filepaths:
    state_dicts.append(filepath)

nets = []

# Each network is wrapped in AveragedModel before loading; presumably the
# checkpoint was saved from an AveragedModel, so strict=True needs the wrapper's
# key layout — confirm against the training code.
for i, state_dict in enumerate(state_dicts):
    net = Net(cfg).eval().cuda()
    swa_model = AveragedModel(net, device=cfg.device)
    swa_model.update_parameters(net)
    sd = get_state_dict(state_dict)
    print("loading dict")
    swa_model.load_state_dict(sd, strict=True)
    nets += [swa_model]

# %%checkerror
from scipy.stats.mstats import gmean  # NOTE(review): gmean is not used in this cell

# NOTE(review): autocast is always enabled here; the MIXED_PRECISION flag is
# not consulted.
with torch.no_grad():
    preds_8 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            preds_ = []  # outputs of every net for this batch
            for net in nets:
                out = net(batch)["logits"]
                preds_ += [out.cpu().numpy()]

        preds_8 += [preds_]

# Outer axis: batches; second axis: models. The ensembling cell unpacks the
# result as 4-D (batch, n_models, parts, n_classes).
preds_8 = np.array(preds_8)

In [None]:
# cfg_passt_1_v3 output. The first axis counts DataLoader batches (batch_size is 1).
preds_1.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_panns_2_v5 output. The first axis counts DataLoader batches (batch_size is 1).
preds_2.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_panns_2_v6 output. The first axis counts DataLoader batches (batch_size is 1).
preds_3.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_panns_2_v7 output. The first axis counts DataLoader batches (batch_size is 1).
preds_4.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_panns_2_v8 output. The first axis counts DataLoader batches (batch_size is 1).
preds_5.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_passt_1_v5 output. The first axis counts DataLoader batches (batch_size is 1).
preds_6.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_passt_1_v6 output. The first axis counts DataLoader batches (batch_size is 1).
preds_7.shape  # (bs, n_models, parts, n_classes)

In [None]:
# cfg_passt_1_v7 output. The first axis counts DataLoader batches (batch_size is 1).
preds_8.shape  # (bs, n_models, parts, n_classes)

# Make Submission

In [None]:
def create_submit_df(test_df, cfg):
    """Build an empty submission frame with one row per (file, window, bird).

    Args:
        test_df: DataFrame with a ``file_id`` column.
        cfg: Object exposing ``birds``; only its first 21 entries are used.

    Returns:
        DataFrame with ``row_id`` (``"<file_id>_<bird>_<end_sec>"``) and a
        ``target`` column filled with ``None``. Rows are ordered by file, then
        by window end second (5, 10, ..., 60), then by bird.
    """
    birds = cfg.birds[:21]
    end_seconds = list(range(5, 65, 5))

    row_ids = []
    for file_id in test_df.file_id.tolist():
        for end_sec in end_seconds:
            for bird in birds:
                # Iteration is (file, window, bird); the id is file_bird_window.
                row_ids.append(f"{file_id}_{bird}_{end_sec}")

    submit_df = pd.DataFrame({"row_id": row_ids})
    submit_df["target"] = None
    return submit_df

In [None]:
# Number of leading classes kept for the submission; matches the
# cfg.birds[:21] slice used by create_submit_df.
n_scored = 21
# Stack the eight per-model arrays along the model axis.
preds = np.concatenate(
    [preds_1, preds_2, preds_3, preds_4, preds_5, preds_6, preds_7, preds_8], axis=1
)
print(preds.shape)

bn, n_models, part, n_classes = preds.shape
# Ensemble with a power mean of exponent gem_p over the model axis.
# NOTE(review): a fractional power of a negative mean is NaN, so this assumes
# the model outputs are non-negative (e.g. probabilities) — confirm.
gem_p = 3
preds = (preds**gem_p).mean(axis=1) ** (1 / gem_p)  # (batch, parts, n_classes)
# preds = preds.max(axis=1)  # (batch, parts, n_classes)
print(preds.shape)

# Keep the first n_scored classes, then flatten batch and parts into rows.
preds = preds[..., :n_scored]
preds = preds.reshape(bn * part, n_scored)
preds.shape  # (batch * parts, n_classes)

In [None]:
# Split the class columns into four groups (5, 5, 5 and the remaining ones),
# each with its own target positive ratio.
groups = [preds[..., :5], preds[..., 5:10], preds[..., 10:15], preds[..., 15:]]
pos_ratios = [0.100, 0.550, 0.350, 0.334]

# Per-class threshold: the (1 - pos_ratio) quantile of that class's scores
# over all rows, so roughly pos_ratio of the rows end up above it.
thresholds = np.concatenate(
    [np.quantile(g, 1 - pr, axis=0) for g, pr in zip(groups, pos_ratios)],
    axis=-1,
)
# Row vector so it broadcasts against preds (rows, n_classes).
thresholds = thresholds.reshape(1, -1)
thresholds, thresholds.shape

In [None]:
# Boolean decisions: a score counts as positive only when strictly above its
# class threshold. Flattening is row-major, so classes vary fastest.
submits = preds > thresholds  # (batch * parts, n_classes)
submits = submits.reshape(-1)  # (batch * parts * n_classes)
submits.shape

In [None]:
# cfg is whatever the last inference cell left in scope (cfg_passt_1_v7);
# create_submit_df only reads cfg.birds from it.
submit_df = create_submit_df(test_df, cfg)

In [None]:
%time
submit_df["target"] = submits

In [None]:
# Display the filled submission frame.
submit_df

In [None]:
# Number of rows predicted positive.
submit_df.target.sum()

In [None]:
# Write the submission file (row_id, target) without the index column.
submit_df.to_csv("/kaggle/working/submission.csv", index=False)

# Visualize

In [None]:
import warnings
from types import SimpleNamespace

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import librosa
import librosa.display
import IPython.display as ipd

warnings.filterwarnings("ignore")
plt.style.use("ggplot")

%load_ext lab_black

In [None]:
def visualize(rel_path, vis_df):
    """Play one test soundscape and plot its PCEN mel spectrogram above a heatmap.

    Spectrogram parameters (sample rate, window size, hop length, mel bins,
    fmin, fmax) are read from the notebook-level ``cfg``.

    Args:
        rel_path: File name relative to ``TEST_AUDIO_ROOT``.
        vis_df: DataFrame handed to ``sns.heatmap``; the colour scale is fixed
            to the range [0, 1].

    Raises:
        AssertionError: If the audio file does not exist.
    """
    path = f"{TEST_AUDIO_ROOT}/{rel_path}"
    assert os.path.isfile(path), path
    ipd.display(ipd.Audio(path))

    # Top panel: spectrogram; bottom panel (three times taller): heatmap.
    fig, (ax1, ax2) = plt.subplots(
        ncols=1, nrows=2, figsize=(18, 8), gridspec_kw={"height_ratios": [1, 3]}
    )
    audio, _ = librosa.core.load(path, sr=cfg.sample_rate, mono=True)
    # The signal is passed by keyword: recent librosa releases make the
    # arguments of melspectrogram keyword-only, and y= works on older ones too.
    melspec = librosa.feature.melspectrogram(
        y=audio,
        sr=cfg.sample_rate,
        n_fft=cfg.window_size,
        hop_length=cfg.hop_length,
        n_mels=cfg.mel_bins,
        power=1.0,
        fmin=cfg.fmin,
        fmax=cfg.fmax,
    )
    # PCEN is applied to the magnitude mel spectrogram scaled by 2**31.
    spec = librosa.pcen(
        melspec * (2**31),
        time_constant=0.06,
        eps=1e-6,
        gain=0.8,
        power=0.25,
        bias=10,
        sr=cfg.sample_rate,
        hop_length=cfg.hop_length,
    )
    librosa.display.specshow(
        spec,
        hop_length=cfg.hop_length,
        sr=cfg.sample_rate,
        fmin=cfg.fmin,
        fmax=cfg.fmax,
        x_axis="time",
        y_axis="mel",
        ax=ax1,
    )
    ax1.set_title(
        f"[{rel_path}]",
        fontsize=15,
    )

    sns.heatmap(vis_df, cmap="viridis", ax=ax2, cbar=False, vmin=0, vmax=1)
    plt.tight_layout()
    plt.show()

In [None]:
# Restore the per-file layout of the ensembled scores for plotting. The
# reshape uses the `parts` unpacked on the line above instead of the `part`
# variable left over from the ensembling cell.
bn, _, parts, n_classes = preds_1.shape
vis_preds = preds.reshape(bn, parts, n_scored)
vis_preds = vis_preds.transpose(0, 2, 1)  # (batch, class, parts)
vis_preds.shape

In [None]:
# Column labels: window end seconds 5, 10, ..., 60 shifted back by 2.5 s.
times = np.arange(5, 65, 5) - 2.5
# Scores of the first file only: one row per bird, one column per window.
vis_df = pd.DataFrame(
    vis_preds[0, :21], index=cfg.birds[:21], columns=times
)  # (n_classes, parts)

In [None]:
# Raw ensembled scores for the first test file.
rel_path = test_df.loc[0, "filename"]
visualize(rel_path, vis_df)

In [None]:
# Thresholded decisions for the first test file; thresholds are reshaped to a
# column so each bird row is compared with its own threshold.
rel_path = test_df.loc[0, "filename"]
visualize(rel_path, vis_df > thresholds.reshape(-1, 1))

In [None]:
# Margin view: distance from the threshold, scaled by sqrt(threshold) and
# shifted by 0.5, so a score exactly at its threshold sits mid-scale in the
# heatmap's fixed [0, 1] colour range.
rel_path = test_df.loc[0, "filename"]
visualize(
    rel_path,
    (vis_df - thresholds.reshape(-1, 1)) / np.sqrt(thresholds.reshape(-1, 1)) + 0.5,
)

In [None]:
# Total number of (bird, window) cells above threshold for the first file.
(vis_df > thresholds.reshape(-1, 1)).sum().sum()

# LB Probing I

In [None]:
def invert_target(input_df, birds_to_invert):
    """Return a copy of a submission frame with the targets of some birds flipped.

    Args:
        input_df: DataFrame with a ``row_id`` column made of exactly four
            ``"_"``-separated fields (prefix, date, bird, end second) and a
            boolean ``target`` column. It is not modified.
        birds_to_invert: List of bird codes whose rows get ``target`` negated.

    Returns:
        New DataFrame holding only the ``row_id`` and ``target`` columns.

    Raises:
        AssertionError: If ``birds_to_invert`` is not a list.
        ValueError: If the row ids do not split into four fields (this also
            happens for an empty frame).
    """
    # The message reports the offending argument's type. It used to reference
    # `birds`, a local assigned further down, which raised UnboundLocalError
    # instead of the intended AssertionError.
    assert isinstance(birds_to_invert, list), type(birds_to_invert)
    tmp_df = input_df.copy()
    prefixes, dates, birds, end_secs = zip(*tmp_df.row_id.str.split("_"))
    tmp_df["prefix"] = prefixes
    tmp_df["date"] = dates
    tmp_df["bird"] = birds
    tmp_df["end_sec"] = end_secs

    # Negate the target of every row whose bird is in the requested list.
    idxs = tmp_df.query("bird in @birds_to_invert").index
    tmp_df.loc[idxs, "target"] = tmp_df.loc[idxs, "target"].apply(lambda x: not (x))

    return tmp_df[["row_id", "target"]]

In [None]:
# Load the scored bird codes and the training metadata.
scored_birds = pd.read_json("/kaggle/input/birdclef-2022/scored_birds.json")[0].tolist()
train = pd.read_csv("/kaggle/input/birdclef-2022/train_metadata.csv")
# Training rows whose primary label is a scored bird.
scored = train.query("primary_label in @scored_birds")
# value_counts sorts by count, most frequent first, so the slices below are
# bands of training-sample frequency.
scored_count = scored["primary_label"].value_counts()
top5 = scored_count[:5].index.tolist()
mid_top5 = scored_count[5:10].index.tolist()
mid_low5 = scored_count[10:15].index.tolist()
low6 = scored_count[15:].index.tolist()
top5, mid_top5, mid_low5, low6

In [None]:
#inverted_df = invert_target(submit_df, low6)
#inverted_df

In [None]:
#inverted_df.to_csv("/kaggle/working/submission.csv", index=False)

# LB Probing II

In [None]:
def random_invert_pos_target(input_df, p=1.0):
    """Flip a random fraction ``p`` of the positive targets to negative.

    Args:
        input_df: DataFrame with a boolean ``target`` column. It is not modified.
        p: Fraction of the positive rows to invert, within [0.0, 1.0]. The
            number of inverted rows is ``int(n_positive * p)``.

    Returns:
        Copy of ``input_df`` with the selected positive rows negated. Rows are
        picked with ``np.random.permutation``, so the result depends on the
        global NumPy random state.

    Raises:
        AssertionError: If ``p`` lies outside [0.0, 1.0].
    """
    assert 0.0 <= p <= 1.0, p
    result_df = input_df.copy()

    # Positive rows only; reset_index keeps the original labels in "index".
    positives = result_df.query("target == True").reset_index()
    n_positive = len(positives)
    chosen = np.random.permutation(n_positive)[: int(n_positive * p)]
    flipped = positives.loc[chosen, "target"].apply(lambda flag: not (flag))
    positives.loc[chosen, "target"] = flipped

    # Write the positives back under their original index labels.
    positives = positives.set_index("index")
    original_labels = positives.index
    result_df.loc[original_labels, "target"] = positives.loc[original_labels, "target"]

    return result_df

In [None]:
def get_positive_ratio_of_birds(input_df, target_birds):
    """Compute the share of positive targets among the rows of the given birds.

    Args:
        input_df: DataFrame with a ``row_id`` column made of exactly four
            ``"_"``-separated fields (prefix, date, bird, end second) and a
            ``target`` column. It is not modified.
        target_birds: List of bird codes to include.

    Returns:
        Positive count divided by row count for the selected birds, or ``0.0``
        (after printing a warning) when no row matches.

    Raises:
        AssertionError: If ``target_birds`` is not a list.
    """
    assert type(target_birds) == list, type(target_birds)
    parsed_df = input_df.copy()

    # Split every row id into its four fields and store them as columns.
    prefixes, dates, birds, end_secs = zip(*parsed_df.row_id.str.split("_"))
    field_columns = ("prefix", "date", "bird", "end_sec")
    for column, values in zip(field_columns, (prefixes, dates, birds, end_secs)):
        parsed_df[column] = values

    selected_df = parsed_df.query("bird in @target_birds")
    n_positive = selected_df["target"].sum()
    n_selected = len(selected_df)
    if n_selected == 0:
        print("warning: no target birds detected")
        return 0.0

    return n_positive / n_selected

In [None]:
#p = get_positive_ratio_of_birds(submit_df, low6)
#print(f"p: {p}")
#probe_df = random_invert_pos_target(submit_df, p=p)
#probe_df

In [None]:
#probe_df.to_csv("/kaggle/working/submission.csv", index=False)