In [None]:
# Limits used when the per-row thresholds are built near the end of the notebook.
# MONTH_DIST is compared with the absolute difference between a training
# recording's month and the soundscape's month; a value of 12 or more disables
# the month filter. KM_DIST is compared with the "<SITE>_dist" columns of
# train_meta (presumably kilometres - confirm against how those columns were built).
MONTH_DIST = 3
KM_DIST = 350

In [None]:
# Install the dependencies from local dataset folders (wheels / source trees).
# All but the first command pass --no-index --no-deps.
!pip install ../input/birds-inference-pip-wheels/torchaudio-0.8.1-cp37-cp37m-manylinux1_x86_64.whl ../input/birds-inference-pip-wheels/torch-1.8.1-cp37-cp37m-manylinux1_x86_64.whl
# !pip install ../input/birds-inference-pip-wheels/timm-0.4.8.zip --no-index --no-deps
!pip install ../input/birdclef21trainmeta/timm-0.4.9_23052021/pytorch-image-models-master --no-index --no-deps
!pip install ../input/birds-inference-pip-wheels/audiomentations-0.16.0-py3-none-any.whl --no-index --no-deps
!pip install ../input/birds-inference-pip-wheels/torchlibrosa-0.0.9-py3-none-any.whl --no-index --no-deps

In [None]:
# Display which timm version is importable after the installs above.
import timm
timm.__version__

In [None]:
import sys
import os
import importlib
import multiprocessing as mp

from tqdm import tqdm
import numpy as np
import pandas as pd
import glob
import torch
from copy import copy

from torch.utils.data import DataLoader

In [None]:
# Copy the contents of the solution repository dataset into the working directory.
!cp -r ../input/kaggle-birdclef2021-2nd-place-github/* ./

In [None]:
# Put the copied folders on the import path so that the importlib.import_module
# calls below can resolve config, dataset and model modules by bare name.
sys.path.append('./configs')
sys.path.append('./data')
sys.path.append('./models')

In [None]:
# Training metadata; later cells read its "month", "primary_label" and
# "<SITE>_dist" columns when building per-row thresholds.
train_meta = pd.read_csv("../input/birdclef21trainmeta/train_meta_4folded_v3.csv")
# Split the '-'-separated `date` string into three columns (named year, month, day here).
train_meta[["year", "month", "day"]] = train_meta['date'].str.split("-", expand=True)
# Only the month is converted to int; year and day stay as strings.
train_meta["month"] = train_meta["month"].astype(int)
train_meta

In [None]:
# %%checkerror

# Competition data locations and the default (hidden test set) inputs.
COMP_FOLDER = '../input/birdclef-2021/'
TEST_AUDIO_ROOT = f'{COMP_FOLDER}test_soundscapes/'
test_df = pd.read_csv(f'{COMP_FOLDER}test.csv')
# Constant value written to every row; presumably a dummy label required by the
# dataset class - confirm in the dataset module.
test_df['birds'] = 'acafly'
sample_submission = pd.read_csv(COMP_FOLDER + 'sample_submission.csv')
# Used as num_workers for every DataLoader below.
N_CORES = mp.cpu_count()
# Switched to True by the next cell when no test .ogg files are found.
PUBLIC_RUN = False

# NOTE(review): RAM_CHECK and MIXED_PRECISION are not read anywhere in the
# visible part of the notebook.
RAM_CHECK = False
MIXED_PRECISION = False
DEVICE = "cuda"

In [None]:
# %%checkerror

# .ogg files present in the soundscape folder for this run.
test_fns = [item for item in os.listdir(TEST_AUDIO_ROOT) if item.endswith('.ogg')]
# No test audio available: fall back to the train soundscapes so the whole
# pipeline can still run and be scored against the provided labels.
if len(test_fns) == 0:
    PUBLIC_RUN = True
    # Turns '.../test_soundscapes/' into '.../train_soundscapes/'.
    TEST_AUDIO_ROOT = TEST_AUDIO_ROOT.replace('test','train')
    test_fns = [item for item in os.listdir(TEST_AUDIO_ROOT) if item.endswith('.ogg')]
    # Same columns as used from test.csv, taken from the labelled train soundscape file.
    test_df = pd.read_csv(f'{COMP_FOLDER}train_soundscape_labels.csv', usecols=['row_id','site','audio_id','seconds'])
    
    test_df['birds'] = 'acafly'
    # Submission template rebuilt from the same file, defaulting every row to 'nocall'.
    sample_submission = pd.read_csv(COMP_FOLDER + 'train_soundscape_labels.csv',usecols=['row_id'])
    sample_submission['birds'] = 'nocall'
    
    # load train_soundscape_labels to eval in PUBLIC RUN
    train_soundscape_labels = pd.read_csv(f'{COMP_FOLDER}train_soundscape_labels.csv')

In [None]:
# %%checkerror
# Lookup from the first two '_'-separated filename fields to the full filename.
fn_starts = dict(('_'.join(name.split('_')[:2]), name) for name in test_fns)
# Build the same two-field key per row from audio_id and site, then resolve it.
file_keys = test_df['audio_id'].astype(str) + '_' + test_df['site']
test_df['filename'] = file_keys.map(fn_starts)
# Characters -8..-7 of the filename are parsed as the integer month.
test_df["month"] = test_df["filename"].str[-8:-6].astype(int)
test_df.head()

In [None]:
# Inference pass for config 'cfg_ch_binary_ext1_3': builds the "val"-mode dataset
# over test_df, restores every checkpoint matched by the glob below and stores
# each network's 'logits' output per batch in `preds_1`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ch_binary_ext1_3')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ch_binary_ext1_3/checkpoint_last_seed*.pth'):
    state_dicts.append(filepath)
    backbones.append("seresnext26t_32x4d")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_1 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_1 += [preds_]

In [None]:
# Inference pass for config 'pp_binary_ext3_1': builds the "val"-mode dataset
# over test_df, restores up to five checkpoints matched by the glob below and
# stores each network's 'logits' output per batch in `preds_2`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('pp_binary_ext3_1')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/pp_binary_ext3_1/checkpoint_last_seed*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnet_b0_ns")
print(state_dicts)

nets = []

# Only the first five collected paths are loaded. NOTE(review): because the glob
# order is not guaranteed, which five are used may vary if more than five match.
for i,state_dict in enumerate(state_dicts[:5]):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_2 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_2 += [preds_]

In [None]:
# Inference pass for config 'pp_binary_ext3_2': builds the "val"-mode dataset
# over test_df, restores up to five checkpoints matched by the glob below and
# stores each network's 'logits' output per batch in `preds_3`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('pp_binary_ext3_2')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/pp_binary_ext3_2/checkpoint_last_seed*.pth'):
    state_dicts.append(filepath)
    backbones.append("seresnext26t_32x4d")
print(state_dicts)

nets = []

# Only the first five collected paths are loaded. NOTE(review): because the glob
# order is not guaranteed, which five are used may vary if more than five match.
for i,state_dict in enumerate(state_dicts[:5]):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_3 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_3 += [preds_]

In [None]:
# Stack the nested per-batch/per-network lists into one 4-axis array and swap
# the first two axes so the network axis comes first.
preds_1 = np.array(preds_1).transpose(1,0,2,3)
# Merge axes 1 and 2 into a single axis, keeping the first and last axes.
# NOTE: this cell rebinds preds_1, so it is not safe to run twice in a row.
preds_1 = preds_1.reshape(preds_1.shape[0], preds_1.shape[1]*preds_1.shape[2], preds_1.shape[3])
preds_1.shape

In [None]:
# Stack the nested per-batch/per-network lists into one 4-axis array and swap
# the first two axes so the network axis comes first.
preds_2 = np.array(preds_2).transpose(1,0,2,3)
# Merge axes 1 and 2 into a single axis, keeping the first and last axes.
# NOTE: this cell rebinds preds_2, so it is not safe to run twice in a row.
preds_2 = preds_2.reshape(preds_2.shape[0], preds_2.shape[1]*preds_2.shape[2], preds_2.shape[3])
preds_2.shape

In [None]:
# Stack the nested per-batch/per-network lists into one 4-axis array and swap
# the first two axes so the network axis comes first.
preds_3 = np.array(preds_3).transpose(1,0,2,3)
# Merge axes 1 and 2 into a single axis, keeping the first and last axes.
# NOTE: this cell rebinds preds_3, so it is not safe to run twice in a row.
preds_3 = preds_3.reshape(preds_3.shape[0], preds_3.shape[1]*preds_3.shape[2], preds_3.shape[3])
preds_3.shape

In [None]:
# Pool the networks of the three runs above along the leading (network) axis.
preds = np.concatenate([preds_1, preds_2, preds_3], axis=0)

In [None]:
# Sanity check of the pooled array's shape.
preds.shape

In [None]:
# Aggregate across the pooled networks (axis 0) with both the median and the mean.
# Only binary_mean is used for rescaling later; binary_median is just inspected.
binary_median = np.median(preds, axis=0).copy()
binary_mean = np.mean(preds, axis=0).copy()

In [None]:
# Collapse both aggregates to 1-D vectors.
binary_median = binary_median.flatten()
binary_mean = binary_mean.flatten()

In [None]:
# Inference pass for config 'cfg_ps_6_v2': builds the "val"-mode dataset over
# test_df, restores every checkpoint matched by the glob below and stores each
# network's 'logits' output per batch in `preds_1`.
# NOTE: preds_1 is reused here; the earlier array of that name was already
# folded into `preds` / `binary_mean` by the cells above.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_6_v2')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_6_v2/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("resnet34")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_1 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_1 += [preds_]

In [None]:

# Inference pass for config 'cfg_ps_12_v8_inf': builds the "val"-mode dataset
# over test_df, restores every checkpoint matched by the glob below and stores
# each network's 'logits' output per batch in `preds_2`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v8_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_12_v8/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("resnet34")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_2 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_2 += [preds_]

In [None]:

# Inference pass for config 'cfg_ps_12_v11_inf': builds the "val"-mode dataset
# over test_df, restores every checkpoint matched by the glob below and stores
# each network's 'logits' output per batch in `preds_3`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v11_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_12_v11/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnetv2_s_in21k")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_3 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_3 += [preds_]

In [None]:

# Inference pass for config 'cfg_ps_12_v13_inf': builds the "val"-mode dataset
# over test_df, restores every checkpoint matched by the glob below and stores
# each network's 'logits' output per batch in `preds_4`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v13_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_12_v13/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnetv2_m_in21k")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_4 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_4 += [preds_]

In [None]:

# Inference pass for config 'cfg_ps_12_v21_inf': builds the "val"-mode dataset
# over test_df, restores every checkpoint matched by the glob below and stores
# each network's 'logits' output per batch in `preds_5`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v21_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_12_v21/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("eca_nfnet_l0")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_5 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_5 += [preds_]

In [None]:

# Inference pass using config 'cfg_ps_12_v30_inf' with checkpoints from the
# 'cfg_ch_12_v25a' folder: builds the "val"-mode dataset over test_df, restores
# every checkpoint matched by the glob below and stores each network's 'logits'
# output per batch in `preds_6`.
# NOTE(review): the config module name differs from the checkpoint folder name;
# presumably the architectures match (load_state_dict is strict) - confirm the
# preprocessing settings are meant to be shared too.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v30_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ch_12_v25a/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnetv2_s_in21k")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_6 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_6 += [preds_]

In [None]:

# Inference pass for config 'cfg_ps_12_v30_inf': builds the "val"-mode dataset
# over test_df, restores every checkpoint matched by the glob below and stores
# each network's 'logits' output per batch in `preds_7`.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v30_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_12_v30/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnetv2_s_in21k")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_7 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_7 += [preds_]

In [None]:

# Inference pass using config 'cfg_ps_12_v30_inf' with checkpoints from the
# 'cfg_ps_12_v32' folder: builds the "val"-mode dataset over test_df, restores
# every checkpoint matched by the glob below and stores each network's 'logits'
# output per batch in `preds_8`.
# NOTE(review): the config module name differs from the checkpoint folder name;
# presumably the architectures match (load_state_dict is strict) - confirm the
# preprocessing settings are meant to be shared too.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v30_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ps_12_v32/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnetv2_s_in21k")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_8 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_8 += [preds_]

In [None]:
# Inference pass using config 'cfg_ps_12_v30_inf' with checkpoints from the
# 'cfg_ch_12_v25g' folder: builds the "val"-mode dataset over test_df, restores
# every checkpoint matched by the glob below and stores each network's 'logits'
# output per batch in `preds_9`.
# NOTE(review): the config module name differs from the checkpoint folder name;
# presumably the architectures match (load_state_dict is strict) - confirm the
# preprocessing settings are meant to be shared too.
cfg = importlib.import_module('default_config')
importlib.reload(cfg)
cfg = importlib.import_module('cfg_ps_12_v30_inf')
importlib.reload(cfg)
# From here on `cfg` is a shallow copy of the module's `cfg` attribute, not the module.
# This is the last rebinding of `cfg`; the thresholding cell reads cfg.birds from it.
cfg = copy(cfg.cfg)
print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)

cfg.val_data_folder = TEST_AUDIO_ROOT
cfg.pretrained = False

# The dataset module is selected by name from the config.
ds = importlib.import_module(cfg.dataset)
importlib.reload(ds)

CustomDataset = ds.CustomDataset
batch_to_device = ds.batch_to_device

cfg.batch_size = 1

aug = None
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# The model module is selected by name from the config as well.
model = importlib.import_module(cfg.model)
importlib.reload(model)
Net = model.Net

def get_state_dict(sd_fp):
    """Load the checkpoint at `sd_fp` onto the CPU and return its 'model' entry
    with every occurrence of "module." removed from the keys."""
    sd = torch.load(sd_fp, map_location="cpu")['model']
    # NOTE(review): presumably strips a DataParallel/DDP prefix; str.replace also
    # rewrites "module." in the middle of a key - confirm no key relies on that.
    sd = {k.replace("module.", ""):v for k,v in sd.items()}
    return sd
# Despite its name, `state_dicts` holds checkpoint file paths; `backbones` holds
# the backbone name for the checkpoint at the same position.
state_dicts = []
backbones = []
# NOTE(review): glob.iglob yields paths in arbitrary order.
for filepath in glob.iglob('../input/2ndplacebirdclef2021-models/cfg_ch_12_v25g/checkpoint_*.pth'):
    state_dicts.append(filepath)
    backbones.append("tf_efficientnetv2_s_in21k")
print(state_dicts)

nets = []

for i,state_dict in enumerate(state_dicts):
    cfg.backbone = backbones[i]
    net = Net(cfg).eval().cuda()
    sd = get_state_dict(state_dict)
    print("loading dict")
    net.load_state_dict(sd, strict=True)
    nets += [net]
    
# %%checkerror
# NOTE: gmean is imported here but not used in this cell.
from scipy.stats.mstats import gmean

with torch.no_grad():    

    preds_9 = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        # autocast is entered unconditionally; MIXED_PRECISION is not consulted.
        with torch.cuda.amp.autocast():
            # One entry per network, in `nets` order.
            preds_ = []
            for net in nets:
                out = net(batch)['logits']
                preds_ += [out.cpu().numpy()]
            
        preds_9 += [preds_]

In [None]:
# Stack each run's nested per-batch/per-network lists into a 4-axis array and
# swap the first two axes so the network axis comes first.
# NOTE: every line rebinds its own input, so this cell is not safe to run twice.
preds_1 = np.array(preds_1).transpose(1,0,2,3)
preds_2 = np.array(preds_2).transpose(1,0,2,3)
preds_3 = np.array(preds_3).transpose(1,0,2,3)
preds_4 = np.array(preds_4).transpose(1,0,2,3)
preds_5 = np.array(preds_5).transpose(1,0,2,3)
preds_6 = np.array(preds_6).transpose(1,0,2,3)
preds_7 = np.array(preds_7).transpose(1,0,2,3)
preds_8 = np.array(preds_8).transpose(1,0,2,3)
preds_9 = np.array(preds_9).transpose(1,0,2,3)

In [None]:
# One entry per inference run; the order must match the weights given to
# np.average when the runs are blended further down.
all_preds = [preds_1, preds_2, preds_3, preds_4, preds_5, preds_6, preds_7, preds_8, preds_9]  

In [None]:
# Post-process every run: fold one output column into another, then smooth each
# network's per-row scores over neighbouring rows of the same audio file.
# NOTE: this cell rebinds `preds`, `model` and `cols`, which earlier cells used
# for other objects; `cols` is read again by later cells.
preds_bag_all = []
for preds in all_preds:
    
    # Merge axes 1 and 2 so each network has one score row per merged index.
    preds = preds.reshape(preds.shape[0], preds.shape[1]*preds.shape[2], preds.shape[3])
    
    # Add column 397 onto column 275 and keep only the first 397 columns.
    # NOTE(review): the meaning of these two indices is not visible here - confirm.
    # NOTE(review): reshape may return a view; if it does, this in-place add also
    # alters the array held in all_preds and re-running the cell would add again.
    preds[:,:,275] += preds[:,:,397]
    preds = preds[:,:,:397]
    
    cols = [f"p{i}" for i in range(preds.shape[2])]
    preds_bag = []

    # `model` is an integer index along the network axis here.
    for model in tqdm(range(preds.shape[0])):
        curr_df = test_df.copy()
        curr_df[cols] = preds[model]
        curr_df[cols] = curr_df[cols].fillna(0)

        for c in cols:
            # Scores of the rows 1, 2 and 3 positions before/after within the
            # same audio_id; NaN where no such neighbour exists.
            z1 = curr_df.groupby('audio_id')[c].shift(1)
            z2 = curr_df.groupby('audio_id')[c].shift(-1)
            z3 = curr_df.groupby('audio_id')[c].shift(2)
            z4 = curr_df.groupby('audio_id')[c].shift(-2)
            z5 = curr_df.groupby('audio_id')[c].shift(3)
            z6 = curr_df.groupby('audio_id')[c].shift(-3)

            z = curr_df[c]

            # Only rows that have both +/-3 neighbours are smoothed; the
            # remaining rows keep their raw score.
            idx = (~np.isnan(z5)) & (~np.isnan(z6))
            # Weighted average of the row itself (7), +/-1 (2 each), +/-2 (1 each)
            # and +/-3 (0.5 each).
            # NOTE(review): `z` comes from curr_df[c]; whether this assignment
            # writes through to curr_df before the group mean on the next line is
            # computed depends on pandas copy/view behaviour - confirm.
            z[idx] = np.average([z.fillna(0)[idx], z1.fillna(0)[idx], z2.fillna(0)[idx], z3.fillna(0)[idx], z4.fillna(0)[idx], z5.fillna(0)[idx], z6.fillna(0)[idx]], axis=0, weights=[7,2,2,1,1,0.5,0.5])
            # Add 0.65 times the per-audio mean of this column to every row of that audio.
            z = z + curr_df["audio_id"].map(curr_df.groupby("audio_id")[c].mean()*0.65)

            curr_df[c] = z

        p = curr_df[cols].values

        preds_bag.append(p)
        
    preds_bag_all.append(preds_bag)

In [None]:
# Average the smoothed scores over the networks of each run.
preds_bag_all_2 = [np.mean(x, axis=0) for x in preds_bag_all]

In [None]:
# Weighted blend across the nine runs, in all_preds order; the first run gets
# weight 0.8 and the others 1.
test_mean = np.average(preds_bag_all_2, axis=0, weights=[0.8,1,1,1,1,1,1,1,1])

In [None]:
# Sanity check of the blended array's shape.
test_mean.shape

In [None]:
# Store the blended scores in test_df; `cols` is the column list left over from
# the last iteration of the smoothing loop.
test_df[cols] = test_mean
test_df[cols]

In [None]:
# Sanity check: shape of the score matrix stored in test_df.
test_df[cols].values.shape

In [None]:
# Sanity check: length of the flattened aggregate from the first model group.
binary_median.shape

In [None]:
# Column vector so that it broadcasts across the score columns of each row.
bb = binary_mean.reshape(-1,1)
# Multiply every score in a row by 1 + 0.8 * that row's binary_mean value.
# NOTE: in-place update of test_df, so re-running this cell applies the factor again.
test_df[cols] *= (1 + (bb * 0.8))

In [None]:
# Global base threshold: the 0.9981 quantile over all row/column scores.
threshold = np.quantile(test_df[cols].values.flatten(), 0.9981)
print(threshold)

In [None]:
# Build one space-separated prediction string per row of test_df.
#
# Every species starts from the global `threshold`. For the site code found in
# the row's filename, species whose minimum "<SITE>_dist" value in train_meta
# exceeds KM_DIST get +1 added to their threshold. When MONTH_DIST < 12 only
# training rows whose month is within MONTH_DIST of the soundscape month are
# considered.
# NOTE(review): the month distance is a plain absolute difference, so e.g.
# month 12 and month 1 count as 11 apart - confirm this is intended.

# Site codes are tested in this order; the first match wins.
SITE_CODES = ("COL", "COR", "SNE", "SSW")

# Evaluate each filename substring test once for the whole frame instead of
# once per row inside the loop.
site_flags = {site: test_df["filename"].str.contains(site) for site in SITE_CODES}

# The penalty depends only on (site, month), so it is memoised rather than
# re-filtering and re-grouping train_meta for every row.
penalty_cache = {}

pred_strings = []
for i, pred in tqdm(enumerate(test_df[cols].values), total=len(test_df)):
    th = np.array([threshold] * (len(cfg.birds)-1))

    site = next((code for code in SITE_CODES if site_flags[code][i]), None)
    if site is not None:
        # With MONTH_DIST >= 12 the month filter is skipped entirely.
        month = test_df["month"][i] if MONTH_DIST < 12 else None
        cache_key = (site, month)
        if cache_key not in penalty_cache:
            meta = train_meta
            if month is not None:
                meta = train_meta.loc[abs(train_meta["month"] - month) <= MONTH_DIST]
            # 1 where the species' minimum distance to the site exceeds KM_DIST, else 0.
            penalty_cache[cache_key] = (meta.groupby("primary_label")[f"{site}_dist"].min() > KM_DIST)*1
        # In-place add into the freshly built array; the cached Series is untouched.
        th += penalty_cache[cache_key]

    pred_bird_idx = np.where(pred > th)[0]
    pred_bird_idx_high = np.where(pred > (th*1.10))[0]

    if len(pred_bird_idx) > 0:
        pred_bird = ' '.join(list(cfg.birds[pred_bird_idx]))
        # No score clears the 10% higher bar: also append "nocall".
        if len(pred_bird_idx_high) == 0:
            pred_bird = pred_bird + " nocall"
    else:
        pred_bird = 'nocall'

    pred_strings += [pred_bird]

In [None]:
# Copy the submission template with its 'birds' column replaced by the
# prediction strings, then write it out without the index.
submission = sample_submission.assign(birds=pred_strings)
submission.to_csv('submission.csv', index=False)

In [None]:
# Display the submission frame that was just written.
submission

In [None]:
def row_wise_f1_score_micro(y_true, y_pred):
    """Mean of per-row F1 scores between space-separated label strings.

    author @shonenkov

    Args:
        y_true: iterable of strings, each holding the whitespace-separated true
            labels of one row.
        y_pred: iterable of strings with the predicted labels, aligned with
            ``y_true`` (rows are paired with ``zip``).

    Returns:
        The mean over rows of ``2*TP / (2*TP + FN + FP)``. A row with no true
        and no predicted labels scores 1.0 (nothing to find, nothing
        predicted) instead of raising ``ZeroDivisionError``.
    """
    F1 = []
    for preds, trues in zip(y_pred, y_true):
        preds = preds.split()
        trues = trues.split()
        TP = sum(1 for true in trues if true in preds)
        FN = len(trues) - TP
        FP = sum(1 for pred in preds if pred not in trues)
        denominator = 2*TP + FN + FP
        # denominator is 0 only when both label lists are empty.
        F1.append(2*TP / denominator if denominator else 1.0)
    return np.mean(F1)

# Only in the fallback run on train soundscapes are labels available to score against.
if PUBLIC_RUN:
    y_pred = submission['birds'].values
    y_true = train_soundscape_labels['birds'].values
    print(row_wise_f1_score_micro(y_true, y_pred))

In [None]:
if PUBLIC_RUN:
    # Boolean Series: True for every row that does not belong to one of the
    # three listed soundscape files.
    mask = ~test_df["filename"].isin([
        "7019_COR_20190904.ogg",
        "7954_COR_20190923.ogg",
        "31928_COR_20191004.ogg",
    ])

In [None]:
# Same score as above, restricted to the rows selected by `mask`.
if PUBLIC_RUN:
    y_pred = submission['birds'].values[mask]
    y_true = train_soundscape_labels['birds'].values[mask]
    print(row_wise_f1_score_micro(y_true, y_pred))

In [None]:
# Flatten all prediction strings into one list of individual labels
# ('nocall' entries included) and show how many there are.
pred_birds = " ".join(pred_strings).split(" ")
len(pred_birds)