In [None]:
# Shell escape: create an empty submission.csv in the working directory (or just
# refresh its timestamp if it already exists). The final cell of this notebook
# overwrites the file with the real predictions.
# NOTE(review): presumably a safeguard so the output file exists even if a later
# cell fails — confirm the intent.
! touch submission.csv

In [None]:
# environment setup

import os
import sys

# Extend the import search path with the attached code dataset.
# NOTE(review): presumably this is where the `lib` and `src` packages imported
# in the next cell live — confirm.
KAGGLE_CODE_DIR = "/kaggle/input/bc21-code/"
sys.path.append(KAGGLE_CODE_DIR)

# Environment flag set before the project modules are imported.
# NOTE(review): presumably read by the project config to select Kaggle paths — confirm.
os.environ["__KAGGLE__"] = "1"

In [None]:
import numpy as np
import pandas as pd
import tensorflow_addons as tfa
from sklearn.metrics import f1_score
from tensorflow import keras
from tqdm import tqdm

from lib.utils import fix_random_seed, read_json
from src.config import c
from src.data_utils import (
    geofilter_predictions,
    normalize_soundscapes_df,
    predictions_to_text_labels,
    read_soundscapes_info,
)
from src.generator import Generator
from src.geo_filter import filters as geo_filters
from src.models import Div, SinCos, YMToDate
from src.services import get_msg_provider, get_wave_provider

In [None]:
# True unless KAGGLE_KERNEL_RUN_TYPE is set to "Interactive"; a missing variable
# therefore also counts as a submission run.
IN_KAGGLE_SUBMIT = os.environ.get("KAGGLE_KERNEL_RUN_TYPE") != "Interactive"

# Submission runs read the competition test CSV; interactive runs read the
# labelled train soundscapes instead, so the predictions can be scored below.
if IN_KAGGLE_SUBMIT:
    IN_CSV = "/kaggle/input/birdclef-2021/test.csv"
else:
    IN_CSV = "/kaggle/input/birdclef-2021/train_soundscape_labels.csv"

# Model file to load; its metadata JSON is looked up under the same name.
MODEL = "/kaggle/input/bc21-models/B1_nrsw_2.h5"

# Key into `geo_filters` used by the geofiltering cell; None skips that step.
# Candidate value left by the author: "all-500mi-last_5y-1mo_tolerance"
GEOFILTER = None

In [None]:
# Metadata from the model training run: same path as the model file, with the
# ".h5" suffix swapped for ".json".
meta = read_json(MODEL.replace(".h5", ".json"))

# Overwrite the environment-dependent entries of the stored config with the
# values from the local config `c`; every other entry stays as recorded.
KAGGLE_CONFIG_KEYS = (
    "WORK_DIR",
    "CACHE_DIR",
    "COMPETITION_DATA",
    "SRC_DATA_DIRS",
    "CACHE_AUDIO_FRAGMENTS",
)
for k in KAGGLE_CONFIG_KEYS:
    meta["config"][k] = c[k]

In [None]:
# Soundscapes dataframe: read the CSV selected for this run mode and pass it
# straight through the project's normalization helper.
df = normalize_soundscapes_df(pd.read_csv(IN_CSV), quiet=True, seconds=5)

In [None]:
# Project-defined layer classes that Keras has to be told about in order to
# deserialize the saved model.
custom_layers = dict(SinCos=SinCos, Div=Div, YMToDate=YMToDate)

# Load the trained model from the configured path.
model = keras.models.load_model(MODEL, custom_objects=custom_layers)

In [None]:
# Shape reported by the model's "i_msg" layer, with its leading axis dropped
# (presumably the batch axis — TODO confirm).
input_shape = model.get_layer("i_msg").input_shape[0][1:]

# Providers are configured from the training-time config; the mel-spectrogram
# provider is sized from the first two entries of the model's input shape.
wave_p = get_wave_provider(meta["config"])
msg_p = get_msg_provider(
    meta["config"],
    n_mels=input_shape[0],
    time_steps=input_shape[1],
)

# Inference generator: fixed row order, no augmentation, no sample weights.
g = Generator(
    df=df,
    wave_provider=wave_p,
    msg_provider=msg_p,
    # RGB input is requested only when the last input dimension is 3.
    msg_as_rgb=(input_shape[-1] == 3),
    geo_coordinates_bins=meta["config"]["GEO_COORDINATES_BINS"],
    batch_size=1,
    shuffle=False,
    augmentation=None,
    rating_as_sw=False,
    rareness_as_sw=False,
)

In [None]:
# Run inference over the generator. The progress bar is shown only in
# interactive runs (verbose=1); submission runs stay silent (verbose=0).
Y_pred = model.predict(
    g,
    use_multiprocessing=False,
    verbose=int(not IN_KAGGLE_SUBMIT),
)

In [None]:
# Optional geofiltering: runs only when a GEOFILTER key has been configured,
# in which case Y_pred is replaced by the filtered predictions.
if GEOFILTER is not None:
    site_labels = geo_filters[GEOFILTER]
    Y_pred = geofilter_predictions(
        df=df,
        Y_pred=Y_pred,
        site_labels=site_labels,
        labels=meta["labels"],
        downgrade_const=0.0,
    )

In [None]:
# Turn the prediction array into text labels using the label list stored in
# the training metadata; "nocall" is passed as the default label.
labels_pred = predictions_to_text_labels(
    Y_pred,
    meta["labels"],
    priority_to_nocall=False,
    max_labels=None,
    default_label="nocall",
)

# Store the text labels in the dataframe column used by the cells below.
df["birds"] = labels_pred

In [None]:
# Local evaluation, skipped on submission runs.
if not IN_KAGGLE_SUBMIT:
    # Dump every row whose predicted labels differ from "_primary_labels".
    mismatched = df["_primary_labels"] != df["birds"]
    df[mismatched].to_csv("wrong.csv")

    # Micro-averaged F1 restricted to the labels listed in the metadata.
    score = f1_score(
        df["_primary_labels"],
        df["birds"],
        labels=meta["labels"],
        average="micro",
    )
    print("f1:", score)

In [None]:
# Write the submission file: only the row id and the predicted labels, no index.
df.to_csv("submission.csv", columns=["row_id", "birds"], index=False)