In [16]:
# Create an empty submission.csv (or refresh its timestamp) before anything else runs.
# NOTE(review): presumably so an output file exists even if a later cell fails — confirm.
! touch submission.csv

In [17]:
# environment setup

import os
import sys

# Set the "__KAGGLE__" flag before the project modules are imported.
# NOTE(review): presumably read by the project config (`src.config`) — confirm.
os.environ["__KAGGLE__"] = "1"

# Make the attached code dataset importable (presumably where the `lib` and
# `src` packages imported by the next cell live). The membership check keeps
# the cell idempotent: re-running it no longer stacks duplicate sys.path entries.
CODE_DIR = "/kaggle/input/bc21-code/"
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)

In [18]:
import numpy as np
import pandas as pd
import tensorflow_addons as tfa
from sklearn.metrics import f1_score
from tensorflow import keras
from tqdm import tqdm

from lib.utils import fix_random_seed, read_json
from src.config import c
from src.data_utils import (
    boost_multiple_occurences,
    geofilter_predictions,
    normalize_soundscapes_df,
    predictions_to_text_labels,
    read_soundscapes_info,
)
from src.generator import Generator
from src.geo_filter import filters as geo_filters
from src.models import Div, SinCos, YMToDate, MelSpectrogram, Float2DToFloatRGB, Float2DToRGB, PowerToDb
from src.services import get_msg_provider, get_wave_provider

In [19]:
# True for any Kaggle run type other than "Interactive".
# `.get` with an "Interactive" default keeps the notebook importable when the
# variable is not set at all (e.g. outside Kaggle) instead of raising KeyError;
# a missing variable is treated as an interactive (non-submission) session.
IN_KAGGLE_SUBMIT = (
    os.environ.get("KAGGLE_KERNEL_RUN_TYPE", "Interactive") != "Interactive"
)

# Submission runs read the test CSV; interactive runs use the labelled
# train soundscapes so the predictions can be scored further down.
IN_CSV = (
    "/kaggle/input/birdclef-2021/test.csv"
    if IN_KAGGLE_SUBMIT
    else "/kaggle/input/birdclef-2021/train_soundscape_labels.csv"
)

# Model weights; the training metadata is read from the JSON file with the same stem.
MODEL = "/kaggle/input/bc21-models/C2_enb4.h5"

# Post-processing knobs. Setting GEOFILTER or BOOST_COEF to None skips the
# corresponding step, in which case RES_COL must name a column that still exists.
GEOFILTER = "all-500mi-last_5y-1mo_tolerance"  # key into `geo_filters`
BOOST_COEF = 2.4
MAX_BOOST_COEF = BOOST_COEF * 5
THRESHOLD = 0.5  # used both for boosting and for score -> label conversion

# Label column that is written to submission.csv.
RES_COL = "birds_geof_boosted"

In [20]:
# ! md5sum $MODEL

In [21]:
# Training-run metadata is stored next to the weights as "<model stem>.json".
meta = read_json(MODEL.replace(".h5", ".json"))

# These config entries are taken from the local config `c` instead of the
# values recorded at training time.
KAGGLE_CONFIG_KEYS = (
    "WORK_DIR",
    "CACHE_DIR",
    "COMPETITION_DATA",
    "SRC_DATA_DIRS",
    "CACHE_AUDIO_FRAGMENTS",
)

train_config = meta["config"]
for config_key in KAGGLE_CONFIG_KEYS:
    train_config[config_key] = c[config_key]

In [22]:
# Read the input CSV and pass it straight through the project's normaliser.
df = normalize_soundscapes_df(
    pd.read_csv(IN_CSV),
    quiet=True,
    seconds=5,
)

In [23]:
# Custom classes handed to Keras so it can deserialise the saved model.
custom_layers = dict(
    SinCos=SinCos,
    Div=Div,
    YMToDate=YMToDate,
    MelSpectrogram=MelSpectrogram,
    Float2DToFloatRGB=Float2DToFloatRGB,
    Float2DToRGB=Float2DToRGB,
    PowerToDb=PowerToDb,
)

model = keras.models.load_model(MODEL, custom_objects=custom_layers)

In [24]:
# Build the inference generator over the soundscapes frame.

wave_p = get_wave_provider(meta["config"])

# Arguments common to both model families; `msg_provider` defaults to None
# and is overridden below for "msg_" models.
generator_kwargs = dict(
    df=df,
    shuffle=False,
    augmentation=None,
    rating_as_sw=False,
    rareness_as_sw=False,
    msg_provider=None,
    wave_provider=wave_p,
    geo_coordinates_bins=meta["config"]["GEO_COORDINATES_BINS"],
    batch_size=1,
)

if meta["args"]["model"].startswith("msg_"):
    # Output size is taken from the model's "i_msg" layer input shape with the
    # leading dimension dropped (presumably the batch axis — TODO confirm).
    input_shape = model.get_layer("i_msg").input_shape[0][1:]
    msg_p = get_msg_provider(meta["config"])

    generator_kwargs.update(
        msg_provider=msg_p,
        msg_output_size=input_shape,
        msg_power=meta["config"]["MSG_POWER"],
    )

generator = Generator(**generator_kwargs)

In [25]:
# Run inference; the Keras progress output is shown only in interactive runs.
predict_verbosity = 0 if IN_KAGGLE_SUBMIT else 1

Y_pred = model.predict(
    x=generator,
    use_multiprocessing=False,
    verbose=predict_verbosity,
)



In [26]:
# Store one prediction row per DataFrame row (iterating Y_pred yields its rows).
df["_y_pred"] = list(Y_pred)

In [27]:
# Optional boosting step; skipped entirely when BOOST_COEF is None.
if BOOST_COEF is not None:

    boost_options = dict(
        labels=meta["labels"],
        pred_col="_y_pred",
        out_col="_y_pred_boosted",  # later cells look for this column
        boost_coef=BOOST_COEF,
        max_boost_coef=MAX_BOOST_COEF,
        threshold=THRESHOLD,
    )

    df = boost_multiple_occurences(df=df, **boost_options)

In [28]:
# Optional geofiltering step; skipped entirely when GEOFILTER is None.


def _geofilter_column(frame, src_col, dst_col):
    """Geofilter the scores stored in `src_col` and store the result in `dst_col` (in place)."""
    filtered = geofilter_predictions(
        df=frame,
        Y_pred=np.array(list(frame[src_col])),
        site_labels=geo_filters[GEOFILTER],
        labels=meta["labels"],
        downgrade_const=0.0,
    )
    frame[dst_col] = list(filtered)


if GEOFILTER is not None:

    # Raw predictions are always filtered ...
    _geofilter_column(df, "_y_pred", "_y_pred_geof")

    # ... boosted ones only when the boost step produced them.
    if "_y_pred_boosted" in df:
        _geofilter_column(df, "_y_pred_boosted", "_y_pred_geof_boosted")

In [29]:
# Convert every available score column into a text-label column and, outside
# of submission runs, report its F1 score against `_primary_labels`.

# Score columns that may exist at this point, depending on which of the
# optional boost / geofilter steps ran.
SCORE_COLS = [
    "_y_pred",
    "_y_pred_boosted",
    "_y_pred_geof",
    "_y_pred_geof_boosted",
]

labels_cols = []

for col in SCORE_COLS:
    if col not in df:
        continue

    labels_pred = predictions_to_text_labels(
        np.array(list(df[col])),
        meta["labels"],
        default_label="nocall",
        max_labels=None,
        priority_to_nocall=False,
        threshold=THRESHOLD,
    )

    # "_y_pred*" score column -> "birds*" label column
    labels_col = col.replace("_y_pred", "birds")
    labels_cols.append(labels_col)
    df[labels_col] = labels_pred

    if not IN_KAGGLE_SUBMIT:
        print(
            labels_col,
            "f1",
            f1_score(
                df["_primary_labels"],
                df[labels_col],
                labels=meta["labels"],
                average="micro",
            ),
        )

# Save the wrongly predicted rows once, after all label columns exist.
# Previously this file was rewritten on every loop iteration with the same
# row mask; only the last write (containing every label column) survived,
# so a single write here produces the same wrong.csv.
# NOTE(review): rows are selected by the unfiltered/unboosted "birds" column
# only, so rows that are wrong solely in a boosted/geofiltered column are not
# exported — confirm this is intended.
if not IN_KAGGLE_SUBMIT and "birds" in df:
    wrong_rows = df["_primary_labels"] != df["birds"]
    df.loc[wrong_rows, ["filename", "_primary_labels"] + labels_cols].to_csv(
        "wrong.csv", index=False
    )

birds f1 0.9855947955390334
birds_boosted f1 0.9817842129845866
birds_geof f1 0.9855947955390334
birds_geof_boosted f1 0.9817842129845866


In [30]:
# Submission file: row ids plus the chosen label column, exposed as "birds".
submission = df[["row_id", RES_COL]]
submission = submission.rename(columns={RES_COL: "birds"})
submission.to_csv("submission.csv", index=False)