In [1]:
# Create an empty submission.csv before anything else runs — presumably so the
# output file exists even if a later cell fails (TODO confirm intent).
# The final cell overwrites it with the real predictions.
! touch submission.csv

In [2]:
import os
import sys

In [3]:
# Set the __KAGGLE__ flag (presumably consulted by the bundled `src` package —
# verify there) and make the attached code dataset importable.
os.environ.update({"__KAGGLE__": "1"})
sys.path.extend(["/kaggle/input/bc21-code/"])

In [4]:
import numpy as np
import pandas as pd
import tensorflow_addons as tfa

from src.config import c
from src.data_utils import (
    boost_multiple_occurences,
    geofilter_predictions,
    normalize_soundscapes_df,
    predictions_to_text_labels,
)
from src.generator import Generator
from src.geo_filter import filters as geo_filters
from src.kaggle_utils import predict

In [5]:
# Any kernel run type other than "Interactive" is treated as a submission run.
IN_KAGGLE_SUBMIT = not (os.environ["KAGGLE_KERNEL_RUN_TYPE"] == "Interactive")

# Submission runs read the test sheet; interactive runs read the labelled
# training soundscapes instead.
IN_CSV = (
    "/kaggle/input/birdclef-2021/test.csv"
    if IN_KAGGLE_SUBMIT
    else "/kaggle/input/birdclef-2021/train_soundscape_labels.csv"
)

# Key into `geo_filters`; set to None to skip the geofilter step.
GEOFILTER = "all-500mi-last_5y-1mo_tolerance"

# Threshold shared by the ensemble vote, the boost step and the label conversion.
THRESHOLD = 0.5

# Parameters handed to boost_multiple_occurences (BOOST_COEF=None disables it).
BOOST_COEF = 2.4
BOOST_MAX = 5 * BOOST_COEF

# Ensemble members, evaluated in this order.
MODELS = [
    "/kaggle/input/bc21-models/E1_g_xae332.h5",
    "/kaggle/input/bc21-models/D1_xae332.h5",
    "/kaggle/input/bc21-models/C1.h5",
]

In [6]:
# Per-model prediction accumulator and the label list reported by the models.
Ys, labels = [], None

# Read the input sheet and pass it through the project's normaliser.
df = normalize_soundscapes_df(pd.read_csv(IN_CSV))

In [7]:
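# Optional shortcut (disabled): reload cached predictions instead of re-running
# inference, and take the label list from the first model's JSON metadata.
# from lib.utils import load_pickle, read_json, save_pickle
# # save_pickle('Ys', Ys)
# Ys = load_pickle("Ys")
# meta = read_json(MODELS[0].replace(".h5", ".json"))
# labels = meta["labels"]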

In [None]:
# Run every ensemble member over the same rows and collect its predictions.
#
# The accumulator is reset here rather than relying on an earlier cell, so that
# re-running this cell does not append a second copy of each model's output.
Ys = []
labels = None

for model in MODELS:

    Y, model_labels = predict(
        model_path=model,
        df=df,
        quiet=IN_KAGGLE_SUBMIT,
    )

    # The predictions are later combined element-wise, which is only meaningful
    # when every model reports the same labels in the same order.
    if labels is not None and list(model_labels) != list(labels):
        raise ValueError(
            f"Model {model} reports a different label list than the previous "
            "models; their predictions cannot be combined element-wise."
        )

    labels = model_labels
    Ys.append(Y)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA Tesla V100-DGXS-32GB, compute capability 7.0


In [None]:
# combine predictions
#
# Stack the per-model outputs once (axis 0 indexes the model) instead of
# rebuilding and casting the array for every candidate combination rule.
stacked = np.array(Ys).astype(np.float64)

# Alternative rules, kept for reference. Only one rule may be active: each of
# them assigns Y, so an earlier one would just be overwritten.
#
# sqrt(sum_squares)
# Y = np.sqrt(np.sum(stacked ** 2, axis=0))
#
# mean
# Y = np.mean(stacked, axis=0)

# sum+threshold (active): emit 1 where the summed scores exceed THRESHOLD per
# model, else 0. The model count comes from the stacked array itself so the
# rule stays correct even if Ys does not hold exactly len(MODELS) entries
# (e.g. predictions reloaded from a cache).
Y = np.where(np.sum(stacked, axis=0) > stacked.shape[0] * THRESHOLD, 1, 0)

# Switch off the boost step for this rule (it only runs when BOOST_COEF is not
# None) — presumably because Y is already a hard 0/1 decision.
BOOST_COEF = None

In [None]:
# Store the combined predictions on the frame, one entry of Y per row.
df["y"] = list(Y)

# Optional geofilter step, using the site/label table selected by GEOFILTER.
if GEOFILTER is not None:

    site_labels = geo_filters[GEOFILTER]
    Y = geofilter_predictions(
        df=df, Y_pred=Y, site_labels=site_labels, labels=labels, downgrade_const=0
    )

    # keep the column in sync with the filtered predictions
    df["y"] = list(Y)


# Optional boost step; skipped whenever BOOST_COEF has been set to None.
if BOOST_COEF is not None:

    df = boost_multiple_occurences(
        df=df,
        labels=labels,
        pred_col="y",
        out_col="y",
        boost_coef=BOOST_COEF,
        max_boost_coef=BOOST_MAX,
        threshold=THRESHOLD,
    )

# Turn the per-row predictions into label strings, with "nocall" as the default.
df["birds"] = predictions_to_text_labels(
    df["y"], labels, default_label="nocall", threshold=THRESHOLD
)

In [None]:
# Sanity check: make sure we have not produced complete garbage.
# Only runs outside a submission run; `_primary_labels` is presumably the
# reference column added by normalize_soundscapes_df (TODO confirm).

if not IN_KAGGLE_SUBMIT:

    from sklearn.metrics import f1_score

    y_true = list(df["_primary_labels"])
    y_pred = list(df["birds"])
    score = f1_score(y_true, y_pred, labels=labels, average="micro")

    print("f1", score)

In [None]:
# Write the submission file: only the row id and the predicted label string,
# without the dataframe index.
df.to_csv("submission.csv", columns=["row_id", "birds"], index=False)