In [None]:
import os
import numpy as np
import pandas as pd
import torchaudio
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import json
import librosa
import tensorflow_addons as tfa
import torch
from PIL import Image
from tensorflow import keras

In [None]:
import sys
sys.path.append("../input/generetor")
from gen_test import  Test_Kaggle, Functions

In [None]:
# Directory whose .ogg/.OGG files are scored (it is the default `audio_path`)
# and whose "*txt" files are parsed for per-site coordinates.
TEST_AUDIO_PATH = '../input/birdclef-2021/test_soundscapes/'
# Only referenced by a commented-out `audio_path` override in a later cell.
TRAIN_AUDIO_PATH = '../input/birdclef-2021/train_soundscapes/'
# Model directories. Each one is scanned for `config.json`, a file whose name
# contains "dict_birds", and `.h5` weight files. The trailing slash matters:
# file paths are built by plain string concatenation (`path + file`).
PATHS = [
    '../input/2inputs260x260/',
    '../input/nocall21x260/',
    '../input/nocall22x260/',
    '../input/nocall23x260/',
    '../input/nocall24x260/',
    '../input/imginput260eff25/',
    '../input/imginput260x338cl23/',
    '../input/2inputs380x380/',
    '../input/imginput260x319cl/',
    '../input/2inputs260x260319cl/',
    '../input/2inputs224x380319cl/',
    '../input/imginput260x338cl22/'
]

In [None]:
# Three lists filled while walking PATHS in order:
#   CONFIG     - parsed config.json of each directory (or the shared base config)
#   DICTS      - parsed JSON of every file whose name contains "dict_birds"
#   MODEL_PATH - path of every ".h5" file
# NOTE(review): the inference cell reads CONFIG[j], DICTS[j] and MODEL_PATH[j]
# with the same index j, which presumably relies on every directory holding
# exactly one model and one dict file - confirm against the datasets.
CONFIG, MODEL_PATH, DICTS = [], [], []


def _read_json(json_path):
    """Return the parsed content of the JSON file at `json_path`."""
    with open(json_path, 'r') as handle:
        return json.load(handle)


for model_dir in PATHS:
    dir_entries = os.listdir(model_dir)
    for entry in dir_entries:
        entry_path = model_dir + entry
        # Branch order matters: a name is classified by the first rule it matches.
        if entry == 'config.json':
            CONFIG.append(_read_json(entry_path))
        elif 'dict_birds' in entry:
            DICTS.append(_read_json(entry_path))
        elif entry.endswith('.h5'):
            MODEL_PATH.append(entry_path)
    # Directories shipped without their own config fall back to the base one.
    if 'config.json' not in dir_entries:
        CONFIG.append(_read_json('../input/generetor/base_config.json'))

# NOTE(review): not referenced again in the visible cells; kept in case other
# code relies on it.
train_soundscape_labels = pd.read_csv('../input/birdclef-2021/train_soundscape_labels.csv')


def _site_field_value(line):
    """Return the first whitespace-separated token after the first ':' in `line`.

    Anything following that token on the line (e.g. a trailing unit or
    hemisphere marker, if present) is dropped.
    """
    return line.split(":")[1].split("\n")[0].split()[0]


# One entry per "*txt" file: [site id (file name up to the first "_"),
# last two lines of the file].
list_sites = []
for file_name in os.listdir(TEST_AUDIO_PATH):
    if file_name[-3:] == "txt":
        with open(TEST_AUDIO_PATH + file_name, "r") as f:
            site = f.readlines()
        list_sites.append([file_name.split("_")[0], site[-2:]])

# Build the coordinate table ONCE, after all files were read. Previously this
# ran inside the directory loop, so the frame was rebuilt for every entry and
# `coord_sites` was left undefined when the directory was empty.
# The second-to-last line is read as latitude and the last one as longitude.
# A dict keeps the "last file wins" behaviour for repeated site ids.
site_coords = {}
for site_id, tail_lines in list_sites:
    site_coords[site_id] = (
        _site_field_value(tail_lines[1]),  # longitude
        _site_field_value(tail_lines[0]),  # latitude
    )

coord_sites = pd.DataFrame(
    list(site_coords.values()),
    index=list(site_coords.keys()),
    columns=["longitude", "latitude"],
).astype("float")
coord_sites["site"] = coord_sites.index

In [None]:
# Thresholds handed to the post-processing helpers in the inference cell:
# `boost_threshold` -> Functions.boost_multiple_occurences(threshold=...),
# `pred_threshold`  -> Functions.pred_from_dict(thresh=...).
boost_threshold = 0.15
pred_threshold = 0.24

# Directory to score; swap the two lines below to run on the train soundscapes.
audio_path = TEST_AUDIO_PATH
# audio_path = TRAIN_AUDIO_PATH

# Names of the audio files found in `audio_path` (lower- or upper-case extension).
list_files = [
    entry for entry in os.listdir(audio_path)
    if entry.endswith(('.ogg', '.OGG'))
]
    

In [None]:
# Ensemble inference: every model scores every soundscape, the per-row scores
# are averaged across models, then post-processed into a `birds` column.
# When no audio file was found an empty submission frame is produced instead.
if list_files:
    # Bookkeeping columns that must not be treated as per-label scores.
    meta_cols = {"row_id", "filename", "model"}

    data = Functions.make_df(audio_path, sample_rate=32000, df_coord_sites=coord_sites)
    filenames = data.filename.unique().tolist()

    # Frames are collected and concatenated once per level instead of growing a
    # DataFrame with pd.concat on every iteration (which is quadratic).
    # The empty seed frames keep the original column order and the original
    # result when there are no models / no files.
    per_model_frames = [pd.DataFrame(columns=sorted(DICTS[0].keys()))]
    for j in range(len(MODEL_PATH)):
        cfg = CONFIG[j]
        # Prediction columns are named after the sorted keys of this model's
        # label dict.
        # NOTE(review): presumably this matches the output order the model was
        # trained with - confirm.
        labels = sorted(DICTS[j].keys())
        model = keras.models.load_model(MODEL_PATH[j])
        # Loop-invariant for a given model, so computed once here.
        hop_length = int(
            (cfg["SIGNAL_LENGTH"] * cfg["SAMPLE_RATE"]) / (cfg["IMG_SIZE"] - 1)
        )

        per_file_frames = [pd.DataFrame(columns=labels)]
        for filename in filenames:
            short_df = data[data["filename"] == filename].reset_index(drop=True)
            # NOTE(review): audio is always loaded at 32000 Hz while the
            # spectrogram uses cfg["SAMPLE_RATE"]; they agree only if every
            # config uses 32000 - confirm.
            wave, _ = librosa.load(audio_path + filename, sr=32000)
            mel_long = librosa.power_to_db(
                librosa.feature.melspectrogram(
                    y=wave,
                    power=2,
                    n_fft=cfg["N_FFT"],
                    win_length=cfg["WIN_LENGHT"],
                    n_mels=cfg["N_MELS"],
                    sr=cfg["SAMPLE_RATE"],
                    hop_length=hop_length,
                    fmin=cfg["FREQ_MIN"],
                    fmax=cfg["FREQ_MAX"],
                ),
                ref=np.max,
            )
            # One batch holding every row of this file.
            tk = Test_Kaggle(
                df=short_df,
                mel_long=mel_long,
                mel_image_size=cfg["IMG_SIZE"],
                n_mels=cfg["N_MELS"],
                signal_lenght=cfg["SIGNAL_LENGTH"],
                img_dtype=cfg["IMG_DTYPE"],
                sin_cos_img=cfg["SIN_COS_IMG"],
                batch_size=len(short_df),
                img_year=cfg["IMG_YEAR"],
            )
            # tk[0] is the first batch; its first element is fed to the model.
            prediction = pd.DataFrame(model.predict(tk[0][0], workers=4))
            prediction.columns = labels
            # Both frames carry a fresh 0..n-1 index, so this aligns by position.
            prediction["row_id"] = short_df["row_id"]
            prediction["filename"] = short_df["filename"]
            per_file_frames.append(prediction)

        # Release the model before the next one is loaded.
        del model
        keras.backend.clear_session()

        pred_one_model = pd.concat(per_file_frames, axis=0, ignore_index=True)
        pred_one_model['model'] = j
        per_model_frames.append(pred_one_model)

    all_models_df = pd.concat(per_model_frames, axis=0, ignore_index=True)

    # Average the scores of all models per row_id. Labels a model does not
    # predict are NaN after the concat and therefore count as 0 in the mean.
    col_birds = [c for c in all_models_df.columns.tolist() if c not in meta_cols]
    submission = all_models_df.fillna(0).groupby('row_id')[col_birds].mean()
    submission['row_id'] = submission.index
    submission = submission.reset_index(drop=True).merge(data[['row_id', 'filename']], on='row_id')

    prediction_for_boost = submission
    columns_before_boost = [
        c for c in prediction_for_boost.columns.tolist() if c not in meta_cols
    ]
    # NOTE(review): Functions.boost_multiple_occurences is defined outside this
    # notebook; its result is used below as a mapping that becomes one row per
    # key after the transpose - confirm the exact contract.
    dict_pred = Functions.boost_multiple_occurences(
        prediction_for_boost,
        pred_col=columns_before_boost,
        labels=columns_before_boost,
        threshold=boost_threshold,
        boost_coef=1.15,
    )
    submission = pd.DataFrame(dict_pred).T
    submission.columns = columns_before_boost
    submission['row_id'] = submission.index
    sub_columns = [c for c in submission.columns.tolist() if c not in meta_cols]
    # NOTE(review): Functions.pred_from_dict is expected to return a frame that
    # has a "birds" column (it is selected right below) - confirm.
    submission = Functions.pred_from_dict(
        submission,
        cols=sub_columns,
        thresh=pred_threshold,
        as_is=False,
        labels=sub_columns,
    )

    submission = submission[["row_id", "birds"]]
else:
    submission = pd.DataFrame(columns=["row_id", "birds"])

In [None]:
# Keep only the two submission columns and renumber the rows from 0.
submission = submission.loc[:, ['row_id', 'birds']].reset_index(drop=True)
# Bare expression in the middle of the cell: it is rendered as output only
# while InteractiveShell.ast_node_interactivity is set to "all".
submission.head()
# Write the file without the index column.
submission.to_csv("submission.csv", index=False)