In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.MEL_Gen import MEL_Gen, Mel_Provider
from src.prepare_dataset import (
    choose_ids,
    make_dict_birds,
    make_intervals,
    make_intervals_upsampling,
    prepare_dataset,
)

In [2]:
SEED = 37
IMG_SIZE = 224
SAMPLE_RATE = 32000
N_FFT = 2048
SIGNAL_LENGTH = 5
FREQ_MIN = 0
FREQ_MAX = 16000
WIN_LENGHT = 1024
BATCH_SIZE = 50
list_drop = [
    "XC509721.ogg",
    "XC428067.ogg",
    "XC523831.ogg",
    "XC523960.ogg",
    "XC237870.ogg",
    "XC129924.ogg",
    "XC576851.ogg",
    "XC579430.ogg",
    "XC590621.ogg",
]

In [3]:
final = pd.read_csv("/app/_data/labels_nocall/nocall_predictions.csv")
nocall = pd.read_csv("/app/_data/labels_nocall/all_nocalls.csv")

In [4]:
all_audio_init = pd.read_csv("/app/_data/all_audio_initial.csv")
all_audio = all_audio_init.query("filename not in @list_drop").reset_index(drop=True)
all_audio = all_audio.query("year >=1980 and year<=2021").reset_index(drop=True)

all_audio.shape

(65128, 24)

# dict_birds and weights

In [5]:
dict_birds, all_audio = make_dict_birds(all_audio)

In [6]:
len(dict_birds)

398

In [7]:
dict_weights = (1 / all_audio["primary_label"].value_counts()).to_dict()
all_audio["class_weights"] = all_audio["primary_label"].replace(dict_weights)
# all_audio["class_weights"] = all_audio["class_weights"] * all_audio["rating"]

In [8]:
all_audio = all_audio[all_audio["rating"] >= 3]

## calls

In [9]:
drop_list_final = final[(final["bird"] == 1) & (final["nocall"] == 1)][
    "row_id"
].tolist()

In [10]:
final = final.query("row_id not in @drop_list_final")
final = final.query("bird == 1")

In [11]:
final = final.merge(
    all_audio[["filename", "primary_label", "class_weights"]], on="filename", how="left"
)

In [12]:
final.isna().sum()

nocall                0
bird                  0
row_id                0
filename              0
end_sec               0
primary_label    252740
class_weights    252740
dtype: int64

In [13]:
final = final.dropna(axis=0)

In [14]:
final_sample = final.sample(400000, weights="class_weights", random_state=SEED)

In [15]:
final_sample.sample()

Unnamed: 0,nocall,bird,row_id,filename,end_sec,primary_label,class_weights
1309728,0,1,XC172660_18_0,XC172660.ogg,18.0,lazbun,0.008333


In [16]:
final_sample = final_sample[
    ["row_id", "filename", "end_sec", "primary_label", "class_weights"]
].merge(
    all_audio[
        [
            "filename",
            "secondary_labels",
            "label_id",
            "secondary_labels_id",
            "duration",
            "rating",
            "year",
            "file_path",
            "sin_month",
            "cos_month",
            "sin_longitude",
            "cos_longitude",
            "latitude",
            "norm_latitude",
        ]
    ],
    on="filename",
    how="left",
)

In [17]:
final_sample

Unnamed: 0,row_id,filename,end_sec,primary_label,class_weights,secondary_labels,label_id,secondary_labels_id,duration,rating,year,file_path,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude
0,XC575588_25_0,XC575588.ogg,25.0,wiltur,0.014286,casfin chispa ruckin amerob,376,84 95 285 9,212.468031,4.5,2018,/app/_data/train_short_audio/wiltur/XC575588.ogg,1.224647e-16,-1.000000e+00,-0.911385,-0.411554,39.010800,0.716727
1,XC268189_98_0,XC268189.ogg,98.0,incdov,0.010204,moudov houfin norcar cogdov gamqua yebcha gilw...,182,214 178 221 103 139 382 144 32,373.199469,3.5,2015,/app/_data/train_short_audio/incdov/XC268189.ogg,1.224647e-16,-1.000000e+00,-0.943957,-0.330070,31.340000,0.674111
2,XC343095_10_0,XC343095.ogg,10.0,burwar1,0.006289,,72,,21.997000,4.0,2013,/app/_data/train_short_audio/burwar1/XC343095.ogg,1.224647e-16,-1.000000e+00,-0.937315,0.348484,-10.790000,0.440056
3,XC124487_25_0,XC124487.ogg,25.0,nutwoo,0.020000,,228,,61.872125,5.0,2013,/app/_data/train_short_audio/nutwoo/XC124487.ogg,1.000000e+00,6.123234e-17,-0.858232,-0.513263,35.275100,0.695973
4,XC348156_90_0,XC348156.ogg,90.0,pasfly,0.006369,acowoo pasfly wesblu daejun mouchi,244,1 244 355 121 213,158.107219,3.5,2001,/app/_data/train_short_audio/pasfly/XC348156.ogg,1.224647e-16,-1.000000e+00,-0.902555,-0.430574,30.980000,0.672111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,XC137714_57_0,XC137714.ogg,57.0,buggna,0.004587,,68,,283.117000,5.0,2013,/app/_data/train_short_audio/buggna/XC137714.ogg,1.224647e-16,-1.000000e+00,-0.998313,-0.058069,44.707000,0.748372
399996,XC427421_16_0,XC427421.ogg,16.0,webwoo1,0.008403,,352,,24.589000,4.5,2015,/app/_data/train_short_audio/webwoo1/XC427421.ogg,-8.660254e-01,5.000000e-01,-0.906574,0.422047,-9.616208,0.446577
399997,XC425817_28_0,XC425817.ogg,28.0,chswar,0.006098,bkcchi reevir1 indbun,96,36 266 183,95.597125,3.5,2018,/app/_data/train_short_audio/chswar/XC425817.ogg,-5.000000e-01,-8.660254e-01,-0.969580,0.244776,42.580000,0.736556
399998,XC371638_39_0,XC371638.ogg,39.0,amered,0.003534,,8,,51.292531,3.0,2017,/app/_data/train_short_audio/amered/XC371638.ogg,5.000000e-01,-8.660254e-01,-0.962453,0.271449,40.605200,0.725584


In [18]:
final_sample["primary_label"].value_counts()

normoc     3566
yelgro     2968
bulori     2770
brnthr     2679
thbkin     2587
           ... 
gretin1     186
whcpar      160
hofwoo1     112
stvhum2      53
wegspa1      50
Name: primary_label, Length: 397, dtype: int64

# nocall

In [19]:
nocall = nocall.drop_duplicates()

In [20]:
nocall = nocall.merge(
    all_audio[
        [
            "filename",
            "rating",
            "year",
            "sin_month",
            "cos_month",
            "sin_longitude",
            "cos_longitude",
            "latitude",
            "norm_latitude",
        ]
    ],
    on="filename",
    how="left",
).drop_duplicates()

In [21]:
nocall["label_id"] = dict_birds["nocall"]
nocall["secondary_labels_id"] = np.nan

In [22]:
nocall["class_weights"] = all_audio["class_weights"].median()

In [23]:
nocall["file_path"] = nocall["file_path"].str.replace("birdclef-2021/", "")

In [24]:
nocall_sample = nocall.sample(5000, random_state=SEED)

# concat

In [25]:
final_audio = (
    pd.concat(
        [final_sample, nocall_sample],
        axis=0,
        ignore_index=True,
    )
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)
final_audio.shape

(405000, 20)

In [26]:
final_audio["primary_label"].value_counts().tail(20)

scamac1    395
brwpar1    392
norsho     366
rebsap     364
sthwoo1    359
grhcha1    352
lotduc     339
whimbr     327
goowoo1    323
rehbar1    274
runwre1    249
sander     244
bucmot2    218
heptan     217
rthhum     201
gretin1    186
whcpar     160
hofwoo1    112
stvhum2     53
wegspa1     50
Name: primary_label, dtype: int64

In [27]:
train, valid = train_test_split(final_audio, train_size=0.8, random_state=SEED)
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)

# MEL_Gen

In [28]:
NUM_CLASSES = len(dict_birds)
BATCH_SIZE = 400

In [29]:
mel_pr = Mel_Provider(
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    n_mels=IMG_SIZE,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    signal_lenght=SIGNAL_LENGTH,
)

In [30]:
# shutil.rmtree('/app/_data/npy/short_mels')

In [31]:
gen_train = MEL_Gen(
    df=train,
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels_224_uint8/",
    convert_to_rgb=True,
    shuffle=True,
    return_primary_labels=False,
    return_concat_labels=True,
    augment=False,
    img_dtype="uint8",
)
gen_valid = MEL_Gen(
    df=valid,
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels_224_uint8/",
    convert_to_rgb=True,
    shuffle=False,
    return_primary_labels=False,
    return_concat_labels=True,
    augment=False,
    img_dtype="uint8",
)

# model

In [32]:
def get_model():
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB0(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(lr=0.0005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="micro"),
        ],
    )
    return model


policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [33]:
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        patience=12,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/from_nocall/Eff0_2/Eff0_2.h5",
        monitor="val_f1_score",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        factor=0.9,
        patience=5,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=0.00000001,
    ),
    keras.callbacks.TensorBoard(
        log_dir="/app/.tensorboard/Eff0_2_nc2/", histogram_freq=0
    ),
    keras.callbacks.experimental.BackupAndRestore(
        "/app/_data/models/from_nocall/Eff0_2/backup/"
    ),
    keras.callbacks.TerminateOnNaN(),
]

In [None]:
model = get_model()
history = model.fit(
    gen_train,
    validation_data=gen_valid,
    epochs=100,
    steps_per_epoch=train.shape[0] // BATCH_SIZE,
    validation_steps=valid.shape[0] // BATCH_SIZE,
    verbose=1,
    workers=20,
    max_queue_size=50,
    callbacks=callbacks,
)

Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_f1_score improved from -inf to 0.22116, saving model to /app/_data/models/from_nocall/Eff0_2/Eff0_2.h5
Epoch 2/100
Epoch 00002: val_f1_score improved from 0.22116 to 0.50073, saving model to /app/_data/models/from_nocall/Eff0_2/Eff0_2.h5
Epoch 3/100
Epoch 00003: val_f1_score improved from 0.50073 to 0.57958, saving model to /app/_data/models/from_nocall/Eff0_2/Eff0_2.h5
Epoch 4/100
Epoch 00004: val_f1_score improved from 0.57958 to 0.62016, saving model to /app/_data/models/from_nocall/Eff0_2/Eff0_2.h5
Epoch 5/100
Epoch 00005: val_f1_score improved from 0.62016 to 0.64425, saving model to /app/_data/models/from_nocall/Eff0_2/Eff0_2.h5
Epoch 6/100
Epoch 00006: val_f1_score improved from 0.64425 to 0.66184, saving model to /app/_data/models/from_nocall/Eff0_2/Eff0_2.h5
Epoch 7/100
Epoch 00007: val_f1_score improved from 0.66184 to 0.67251, saving model to /app/_data/models/from_nocall/Eff

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-34-2f01e9ec22f2>", line 2, in <module>
    history = model.fit(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
    tmp_logs = train_function(iterator)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
    result = self._call(*args, **kwds)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py", line 807, in _call
    return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/f

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-34-2f01e9ec22f2>", line 2, in <module>
    history = model.fit(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
    tmp_logs = train_function(iterator)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
    result = self._call(*args, **kwds)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/def_function.py", line 807, in _call
    return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/f