In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short import MEL_Generator_Short, Mel_Provider
from src.prepare_dataset import (
    choose_ids,
    make_dict_birds,
    make_intervals,
    make_intervals_upsampling,
    prepare_dataset,
)

In [2]:
# Target species list plus the synthetic 'nocall' class, kept in sorted order.
# NOTE(review): allow_pickle=True unpickles object arrays, which is unsafe for
# files from untrusted sources -- keep it only for locally produced files.
birds_SNE_600 = sorted(
    [*np.load("/app/sandbox/sites/birds_SNE_600.npy", allow_pickle=True), "nocall"]
)

In [3]:
# Number of target labels, counting the 'nocall' entry appended above.
len(birds_SNE_600)

181

In [4]:
# Random state reused for sampling, shuffling, the train/valid split and the generators.
SEED = 42
# Side of the square model input; also passed as n_mels and mel_image_size.
IMG_SIZE = 224
# Passed as sample_rate to the mel provider and generators (presumably Hz -- TODO confirm).
SAMPLE_RATE = 32000
# Passed as n_fft to Mel_Provider.
N_FFT = 2048
# Passed as signal_lenght (presumably seconds per clip -- TODO confirm).
SIGNAL_LENGTH = 5
# Passed as min_frequency / max_frequency to Mel_Provider.
FREQ_MIN = 0
FREQ_MAX = 16000
# Passed as win_length to Mel_Provider; the misspelt name is kept because later cells use it.
WIN_LENGHT = 1024
# NOTE: overridden with 410 in a later cell before the generators are built.
BATCH_SIZE = 50

## Initial dataframes

In [5]:
# Full metadata table; the unfiltered copy stays available as all_audio_init.
all_audio_init = pd.read_csv("/app/_data/all_audio_initial.csv")

# Keep only recordings whose primary label is one of the target species.
all_audio = all_audio_init.query("primary_label in @birds_SNE_600")
all_audio = all_audio.reset_index(drop=True)
all_audio.shape

(35249, 24)

In [6]:
# Collect the row index of every recording that names at least one secondary
# label outside the target species list; those rows are dropped afterwards.
target_birds = set(birds_SNE_600)  # set gives constant-time membership checks
list_ixs = []
for i, sec_labels in tqdm(
    all_audio["secondary_labels"].items(), total=len(all_audio)
):
    # Missing secondary labels are not strings (NaN), so only strings are split.
    if isinstance(sec_labels, str) and any(
        label not in target_birds for label in sec_labels.split()
    ):
        # Recorded once per row, however many labels are off-list.
        list_ixs.append(i)
len(set(list_ixs))

100%|██████████| 35249/35249 [00:00<00:00, 51726.93it/s]


3077

In [7]:
# Collapse repeated hits to unique row indices, then drop those recordings.
list_ixs = set(list_ixs)

all_audio = all_audio.query("index not in @list_ixs")
all_audio = all_audio.reset_index(drop=True)
all_audio.shape

(32172, 24)

In [8]:
# make_dict_birds returns a label dictionary and a replacement for all_audio.
# Presumably it maps labels to ids and adds the id columns used later
# (label_id, secondary_labels_id) -- TODO confirm in src.prepare_dataset.
dict_birds, all_audio = make_dict_birds(all_audio)

In [9]:
# Persist the label dictionary next to the model artefacts.
# The context manager closes the file even if json.dump raises.
with open("/app/_data/models/sites/sne/dict_birds_sne_600.json", "w") as file_json:
    json.dump(dict_birds, file_json)

# Inverse-frequency weight per primary label: rarer species get larger weights.
dict_weights = (1 / all_audio["primary_label"].value_counts()).to_dict()

# map() is a direct per-row dictionary lookup and yields a float column;
# every non-missing primary_label is a key because the dict is built from
# this same column. The weight is then scaled by the recording's rating.
all_audio["class_weights"] = (
    all_audio["primary_label"].map(dict_weights) * all_audio["rating"]
)

# Rows with rating == 6 are the train soundscapes; everything else is short audio.
is_soundscape = all_audio["rating"] == 6

short_audio = all_audio[~is_soundscape].reset_index(drop=True)  # only short audio
train_sondscapes = all_audio[is_soundscape].reset_index(
    drop=True
)  # only train_soundscapes
short_audio.shape
train_sondscapes.shape

len(dict_birds)

# Every soundscape row gets a fixed duration of 600.
train_sondscapes = train_sondscapes.assign(duration=600)

## Make intervals for short audio

# Plain array of (filename, duration, class_weights) rows handed to make_intervals.
array_for_cut = short_audio[["filename", "duration", "class_weights"]].to_numpy()

birds_intervals = make_intervals(
    array_for_cut,
    5,
    max_intervals=200,
    max_lenght=400,
)
birds_intervals.shape

# Keep the pre-sampling position of every interval as an explicit column.
birds_intervals["row_id"] = birds_intervals.index

birds_intervals["filename"].value_counts()

# Weighted draw without replacement; rows with larger class_weights are favoured.
birds_intervals_sample = birds_intervals.sample(
    n=500000,
    random_state=SEED,
    weights="class_weights",
)

# Per-recording metadata columns attached to every sampled interval.
short_audio_meta_columns = [
    "filename",
    "primary_label",
    "secondary_labels",
    "label_id",
    "secondary_labels_id",
    "duration",
    "rating",
    "year",
    "file_path",
    "frames",
    "sin_month",
    "cos_month",
    "sin_longitude",
    "cos_longitude",
    "latitude",
    "norm_latitude",
    "date",
]

# Left join keeps every sampled interval and looks its recording up by filename.
short_audio = birds_intervals_sample.merge(
    short_audio[short_audio_meta_columns],
    how="left",
    on="filename",
)

short_audio["filename"].value_counts()

# Any interval end that is not greater than 5 is raised to 5, so that the
# derived start (end - 5) is never negative.
interval_end = short_audio["end_sec"]
short_audio["end_sec"] = interval_end.where(interval_end > 5, 5)

short_audio["start_sec"] = short_audio["end_sec"] - 5

# Concatenate short audio with the train soundscapes

short_audio = prepare_dataset(short_audio)
train_sondscapes = prepare_dataset(train_sondscapes)

# The soundscape rows are included three times alongside the short-audio rows.
frames_to_stack = [short_audio] + [train_sondscapes] * 3
stacked_audio = pd.concat(frames_to_stack, axis=0, ignore_index=True)

# Shuffle all rows reproducibly and renumber them from zero.
final_audio = stacked_audio.sample(frac=1, random_state=SEED).reset_index(drop=True)
final_audio.shape

final_audio["primary_label"].value_counts().tail(20)

# Non-soundscape intervals that end after their recording does.
overrun_ixs = final_audio.query("end_sec > duration and rating!=6").index
final_audio.loc[overrun_ixs, ["end_sec", "duration"]]

# Clip those interval ends to the recording duration in one vectorized assignment
# instead of evaluating the query twice and writing row by row.
final_audio.loc[overrun_ixs, "end_sec"] = final_audio.loc[overrun_ixs, "duration"]

In [10]:
# Persist the assembled interval table (the row index is not written).
final_audio.to_csv('/app/_data/models/sites/sne/final_audio.csv', index=False)
# To reuse the saved table instead of rebuilding it, uncomment the line below.
# final_audio=pd.read_csv('/app/_data/models/sites/sne/final_audio.csv')

# Train / validation split

In [11]:
# 85 % / 15 % random split; both parts are renumbered from zero.
# NOTE(review): final_audio contains the soundscape rows three times and many
# intervals per recording, so a purely random split can place identical or
# near-identical rows in both train and valid -- confirm this is intended.
train, valid = (
    part.reset_index(drop=True)
    for part in train_test_split(final_audio, train_size=0.85, random_state=SEED)
)

In [12]:
# Mean rating of rows whose interval still ends after the recording does;
# NaN means no such rows remain.
ends_after_recording = final_audio["end_sec"] > final_audio["duration"]
final_audio.loc[ends_after_recording, "rating"].mean()

nan

In [13]:
# Count of missing interval ends; expected to be zero.
final_audio["end_sec"].isna().sum()

0

In [14]:
# Mel provider shared by the training and validation generators.
mel_pr = Mel_Provider(
    # audio settings
    sample_rate=SAMPLE_RATE,
    signal_lenght=SIGNAL_LENGTH,
    # spectrogram settings
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    # output settings: the mel-bin count and the image side are both IMG_SIZE
    n_mels=IMG_SIZE,
    mel_image_size=IMG_SIZE,
)

In [15]:
# Every metric-driven callback watches the validation F1 score (higher is better).

# Stop after 12 epochs without improvement and roll back to the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_f1_score",
    mode="max",
    patience=12,
    restore_best_weights=True,
    verbose=1,
)

# Save the full model (not just weights) whenever the monitored score improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    "/app/_data/models/sites/sne/eff0_sne_2.h5",
    monitor="val_f1_score",
    mode="max",
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
    verbose=1,
)

# Multiply the learning rate by 0.9 after 5 epochs without improvement.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_f1_score",
    mode="max",
    factor=0.9,
    patience=5,
    min_delta=1e-4,
    min_lr=0.00000001,
    verbose=1,
)

tensorboard = keras.callbacks.TensorBoard(
    log_dir="/app/.tensorboard/sne_0_2/", histogram_freq=0
)

# Lets an interrupted fit() resume from the backup directory.
backup_and_restore = keras.callbacks.experimental.BackupAndRestore(
    "/app/_data/models/sites/sne/backup/"
)

callbacks = [
    early_stopping,
    best_checkpoint,
    reduce_lr,
    tensorboard,
    backup_and_restore,
    keras.callbacks.TerminateOnNaN(),
]

In [17]:
def get_model(num_classes=None, img_size=None, learning_rate=0.0005):
    """Build and compile an EfficientNetB0-based multi-label classifier.

    Args:
        num_classes: Number of sigmoid output units. Defaults to the
            notebook-level ``NUM_CLASSES`` (looked up at call time).
        img_size: Side of the square, 3-channel input image. Defaults to the
            notebook-level ``IMG_SIZE``.
        learning_rate: Initial learning rate for the Adam optimizer.

    Returns:
        A compiled ``keras.Model`` mapping ``(img_size, img_size, 3)`` images
        to ``num_classes`` independent probabilities.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES
    if img_size is None:
        img_size = IMG_SIZE

    inputs = keras.Input(shape=(img_size, img_size, 3))
    # ImageNet-pretrained backbone without its classification head.
    base_model = keras.applications.EfficientNetB0(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    # Kept so the layer structure matches previously saved models.
    x = keras.layers.Flatten(name="flatten")(x)
    # The output layer is pinned to float32 so that, under a mixed_float16
    # policy, probabilities and the loss are not computed in half precision.
    # Without such a policy float32 is already the default dtype.
    outputs = keras.layers.Dense(
        num_classes, activation="sigmoid", dtype="float32"
    )(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `learning_rate` replaces the deprecated `lr` argument.
        optimizer=Adam(learning_rate=learning_rate),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            # NOTE(review): with no `threshold`, tfa's F1Score appears to score
            # only the arg-max class per sample -- confirm this is the intended
            # metric for multi-label targets.
            tfa.metrics.F1Score(num_classes=num_classes, average="micro"),
        ],
    )
    return model

# Enable mixed precision globally before any model is built.
# Newer TensorFlow releases expose the stable API and no longer ship the
# experimental one; older releases only have the experimental API.
if hasattr(keras.mixed_precision, "set_global_policy"):
    policy = keras.mixed_precision.Policy("mixed_float16")
    keras.mixed_precision.set_global_policy(policy)
else:
    policy = keras.mixed_precision.experimental.Policy("mixed_float16")
    keras.mixed_precision.experimental.set_policy(policy)

In [19]:
# One output unit per entry of dict_birds.
NUM_CLASSES = len(dict_birds)
# Replaces the BATCH_SIZE = 50 set in the constants cell; this is the value the
# generators and the steps-per-epoch computation below actually use.
BATCH_SIZE = 410

In [20]:
# Settings shared by both generators. Keeping them in one place guarantees that
# training and validation data go through exactly the same preprocessing;
# only the dataframe and the shuffling flag differ between the two.
generator_kwargs = dict(
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=True,
    convert_to_rgb=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    augment=False,
)

# Training batches are shuffled.
gen_train = MEL_Generator_Short(df=train, shuffle=True, **generator_kwargs)

# Validation batches keep dataframe order.
gen_valid = MEL_Generator_Short(df=valid, shuffle=False, **generator_kwargs)

In [23]:
# Build and compile the network; get_model reads NUM_CLASSES and IMG_SIZE,
# so both must be defined before this cell runs.
model = get_model()

In [24]:
# Whole batches only: integer division leaves out the trailing partial batch.
train_steps = len(train) // BATCH_SIZE
valid_steps = len(valid) // BATCH_SIZE

# Up to 100 epochs; the callbacks handle early stopping, checkpoints and LR decay.
history = model.fit(
    gen_train,
    epochs=100,
    steps_per_epoch=train_steps,
    validation_data=gen_valid,
    validation_steps=valid_steps,
    callbacks=callbacks,
    workers=20,
    max_queue_size=50,
    verbose=1,
)

Epoch 39/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00039: val_f1_score improved from -inf to 0.76501, saving model to /app/_data/models/sites/sne/eff0_sne_2.h5
Epoch 40/100
Epoch 00040: val_f1_score did not improve from 0.76501
Epoch 41/100
Epoch 00041: val_f1_score improved from 0.76501 to 0.76519, saving model to /app/_data/models/sites/sne/eff0_sne_2.h5
Epoch 42/100
Epoch 00042: val_f1_score improved from 0.76519 to 0.77074, saving model to /app/_data/models/sites/sne/eff0_sne_2.h5
Epoch 43/100
Epoch 00043: val_f1_score did not improve from 0.77074
Epoch 44/100
Epoch 00044: val_f1_score improved from 0.77074 to 0.77296, saving model to /app/_data/models/sites/sne/eff0_sne_2.h5
Epoch 45/100
Epoch 00045: val_f1_score did not improve from 0.77296
Epoch 46/100
Epoch 00046: val_f1_score did not improve from 0.77296
Epoch 47/100
Epoch 00047: val_f1_score did not improve from 0.77296
Epoch 48/100
Epoch 00048: val_f1_score did not improve from 0.77296

KeyboardInterrupt: 