In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short import MEL_Generator_Short, Mel_Provider
from src.prepare_dataset import (
    choose_ids,
    make_dict_birds,
    make_intervals,
    make_intervals_upsampling,
    prepare_dataset,
)

In [2]:
# One species-list file per recording site, named birds_<SITE>_300.npy.
list_sites = [
    "birds_{}_300.npy".format(site_code)
    for site_code in ("COL", "COR", "SSW", "SNE")
]

In [3]:
# Union of the species listed for every site, as a sorted list without duplicates.
# NOTE(review): allow_pickle=True unpickles whatever is stored in the file —
# only load species lists that come from a trusted source.
unique_birds = set()
for site in list_sites:
    unique_birds.update(np.load("/app/sandbox/sites/" + site, allow_pickle=True))
list_all_birds = sorted(unique_birds)

In [4]:
len(list_all_birds)

373

In [5]:
# Seed shared by the sampling, shuffling and splitting steps further down.
SEED = 42
# Side length of the model input; also passed as n_mels / mel_image_size.
IMG_SIZE = 224
# Passed as sample_rate to the mel provider and the generators (presumably Hz — TODO confirm).
SAMPLE_RATE = 32000
# Passed as n_fft to the mel provider.
N_FFT = 2048
# Passed as signal_lenght to the mel provider and the generators
# (presumably seconds per training window — TODO confirm).
SIGNAL_LENGTH = 5
# Passed as min_frequency / max_frequency to the mel provider.
FREQ_MIN = 0
FREQ_MAX = 16000
# Passed as win_length to the mel provider (name is misspelled but used as-is below).
WIN_LENGHT = 1024
# Initial value only: reassigned to 2000 in the training cell before the generators are built.
BATCH_SIZE = 50

## Initial dataframes

In [6]:
# Load the full recording table and keep only rows whose primary species is
# in the combined site list.
all_audio_init = pd.read_csv("/app/_data/all_audio_initial.csv")
in_target_species = all_audio_init["primary_label"].isin(list_all_birds)
all_audio = all_audio_init[in_target_species].reset_index(drop=True)
all_audio.shape

(61578, 24)

In [7]:
# Collect the row labels of recordings that name at least one secondary species
# outside the target list; those rows are dropped in a later cell.
known_birds = set(list_all_birds)  # set membership instead of scanning the list per label
list_ixs = []
for i, sec_labels in tqdm(
    all_audio["secondary_labels"].items(), total=len(all_audio)
):
    # Missing entries are not strings (NaN after read_csv); they carry no labels.
    if not isinstance(sec_labels, str):
        continue
    # Record each offending row once, however many unknown labels it has.
    if any(label not in known_birds for label in sec_labels.split()):
        list_ixs.append(i)
len(set(list_ixs))

100%|██████████| 61578/61578 [00:01<00:00, 42522.52it/s]


1218

In [8]:
# Collapse to a set of unique row labels for the membership filter in the next cell.
list_ixs = set(list_ixs)

In [9]:
# Drop the rows flagged above (unknown secondary species) and renumber the index.
keep_rows = ~all_audio.index.isin(list_ixs)
all_audio = all_audio[keep_rows].reset_index(drop=True)
all_audio.shape

(60360, 24)

In [10]:
# make_dict_birds (from src.prepare_dataset) returns a label mapping and an updated frame.
# The mapping is saved as JSON below and its length is later used as the class count.
# presumably it also adds the label_id / secondary_labels_id columns used later — TODO confirm
dict_birds, all_audio = make_dict_birds(all_audio)

In [11]:
# Persist the label mapping next to the model artifacts. The context manager
# closes the handle even if json.dump raises.
with open("/app/_data/models/sites/all/all_birds_300.json", "w") as file_json:
    json.dump(dict_birds, file_json)

In [12]:
# Sampling weight per recording: inverse frequency of its primary species,
# scaled by the recording's rating (so a rating of 0 yields a weight of 0).
species_counts = all_audio["primary_label"].value_counts()
dict_weights = (1 / species_counts).to_dict()
# .map is a direct dictionary lookup; Series.replace with a large dict is much
# slower and its result dtype depends on pandas' downcasting rules.
all_audio["class_weights"] = (
    all_audio["primary_label"].map(dict_weights) * all_audio["rating"]
)

# rating == 6 marks the train-soundscape rows; every other row is short audio.
is_soundscape = all_audio["rating"] == 6
short_audio = all_audio[~is_soundscape].reset_index(drop=True)  # only short audio
train_sondscapes = all_audio[is_soundscape].reset_index(
    drop=True
)  # only train_soundscapes
short_audio.shape
train_sondscapes.shape

len(dict_birds)

# Every soundscape row gets the same fixed duration value.
train_sondscapes = train_sondscapes.assign(duration=600)

## Make intervals for short audio

# Per-recording inputs for make_intervals: file name, duration and sampling weight.
array_for_cut = short_audio[["filename", "duration", "class_weights"]].values

# presumably 5 is the interval length in seconds — TODO confirm against make_intervals
birds_intervals = make_intervals(array_for_cut, 5, max_intervals=150, max_lenght=300)
birds_intervals.shape

birds_intervals["row_id"] = birds_intervals.index

birds_intervals["filename"].value_counts()

# Weighted sampling without replacement can only draw rows with a positive
# weight, so cap the request instead of raising when fewer are available.
n_drawable = int((birds_intervals["class_weights"] > 0).sum())
birds_intervals_sample = birds_intervals.sample(
    n=min(500000, n_drawable), weights="class_weights", random_state=SEED
)

# Per-recording metadata copied onto every sampled interval of that recording.
recording_columns = [
    "filename",
    "primary_label",
    "secondary_labels",
    "label_id",
    "secondary_labels_id",
    "duration",
    "rating",
    "year",
    "file_path",
    "frames",
    "sin_month",
    "cos_month",
    "sin_longitude",
    "cos_longitude",
    "latitude",
    "norm_latitude",
    "date",
]
# From here on short_audio holds one row per sampled interval, not per recording.
short_audio = pd.merge(
    birds_intervals_sample,
    short_audio[recording_columns],
    how="left",
    on="filename",
)

short_audio["filename"].value_counts()

# Make every window end at least SIGNAL_LENGTH after the start of the file, then
# place its start SIGNAL_LENGTH before that end. Vectorized replacement for the
# per-row lambda; values that are not greater than the threshold (including
# missing ones) become the threshold, exactly as before.
short_audio["end_sec"] = short_audio["end_sec"].where(
    short_audio["end_sec"] > SIGNAL_LENGTH, SIGNAL_LENGTH
)

short_audio["start_sec"] = short_audio["end_sec"] - SIGNAL_LENGTH

# concat short+long

short_audio = prepare_dataset(short_audio)
train_sondscapes = prepare_dataset(train_sondscapes)

# The soundscape rows are listed twice, so they appear twice in the combined table.
frames_to_stack = [short_audio, train_sondscapes, train_sondscapes]
final_audio = pd.concat(frames_to_stack, axis=0, ignore_index=True)
# Shuffle all rows reproducibly and renumber them.
final_audio = final_audio.sample(frac=1, random_state=SEED).reset_index(drop=True)
final_audio.shape

final_audio["primary_label"].value_counts().tail(20)

# Non-soundscape rows (rating != 6) whose window ends after the recording does.
overlong = (final_audio["end_sec"] > final_audio["duration"]) & (
    final_audio["rating"] != 6
)
final_audio.loc[overlong, ["end_sec", "duration"]]

# Clip those windows to the recording length in one masked assignment instead
# of one .loc write per row.
final_audio.loc[overlong, "end_sec"] = final_audio.loc[overlong, "duration"]

# upsampling

# upsampling: add extra intervals for the rarest species

# The five species with the fewest rows in final_audio.
final_label_counts = final_audio["primary_label"].value_counts()
rare_birds = final_label_counts.tail(5).index.tolist()
# Every recording whose primary species is one of those five.
is_rare = all_audio["primary_label"].isin(rare_birds)
rare_bird_filenames = all_audio.loc[is_rare, "filename"].unique().tolist()
rare_birds

# Build the extra intervals for each rare species, then concatenate once
# instead of growing the frame inside the loop. The empty template goes first
# so the column order matches the previous incremental build.
upsampled_parts = [pd.DataFrame(columns=["end_sec", "class_weights", "filename"])]
for bird in rare_birds:
    # Recordings of this species, excluding rating 0 and the soundscape rows
    # (rating 6). The former "filename in @rare_bird_filenames" clause was
    # redundant: that list was built from exactly the rows with this primary label.
    rare_array = all_audio.query(
        "primary_label == @bird and rating != 0 and rating != 6"
    )[["filename", "duration", "class_weights"]].values
    upsampled_parts.append(make_intervals_upsampling(rare_array))
df = pd.concat(upsampled_parts, axis=0)

df

# NOTE(review): df keeps the index of each concatenated part, so these row_id
# values may repeat — confirm nothing downstream needs row_id to be unique.
df["row_id"] = df.index
# Per-recording metadata copied onto every upsampled interval.
upsampling_meta_columns = [
    "filename",
    "primary_label",
    "secondary_labels",
    "label_id",
    "secondary_labels_id",
    "duration",
    "rating",
    "year",
    "file_path",
    "frames",
    "sin_month",
    "cos_month",
    "sin_longitude",
    "cos_longitude",
    "latitude",
    "norm_latitude",
    "date",
]
df = df.merge(all_audio[upsampling_meta_columns], on="filename", how="left")
# Same windowing rule as for the sampled short audio: the window ends at least
# SIGNAL_LENGTH into the file and starts SIGNAL_LENGTH earlier. Vectorized
# replacement for the per-row lambda with identical results.
df["end_sec"] = df["end_sec"].where(df["end_sec"] > SIGNAL_LENGTH, SIGNAL_LENGTH)
df["start_sec"] = df["end_sec"] - SIGNAL_LENGTH
df = prepare_dataset(df)
# Append the upsampled rare-species intervals, then reshuffle and renumber.
final_audio = pd.concat([final_audio, df], axis=0, ignore_index=True)
final_audio = final_audio.sample(frac=1, random_state=SEED).reset_index(drop=True)
final_audio.shape

final_audio["primary_label"].value_counts().tail(20)

# Clip windows of non-soundscape rows (rating != 6) that run past the end of
# the recording, using one masked assignment instead of a per-row loop.
overlong_after_upsampling = (final_audio["end_sec"] > final_audio["duration"]) & (
    final_audio["rating"] != 6
)
final_audio.loc[overlong_after_upsampling, "end_sec"] = final_audio.loc[
    overlong_after_upsampling, "duration"
]

In [13]:
# Cache the assembled interval table on disk; uncomment the read below to reuse
# it instead of rebuilding it with the cells above.
final_audio.to_csv('/app/_data/models/sites/all/final_audio.csv', index=False)
# final_audio = pd.read_csv('/app/_data/models/sites/all/final_audio.csv')


# train_test_split

In [14]:
# Random 85/15 row-level split, each part renumbered from zero.
# Both names are reassigned from the stratified folds in the training cell further down.
train, valid = (
    part.reset_index(drop=True)
    for part in train_test_split(final_audio, train_size=0.85, random_state=SEED)
)

In [16]:
# Shared mel-spectrogram provider, later handed to both data generators.
mel_pr = Mel_Provider(
    # audio / clip settings
    sample_rate=SAMPLE_RATE,
    signal_lenght=SIGNAL_LENGTH,
    # STFT settings
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    # mel filter bank and output image settings
    n_mels=IMG_SIZE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
)

In [17]:
def get_model(num_classes=None):
    """Build and compile the EfficientNetB0 multi-label classifier.

    Parameters
    ----------
    num_classes : int, optional
        Width of the sigmoid output layer and of the F1 metric. When omitted,
        the notebook-level ``NUM_CLASSES`` is read at call time, so existing
        ``get_model()`` calls behave as before.

    Returns
    -------
    keras.Model
        Compiled model taking ``(IMG_SIZE, IMG_SIZE, 3)`` inputs, trained with
        binary cross-entropy and Adam (learning rate 0.001).
    """
    if num_classes is None:
        num_classes = NUM_CLASSES
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB0(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    # Keep the output layer in float32: when a mixed_float16 policy is active,
    # the final activations should not be computed in half precision.
    outputs = keras.layers.Dense(num_classes, activation="sigmoid", dtype="float32")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=0.001),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            # NOTE(review): with the default threshold=None, tfa's F1Score scores
            # only the arg-max class per sample — confirm that is intended for
            # multi-label targets (threshold=0.5 would score every class).
            tfa.metrics.F1Score(num_classes=num_classes, average="micro"),
        ],
    )
    return model

In [18]:
# Enable float16 compute with float32 variables. Prefer the stable
# mixed-precision API and fall back to the deprecated experimental one on
# TensorFlow versions that do not provide it yet.
if hasattr(keras.mixed_precision, "set_global_policy"):
    policy = keras.mixed_precision.Policy("mixed_float16")
    keras.mixed_precision.set_global_policy(policy)
else:
    policy = keras.mixed_precision.experimental.Policy("mixed_float16")
    keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [20]:
# Six stratified folds over the rows of final_audio, stratified by primary species.
# NOTE(review): the split is per row, so intervals cut from one recording (and
# the duplicated soundscape rows) can land in both the train and validation
# side of a fold — confirm this is acceptable.
skf = StratifiedKFold(n_splits=6, random_state=SEED, shuffle=True)
# Positional row indices per fold. The lists grow with the folds actually
# produced, so they no longer rely on a hard-coded six-slot placeholder.
train_index, valid_index = [], []
for train_ixs, valid_ixs in skf.split(final_audio, final_audio["primary_label"]):
    train_index.append(train_ixs)
    valid_index.append(valid_ixs)

In [None]:
# Train on fold 0 of the stratified split. The fold arrays hold positional
# indices, so select with .iloc (identical to .loc only while final_audio keeps
# its default 0..n-1 index).
# NOTE(review): unlike the earlier train_test_split cell, these frames keep
# their original index — confirm MEL_Generator_Short does not expect 0..n-1.
train = final_audio.iloc[train_index[0]]
valid = final_audio.iloc[valid_index[0]]
NUM_CLASSES = len(dict_birds)
BATCH_SIZE = 2000

# Settings shared by the training and validation generators; only the source
# frame and the shuffle flag differ between the two.
generator_kwargs = dict(
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=True,
    convert_to_rgb=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    augment=False,
)
gen_train = MEL_Generator_Short(df=train, shuffle=True, **generator_kwargs)
gen_valid = MEL_Generator_Short(df=valid, shuffle=False, **generator_kwargs)

# Early stopping, checkpointing and LR scheduling all track the validation F1 score.
monitored_metric = "val_f1_score"
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor=monitored_metric,
        patience=12,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/sites/all/eff0_all_1_400.h5",
        monitor=monitored_metric,
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor=monitored_metric,
        factor=0.9,
        patience=5,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=0.00000001,
    ),
    keras.callbacks.TensorBoard(log_dir="/app/.tensorboard/all_eff0_1/", histogram_freq=0),
    keras.callbacks.experimental.BackupAndRestore(
        "/app/_data/models/sites/all/backup/"
    ),
    keras.callbacks.TerminateOnNaN(),
]

model = get_model()
history = model.fit(
    gen_train,
    validation_data=gen_valid,
    epochs=200,
    # Whole batches only: the final partial batch of each split is not consumed.
    steps_per_epoch=train.shape[0] // BATCH_SIZE,
    validation_steps=valid.shape[0] // BATCH_SIZE,
    verbose=1,
    workers=30,
    callbacks=callbacks,
)