In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short import MEL_Generator_Short, Mel_Provider
from src.prepare_dataset import (
    choose_ids,
    make_dict_birds,
    make_intervals,
    make_intervals_upsampling,
    prepare_dataset,
)

In [2]:
# Labels used below to filter `primary_label` and `secondary_labels`.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load this
# file from a trusted location.
_site_species = np.load("/app/sandbox/sites/birds_COR_600.npy", allow_pickle=True)
birds_COR_600 = list(_site_species)

In [3]:
# Shared configuration for spectrogram generation and training.
SEED = 42  # random_state for the k-fold split and seed for the generators
IMG_SIZE = 224  # model input side; also passed as n_mels and mel_image_size
SAMPLE_RATE = 32_000
N_FFT = 2_048
SIGNAL_LENGTH = 5  # presumably seconds per clip — TODO confirm in Mel_Provider
FREQ_MIN = 0
FREQ_MAX = 16_000
WIN_LENGHT = 1_024  # (sic) the misspelled name is referenced when building mel_pr
BATCH_SIZE = 50  # NOTE: reassigned to 400 in a later cell before training starts

## Initial dataframes

In [4]:
# Read the full metadata table, then keep only the rows whose primary label
# appears in birds_COR_600 and renumber them from zero.
all_audio_init = pd.read_csv("/app/_data/all_audio_initial.csv")
site_rows = all_audio_init.query("primary_label in @birds_COR_600")
all_audio = site_rows.reset_index(drop=True)
all_audio.shape

(34508, 24)

In [5]:
# Collect the index of every recording that has at least one secondary label
# outside birds_COR_600. Each index is recorded once.
known_birds = set(birds_COR_600)  # set lookup instead of scanning the list per label
list_ixs = []
for i in tqdm(all_audio.index.tolist()):
    sec_labels = all_audio.loc[i, "secondary_labels"]
    # Missing secondary labels come through as NaN (a float); only strings
    # can be split into individual labels.
    if not isinstance(sec_labels, str):
        continue
    if any(label not in known_birds for label in sec_labels.split()):
        list_ixs.append(i)
len(set(list_ixs))

100%|██████████| 34508/34508 [00:00<00:00, 42420.71it/s]


6003

In [6]:
# Remove every flagged recording and renumber the remaining rows.
# NOTE: this cell reassigns all_audio, so re-running it on its own would apply
# the old index labels to the already-renumbered frame.
list_ixs = set(list_ixs)

remaining_rows = all_audio.query("index not in @list_ixs")
all_audio = remaining_rows.reset_index(drop=True)
all_audio.shape

(28505, 24)

In [7]:
# Build the label dictionary and take back the frame returned alongside it.
# NOTE(review): make_dict_birds comes from src.prepare_dataset and is not shown
# here — presumably it maps each species code to an integer id and adds the id
# columns to the frame; confirm against that module.
dict_birds, all_audio = make_dict_birds(all_audio)

In [8]:
# Persist the label dictionary in the same folder the fold checkpoints are
# written to. The context manager closes the file even if json.dump raises.
with open("/app/_data/models/sites/cor/dict_birds_cor_600.json", "w") as file_json:
    json.dump(dict_birds, file_json)

In [9]:
# Per-row sampling weight: inverse frequency of the row's primary label,
# multiplied by the recording's rating.
dict_weights = (1 / all_audio["primary_label"].value_counts()).to_dict()
# map() is a direct dictionary lookup and yields a float column, whereas
# replace() with a large dict is slow and may leave the column as object dtype.
all_audio["class_weights"] = (
    all_audio["primary_label"].map(dict_weights) * all_audio["rating"]
)

In [10]:
# rating == 6 marks rows taken from train_soundscapes; every other row is a
# short recording. Compute the mask once and split on it.
is_soundscape = all_audio["rating"] == 6
short_audio = all_audio[~is_soundscape].reset_index(drop=True)  # only short audio
train_sondscapes = all_audio[is_soundscape].reset_index(drop=True)  # only train_soundscapes
short_audio.shape
train_sondscapes.shape

(26369, 24)

(2136, 24)

In [11]:
# Size of the label dictionary; the same value is stored as NUM_CLASSES further down.
len(dict_birds)

199

In [12]:
# train_sondscapes["duration"] = 600

# ## Make intervals for short audio

# array_for_cut = short_audio[["filename", "duration", "class_weights"]].values

# birds_intervals = make_intervals(array_for_cut, 5, max_intervals=200, max_lenght=400)
# birds_intervals.shape

# birds_intervals["row_id"] = birds_intervals.index

# birds_intervals["filename"].value_counts()

# birds_intervals_sample = birds_intervals.sample(
#     n=500000, weights="class_weights", random_state=SEED
# )

# short_audio = birds_intervals_sample.merge(
#     short_audio[
#         [
#             "filename",
#             "primary_label",
#             "secondary_labels",
#             "label_id",
#             "secondary_labels_id",
#             "duration",
#             "rating",
#             "year",
#             "file_path",
#             "frames",
#             "sin_month",
#             "cos_month",
#             "sin_longitude",
#             "cos_longitude",
#             "latitude",
#             "norm_latitude",
#             "date",
#         ]
#     ],
#     on="filename",
#     how="left",
# )

# short_audio["filename"].value_counts()

# short_audio["end_sec"] = short_audio["end_sec"].apply(lambda x: x if x > 5 else 5)

# short_audio["start_sec"] = short_audio["end_sec"] - 5

# # concat short+long

# short_audio = prepare_dataset(short_audio)
# train_sondscapes = prepare_dataset(train_sondscapes)

# final_audio = (
#     pd.concat(
#         [short_audio, train_sondscapes, train_sondscapes, train_sondscapes],
#         axis=0,
#         ignore_index=True,
#     )
#     .sample(frac=1, random_state=SEED)
#     .reset_index(drop=True)
# )
# final_audio.shape

# final_audio["primary_label"].value_counts().tail(20)

# # upsampling

# rare_birds = list(final_audio["primary_label"].value_counts().tail(5).index.values)
# rare_bird_filenames = (
#     all_audio.query("primary_label in @rare_birds")["filename"].unique().tolist()
# )
# rare_birds

# df = pd.DataFrame(columns=["end_sec", "class_weights", "filename"])
# for bird in rare_birds:
#     #     rare_bird_filenames = (
#     #     all_audio.query("primary_label ==@bird")["filename"].unique().tolist())
#     rare_array = all_audio.query(
#         "primary_label ==@bird and rating!=0 and rating!=6 and filename in @rare_bird_filenames"
#     )[["filename", "duration", "class_weights"]].values
#     intervals = make_intervals_upsampling(rare_array)
#     df = pd.concat([df, intervals], axis=0)

# df

# df["filename"].value_counts()

# df["row_id"] = df.index
# df = df.merge(
#     all_audio[
#         [
#             "filename",
#             "primary_label",
#             "secondary_labels",
#             "label_id",
#             "secondary_labels_id",
#             "duration",
#             "rating",
#             "year",
#             "file_path",
#             "frames",
#             "sin_month",
#             "cos_month",
#             "sin_longitude",
#             "cos_longitude",
#             "latitude",
#             "norm_latitude",
#             "date",
#         ]
#     ],
#     on="filename",
#     how="left",
# )
# df["end_sec"] = df["end_sec"].apply(lambda x: x if x > 5 else 5)
# df["start_sec"] = df["end_sec"] - 5
# df = prepare_dataset(df)
# final_audio = (
#     pd.concat(
#         [final_audio, df],
#         axis=0,
#         ignore_index=True,
#     )
#     .sample(frac=1, random_state=SEED)
#     .reset_index(drop=True)
# )
# final_audio.shape

# final_audio["primary_label"].value_counts().tail(20)

In [13]:
# final_audio.to_csv('/app/_data/models/sites/cor/final_audio.csv', index=False)
# Reload the table written by the (commented-out) to_csv call above instead of
# rebuilding it.
FINAL_AUDIO_CSV = "/app/_data/models/sites/cor/final_audio.csv"
final_audio = pd.read_csv(FINAL_AUDIO_CSV)


# train_test_split

In [14]:
# train, valid = train_test_split(final_audio, train_size=0.85, random_state=SEED)
# train = train.reset_index(drop=True)
# valid = valid.reset_index(drop=True)

# valid[valid["end_sec"] >= valid["duration"]]["rating"].mean()

In [15]:
# Mel-spectrogram provider handed to both the training and validation generators.
mel_settings = {
    "n_fft": N_FFT,
    "win_length": WIN_LENGHT,
    "n_mels": IMG_SIZE,
    "sample_rate": SAMPLE_RATE,
    "mel_image_size": IMG_SIZE,
    "min_frequency": FREQ_MIN,
    "max_frequency": FREQ_MAX,
    "signal_lenght": SIGNAL_LENGTH,  # (sic) keyword spelling is set by Mel_Provider
}
mel_pr = Mel_Provider(**mel_settings)

In [16]:
def get_model(num_classes=None, learning_rate=0.005):
    """Build and compile an EfficientNetB0 classifier with a sigmoid output layer.

    Parameters
    ----------
    num_classes : int, optional
        Width of the output layer. When omitted, the notebook-level
        ``NUM_CLASSES`` is used; it is looked up at call time, so it only has
        to exist before the first call.
    learning_rate : float, optional
        Initial learning rate for Adam. Defaults to 0.005.

    Returns
    -------
    keras.Model
        Model taking ``(IMG_SIZE, IMG_SIZE, 3)`` inputs, compiled with binary
        cross-entropy and accuracy / recall / precision / micro-F1 metrics.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB0(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    # Global pooling already yields a (batch, channels) tensor, so this layer
    # does not change the shape; it is kept so the layer list stays the same.
    x = keras.layers.Flatten(name="flatten")(x)
    # The notebook enables the mixed_float16 policy before calling this
    # function. Forcing the output layer to float32 keeps the probabilities,
    # loss and metrics out of half precision.
    outputs = keras.layers.Dense(
        num_classes, activation="sigmoid", dtype="float32"
    )(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
        optimizer=Adam(learning_rate=learning_rate),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            # NOTE(review): with no `threshold`, tfa's F1Score scores only the
            # arg-max prediction per sample — confirm that is what is wanted
            # for the concatenated (multi-hot) labels used in training.
            tfa.metrics.F1Score(num_classes=num_classes, average="micro"),
        ],
    )
    return model

In [17]:
# Make mixed_float16 the global Keras policy for every layer built after this cell.
# NOTE(review): the `experimental` namespace is the older spelling of this API;
# newer TensorFlow releases expose it as keras.mixed_precision.set_global_policy —
# confirm against the installed version before switching.
mp_api = keras.mixed_precision.experimental
policy = mp_api.Policy("mixed_float16")
mp_api.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [18]:
# for i in [
#     "/app/_data/npy/waves_npy/",
#     "/app/_data/npy/short_mels/",
# ]:
#     shutil.rmtree(i)

In [19]:
# Output width used by get_model() and passed as n_classes to the generators.
# It is defined after get_model(), which only reads it when called.
NUM_CLASSES = len(dict_birds)

In [20]:
# Overrides the BATCH_SIZE = 50 assigned with the other constants; this is the
# value the generators and the steps-per-epoch computation below actually see.
BATCH_SIZE = 400

In [None]:
def _make_generator(df, do_shuffle):
    """Build a MEL_Generator_Short over ``df``.

    The training and validation generators differ only in their source frame
    and in whether rows are shuffled; every other argument is shared.
    """
    return MEL_Generator_Short(
        df=df,
        n_mels=IMG_SIZE,
        seed=SEED,
        sample_rate=SAMPLE_RATE,
        mel_image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        n_classes=NUM_CLASSES,
        signal_lenght=SIGNAL_LENGTH,
        mel_provider=mel_pr,
        wave_dir="/app/_data/npy/waves_npy/",
        short_mel_dir="/app/_data/npy/short_mels/",
        norm_mel=True,
        convert_to_rgb=True,
        shuffle=do_shuffle,
        return_primary_labels=False,
        return_secondary_labels=False,
        return_concat_labels=True,
        augment=False,
    )


# Six stratified folds on the primary label; one model is trained per fold.
skf = StratifiedKFold(n_splits=6, random_state=SEED, shuffle=True)

for i, (train_index, valid_index) in enumerate(
    skf.split(final_audio, final_audio["primary_label"])
):
    fold = i + 1
    # split() yields positional indices, so rows are selected by position
    # (.iloc) rather than by index label (.loc).
    train, valid = final_audio.iloc[train_index], final_audio.iloc[valid_index]
    model_name = f"Eff0_cor_kf_{fold}.h5"
    log_dir = f"Eff0_cor_kf_{fold}/"
    print("\nIter # ", str(fold), "starting\n")
    # All metric-driven callbacks watch validation F1 and treat higher as better.
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor="val_f1_score",
            patience=12,
            restore_best_weights=True,
            verbose=1,
            mode="max",
        ),
        keras.callbacks.ModelCheckpoint(
            "/app/_data/models/sites/cor/" + model_name,
            monitor="val_f1_score",
            verbose=1,
            save_best_only=True,
            save_weights_only=False,
            mode="max",
            save_freq="epoch",
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_f1_score",
            factor=0.9,
            patience=5,
            verbose=1,
            mode="max",
            min_delta=1e-4,
            min_lr=0.00000001,
        ),
        keras.callbacks.TensorBoard(
            log_dir="/app/.tensorboard/" + log_dir, histogram_freq=0
        ),
        # NOTE(review): every fold shares this backup directory. If a run is
        # interrupted in a later fold and this cell is re-run from the start,
        # fold 1 would presumably resume from that fold's backup — consider a
        # per-fold directory.
        keras.callbacks.experimental.BackupAndRestore(
            "/app/_data/models/sites/cor/backup/"
        ),
        keras.callbacks.TerminateOnNaN(),
    ]

    gen_train = _make_generator(train, do_shuffle=True)
    gen_valid = _make_generator(valid, do_shuffle=False)

    model = get_model()
    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=200,
        steps_per_epoch=train.shape[0] // BATCH_SIZE,
        validation_steps=valid.shape[0] // BATCH_SIZE,
        verbose=1,
        max_queue_size=50,
        workers=20,
        callbacks=callbacks,
    )
    # Release the graph/session state before the next fold builds its model.
    tf.keras.backend.clear_session()


Iter #  1 starting

Epoch 56/200
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00056: val_f1_score improved from -inf to 0.91794, saving model to /app/_data/models/sites/cor/Eff0_cor_kf_1.h5
Epoch 57/200
Epoch 00057: val_f1_score improved from 0.91794 to 0.92052, saving model to /app/_data/models/sites/cor/Eff0_cor_kf_1.h5
Epoch 58/200
Epoch 00058: val_f1_score did not improve from 0.92052
Epoch 59/200

In [None]:
import kaggle

In [57]:
# Write a dataset-metadata.json template into the folder that will be uploaded.
# NOTE(review): the recorded output below names /app/_data/models/Eff01, not
# this path, so it is stale — re-run before relying on it.
! kaggle datasets init -p /app/_data/models/Eff2_3

Data package template written to: /app/_data/models/Eff01/dataset-metadata.json


In [60]:
# Upload the folder's files as a new Kaggle dataset.
# NOTE(review): the recorded log lists eff0_1.h5 and an Eff01Initial dataset,
# which do not match this Eff2_3 path — confirm the intended folder.
! kaggle datasets create -p /app/_data/models/Eff2_3

Starting upload for file dict_birds.json
100%|██████████████████████████████████████| 5.86k/5.86k [00:01<00:00, 3.59kB/s]
Upload successful: dict_birds.json (6KB)
Starting upload for file eff0_1.h5
100%|███████████████████████████████████████| 52.7M/52.7M [01:21<00:00, 675kB/s]
Upload successful: eff0_1.h5 (53MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Your private Dataset is being created. Please check progress at https://www.kaggle.com/nataliayurasova/Eff01Initial
