In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short import MEL_Generator_Short, Mel_Provider
from src.prepare_dataset import make_dict_birds, prepare_dataset, choose_ids

In [2]:
# Notebook-wide configuration shared by the split, the mel provider, the generators and the model.
SEED = 42  # random_state for train_test_split and `seed` for both generators
IMG_SIZE = 260  # model input height/width; also passed as n_mels and mel_image_size
SAMPLE_RATE = 32000  # passed as sample_rate (presumably Hz -- confirm in Mel_Provider)
N_FFT = 2048  # passed to Mel_Provider as n_fft
SIGNAL_LENGTH = 5  # passed as signal_lenght (presumably seconds -- TODO confirm)
FREQ_MIN = 500  # passed to Mel_Provider as min_frequency
FREQ_MAX = 15000  # passed to Mel_Provider as max_frequency
WIN_LENGHT = 1024  # passed to Mel_Provider as win_length; name is misspelled but other cells reference it
BATCH_SIZE = 128  # generator batch size; also the divisor for steps_per_epoch / validation_steps

In [3]:
# Read both CSVs first (random cuts, then soundscapes), then run each one
# through prepare_dataset in the same order as before.
rand_cut, train_sound = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/app/_data/random_cut_edited.csv",
        "/app/_data/train_soundscapes_prepared.csv",
    )
)
train_sound = prepare_dataset(train_sound)
rand_cut = prepare_dataset(rand_cut)
rand_cut.shape

(686847, 21)

In [4]:
# choose_ids returns a pair; only the second item (the filenames to keep) is used.
# NOTE(review): binding `_` shadows IPython's "last output" variable for this session.
_, list_filenames = choose_ids(500)
len(list_filenames)

21416

In [5]:
# Keep only the rows whose filename was selected by choose_ids.
keep_rows = rand_cut["filename"].isin(list_filenames)
rand_cut = rand_cut[keep_rows]
rand_cut.shape

(221458, 21)

In [6]:
# Stack the random cuts on top of the soundscape rows, keep recordings rated
# 3 or higher, and renumber the index from zero.
all_audio = (
    pd.concat([rand_cut, train_sound], axis=0, ignore_index=True)
    .loc[lambda frame: frame["rating"] >= 3]
    .reset_index(drop=True)
)
all_audio.shape

(191256, 21)

In [7]:
# make_dict_birds returns a pair: `dict_birds` (dumped to JSON below; its length
# becomes NUM_CLASSES) and `df` (the frame that is split into train/valid).
dict_birds, df = make_dict_birds(all_audio)

In [8]:
# Seeded so the preview shows the same three rows after a kernel restart.
df.sample(3, random_state=SEED)

Unnamed: 0,filename,primary_label,secondary_labels,label_id,secondary_labels_id,start_sec,end_sec,row_id,duration,rating,...,year,file_path,frames,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude,date
95687,XC214345.ogg,laufal1,gryhaw2,185,168,149.37,154.37,XC214345_154_36,207.8705,4.5,...,2015,/app/_data/train_short_audio/laufal1/XC214345.ogg,6651856.0,0.866025,0.5,-0.999849,0.017376,14.9028,0.582793,2015-02-21
21652,XC458969.ogg,bkcchi,amegfi norcar,36,5 218,61.87,66.87,XC458969_66_9,82.456625,3.5,...,2019,/app/_data/train_short_audio/bkcchi/XC458969.ogg,2638612.0,0.866025,0.5,-0.969402,0.245478,45.3698,0.752054,2019-02-23
62210,XC245288.ogg,cotfly1,barant1,115,22,7.1,12.1,XC245288_12_1,116.581,3.5,...,2015,/app/_data/train_short_audio/cotfly1/XC245288.ogg,3730592.0,1.0,6.123234000000001e-17,-0.952129,0.305695,5.76,0.532,2015-03-23


In [9]:
# Save dict_birds next to the model files. The context manager closes the
# handle even if json.dump raises (e.g. on a non-serialisable value).
with open("/app/_data/models/Eff02_2/dict_birds_eff2_02.json", "w") as file_json:
    json.dump(dict_birds, file_json)

In [10]:
# 85 % / 15 % split with a fixed seed; each part gets a fresh 0..n-1 index.
train, valid = (
    part.reset_index(drop=True)
    for part in train_test_split(df, train_size=0.85, random_state=SEED)
)

In [11]:
# Single Mel_Provider instance, handed to both generators as `mel_provider`.
# All values come from the configuration cell; IMG_SIZE is used for both the
# number of mel bands and the mel image size.
mel_pr = Mel_Provider(
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    n_mels=IMG_SIZE,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    signal_lenght=SIGNAL_LENGTH,  # keyword spelling as used in this notebook's call
)

In [12]:
# Every metric-driven callback watches the same validation metric and treats
# larger values as better.
monitored_metric = "val_f1_score"

# Stop after 12 epochs without improvement and roll back to the best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor=monitored_metric,
    mode="max",
    patience=12,
    restore_best_weights=True,
    verbose=1,
)

# Write the full model (not just weights) whenever the metric improves.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="/app/_data/models/Eff02_2/eff02_2.h5",
    monitor=monitored_metric,
    mode="max",
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
    verbose=1,
)

# Multiply the learning rate by 0.9 after 5 epochs without improvement.
lr_schedule = keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric,
    mode="max",
    factor=0.9,
    patience=5,
    min_delta=1e-4,
    min_lr=1e-8,
    verbose=1,
)

tensorboard_log = keras.callbacks.TensorBoard(
    log_dir="/app/.tensorboard/eff0_2/", histogram_freq=0
)
backup_restore = keras.callbacks.experimental.BackupAndRestore(
    "/app/_data/models/Eff02_2/backup/"
)

callbacks = [
    early_stopping,
    best_checkpoint,
    lr_schedule,
    tensorboard_log,
    backup_restore,
    keras.callbacks.TerminateOnNaN(),
]

In [13]:
def get_model():
    """Build and compile the multi-label EfficientNetB2 classifier.

    The network takes ``(IMG_SIZE, IMG_SIZE, 3)`` images, runs them through an
    ImageNet-initialised EfficientNetB2 without its top, global-average-pools
    the features and ends in a ``NUM_CLASSES``-wide sigmoid layer.

    ``IMG_SIZE`` and ``NUM_CLASSES`` are module-level names that are looked up
    when the function is called, so both must be defined before the call.

    Returns:
        keras.Model: compiled with binary cross-entropy, Adam (learning rate
        0.005) and accuracy / recall / precision / micro-F1 metrics.
    """
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB2(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    # Kept so the layer list of the model stays the same as before.
    x = keras.layers.Flatten(name="flatten")(x)
    # The notebook enables the mixed_float16 policy; pin the output layer to
    # float32 so the probabilities fed to the loss and metrics are not
    # rounded to half precision.
    outputs = keras.layers.Dense(
        NUM_CLASSES, activation="sigmoid", dtype="float32"
    )(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
        optimizer=Adam(learning_rate=0.005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="micro"),
        ],
    )
    return model

In [14]:
# Make "mixed_float16" the global Keras dtype policy.
# NOTE(review): this should run before get_model() is called so the layers are
# built under the policy -- confirm against the Keras mixed-precision docs.
policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [23]:
# Remove the .npy directories that the generators are pointed at
# (wave_dir / short_mel_dir). A directory that does not exist is skipped, so
# the cell also works on a fresh environment instead of raising
# FileNotFoundError.
for npy_dir in [
    "/app/_data/npy/waves_npy/",
    "/app/_data/npy/short_mels/",
]:
    if os.path.isdir(npy_dir):
        shutil.rmtree(npy_dir)

In [16]:
# One output unit per entry in dict_birds; read by get_model() and by both generators.
NUM_CLASSES = len(dict_birds)

In [17]:
# Keyword arguments that are identical for the training and validation
# generators; only the data frame and the shuffle flag differ between them.
generator_settings = dict(
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=True,
    convert_to_rgb=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    augment=False,
)

# Training batches are shuffled; validation batches keep the frame order.
gen_train = MEL_Generator_Short(df=train, shuffle=True, **generator_settings)
gen_valid = MEL_Generator_Short(df=valid, shuffle=False, **generator_settings)

In [18]:
# To continue from a previously saved model instead of building a new one,
# use the commented line in place of get_model():
# model = keras.models.load_model('/app/_data/models/Eff02_2/eff02_2_1.h5')
model = get_model()

In [19]:
# Steps per pass: row count floor-divided by the batch size.
train_steps = len(train) // BATCH_SIZE
valid_steps = len(valid) // BATCH_SIZE

# Up to 200 epochs; the callbacks list decides when training actually ends.
history = model.fit(
    gen_train,
    epochs=200,
    steps_per_epoch=train_steps,
    validation_data=gen_valid,
    validation_steps=valid_steps,
    callbacks=callbacks,
    workers=30,
    verbose=1,
)

Epoch 28/200
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00028: val_f1_score improved from -inf to 0.83153, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 29/200
Epoch 00029: val_f1_score improved from 0.83153 to 0.83177, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 30/200
Epoch 00030: val_f1_score did not improve from 0.83177
Epoch 31/200
Epoch 00031: val_f1_score did not improve from 0.83177
Epoch 32/200
Epoch 00032: val_f1_score improved from 0.83177 to 0.83193, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 33/200

KeyboardInterrupt: 

In [20]:
import kaggle



In [21]:
! kaggle datasets init -p /app/_data/models/Eff02_2

Data package template written to: /app/_data/models/Eff02_2/dataset-metadata.json


In [22]:
! kaggle datasets create -p /app/_data/models/Eff02_2

Starting upload for file eff02_2.h5
100%|███████████████████████████████████████| 95.9M/95.9M [02:27<00:00, 682kB/s]
Upload successful: eff02_2.h5 (96MB)
Starting upload for file dict_birds_eff2_02.json
100%|██████████████████████████████████████| 5.78k/5.78k [00:02<00:00, 2.36kB/s]
Upload successful: dict_birds_eff2_02.json (6KB)
Starting upload for file eff02_2_1.h5
100%|███████████████████████████████████████| 95.9M/95.9M [02:26<00:00, 687kB/s]
Upload successful: eff02_2_1.h5 (96MB)
Skipping folder: backup; use '--dir-mode' to upload folders
Starting upload for file eff02_2_2.h5
100%|███████████████████████████████████████| 95.9M/95.9M [02:27<00:00, 683kB/s]
Upload successful: eff02_2_2.h5 (96MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Your private Dataset is being created. Please check progress at https://www.kaggle.com/nataliayurasova/Eff2Version3
