In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short import MEL_Generator_Short, Mel_Provider
from src.prepare_dataset import make_dict_birds, prepare_dataset, choose_ids

In [2]:
# Notebook-wide configuration: read by the mel provider, the batch generators,
# the model definition and the training call further down.
SEED = 42  # random_state for train_test_split and seed for both generators
IMG_SIZE = 260  # used as n_mels, mel_image_size and the model input height/width
SAMPLE_RATE = 32000  # passed as sample_rate; presumably Hz — TODO confirm
N_FFT = 2048  # passed as n_fft to Mel_Provider
SIGNAL_LENGTH = 5  # passed as signal_lenght; presumably seconds per clip — TODO confirm
FREQ_MIN = 500  # passed as min_frequency to Mel_Provider
FREQ_MAX = 15000  # passed as max_frequency to Mel_Provider
WIN_LENGHT = 1024  # passed as win_length; the misspelled name is referenced by a later cell
BATCH_SIZE = 128  # generator batch size; also used to derive the step counts for fit()

In [3]:
# Read each metadata table and pass it straight through the project's
# prepare_dataset helper; the last line displays the prepared rand_cut shape.
train_sound = prepare_dataset(
    pd.read_csv("/app/_data/train_soundscapes_prepared.csv")
)
rand_cut = prepare_dataset(pd.read_csv("/app/_data/random_cut_edited.csv"))
rand_cut.shape

(686847, 21)

In [4]:
# choose_ids is a project helper; only the second value it returns is kept here.
# NOTE(review): the meaning of the argument 500 is not visible in this notebook — confirm.
_, list_filenames = choose_ids(500)
len(list_filenames)

21416

In [5]:
# Keep only the rows whose filename is in the selected list, then show the size.
rand_cut = rand_cut[rand_cut["filename"].isin(list_filenames)]
rand_cut.shape

(221458, 21)

In [6]:
# Stack both tables, keep recordings with rating >= 3, display the resulting
# shape, and finally renumber the index from zero.
all_audio = pd.concat(objs=[rand_cut, train_sound], axis=0, ignore_index=True)
all_audio = all_audio.loc[all_audio["rating"].ge(3)]
all_audio.shape
all_audio = all_audio.reset_index(drop=True)

(191256, 21)

In [7]:
# make_dict_birds is a project helper returning two values: dict_birds (later
# dumped to JSON and used to size the output layer) and df (later split into
# train/valid).
# NOTE(review): presumably dict_birds maps species labels to integer ids — confirm.
dict_birds, df = make_dict_birds(all_audio)

In [8]:
# Show three random rows; seeding the draw keeps the preview identical across
# Restart & Run All instead of changing on every execution.
df.sample(3, random_state=SEED)

Unnamed: 0,filename,primary_label,secondary_labels,label_id,secondary_labels_id,start_sec,end_sec,row_id,duration,rating,...,year,file_path,frames,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude,date
117701,XC358899.ogg,oaktit,,226,,26.62,31.62,XC358899_31_62,36.901,4.0,...,2017,/app/_data/train_short_audio/oaktit/XC358899.ogg,1180832.0,0.8660254,0.5,-0.897477,-0.441061,34.0148,0.688971,2017-02-16
132186,XC322509.ogg,reevir1,rebwoo acafly,263,261 0,7.6,12.6,XC322509_12_6,24.177437,4.5,...,2016,/app/_data/train_short_audio/reevir1/XC322509.ogg,773678.0,0.5,-0.866025,-0.968333,0.249663,39.1742,0.717634,2016-05-15
170700,XC436105.ogg,wesant1,,349,,4.18,9.18,XC436105_9_19,69.229,4.0,...,2017,/app/_data/train_short_audio/wesant1/XC436105.ogg,2215328.0,1.224647e-16,-1.0,-0.969591,0.24473,6.4764,0.53598,2017-06-27


In [9]:
# Save dict_birds as JSON in the model directory. The context manager closes
# the file even if json.dump raises, which the manual open/close did not.
with open("/app/_data/models/Eff02_2/dict_birds_eff2_02.json", "w") as file_json:
    json.dump(dict_birds, file_json)

In [10]:
# 85 % / 15 % split with a fixed random_state; each part gets a fresh 0-based index.
train, valid = (
    part.reset_index(drop=True)
    for part in train_test_split(df, train_size=0.85, random_state=SEED)
)

In [11]:
# Spectrogram provider configured entirely from the notebook constants; this
# instance is later passed to the batch generators as mel_provider.
mel_pr = Mel_Provider(
    # audio input
    sample_rate=SAMPLE_RATE,
    signal_lenght=SIGNAL_LENGTH,
    # transform window
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    # frequency band
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    # output size
    n_mels=IMG_SIZE,
    mel_image_size=IMG_SIZE,
)

In [12]:
# Training callbacks. The first three all watch "val_f1_score" and treat
# higher values as better (mode="max").
callbacks = [
    # Stop after 12 epochs without improvement and roll back to the best weights.
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        mode="max",
        patience=12,
        restore_best_weights=True,
        verbose=1,
    ),
    # Save the full model (not just weights) whenever the monitored score improves.
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/Eff02_2/eff02_2.h5",
        monitor="val_f1_score",
        mode="max",
        save_best_only=True,
        save_weights_only=False,
        save_freq="epoch",
        verbose=1,
    ),
    # Multiply the learning rate by 0.9 after 5 stagnant epochs, down to 1e-8.
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        mode="max",
        factor=0.9,
        patience=5,
        min_delta=1e-4,
        min_lr=0.00000001,
        verbose=1,
    ),
    # Scalar logs only (histogram_freq=0).
    keras.callbacks.TensorBoard(histogram_freq=0, log_dir="/app/.tensorboard/eff0_2/"),
    # Backup directory handed to BackupAndRestore.
    keras.callbacks.experimental.BackupAndRestore("/app/_data/models/Eff02_2/backup/"),
    keras.callbacks.TerminateOnNaN(),
]

In [13]:
def get_model():
    """Build and compile an EfficientNetB2-based classifier with sigmoid outputs.

    An ImageNet-pretrained EfficientNetB2 backbone (without its top) is applied
    to an (IMG_SIZE, IMG_SIZE, 3) input, followed by global average pooling and
    a Dense layer with NUM_CLASSES sigmoid units. IMG_SIZE and NUM_CLASSES are
    read from the notebook globals at call time, so both must be defined
    before this function is called.

    Returns:
        keras.Model: compiled with binary cross-entropy loss, Adam with a
        learning rate of 0.005, and accuracy, recall, precision and micro-F1
        metrics.
    """
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB2(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    # Kept so the layer names and order match models saved from the original code.
    x = keras.layers.Flatten(name="flatten")(x)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `learning_rate` is the documented argument; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=0.005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            # NOTE(review): no threshold is passed to F1Score — confirm the
            # default behaviour suits the multi-label targets used here.
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="micro"),
        ],
    )
    return model

In [14]:
# Switch Keras to the "mixed_float16" dtype policy; this cell runs before the
# model is loaded further down.
# NOTE(review): the `experimental` mixed-precision API is deprecated in newer
# TensorFlow releases — confirm against the installed version before changing it.
policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [15]:
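# Optional cleanup (disabled): uncomment to delete the two directories that the
# generators below receive as wave_dir and short_mel_dir.
# for i in [
#     "/app/_data/npy/waves_npy/",
#     "/app/_data/npy/short_mels/",
# ]:
#     shutil.rmtree(i)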

In [16]:
# Number of entries in dict_birds; read by get_model() for the output layer and
# passed to both generators as n_classes.
NUM_CLASSES = len(dict_birds)

In [17]:
# The training and validation generators differ only in their dataframe and
# in whether they shuffle, so every other setting is declared once and shared.
_gen_common = dict(
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=True,
    convert_to_rgb=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    augment=False,
)

gen_train = MEL_Generator_Short(df=train, shuffle=True, **_gen_common)

gen_valid = MEL_Generator_Short(df=valid, shuffle=False, **_gen_common)

In [18]:
# Load a previously saved model from disk; get_model() is not called in the
# visible cells, so training below continues from this file.
model = keras.models.load_model('/app/_data/models/Eff02_2/eff02_2_1.h5')

In [None]:
# Train for up to 200 epochs; the step counts are the number of whole batches
# in each split, so a trailing partial batch is not counted.
history = model.fit(
    gen_train,
    epochs=200,
    steps_per_epoch=len(train) // BATCH_SIZE,
    validation_data=gen_valid,
    validation_steps=len(valid) // BATCH_SIZE,
    callbacks=callbacks,
    workers=30,
    verbose=1,
)

Epoch 1/200
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_f1_score improved from -inf to 0.83009, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 2/200
Epoch 00002: val_f1_score did not improve from 0.83009
Epoch 3/200
Epoch 00003: val_f1_score did not improve from 0.83009
Epoch 4/200
Epoch 00004: val_f1_score improved from 0.83009 to 0.83050, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 5/200
Epoch 00005: val_f1_score improved from 0.83050 to 0.83068, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 6/200
Epoch 00006: val_f1_score did not improve from 0.83068
Epoch 7/200
Epoch 00007: val_f1_score improved from 0.83068 to 0.83157, saving model to /app/_data/models/Eff02_2/eff02_2.h5
Epoch 8/200
Epoch 00008: val_f1_score did not improve from 0.83157
Epoch 9/200
Epoch 00009: val_f1_score did not improve from 0.83157
Epoch 10/200
Epoch 00010: val_f1_score did not improve from 0.83157
Epoch 11/200
Epoch 00011: va

In [56]:
import kaggle

/bin/bash: import: command not found


In [57]:
# Kaggle CLI: write a dataset-metadata.json template into the given folder.
! kaggle datasets init -p /app/_data/models/Eff02_2

Data package template written to: /app/_data/models/Eff01/dataset-metadata.json


In [60]:
! kaggle datasets create -p /app/_data/models/Eff01

Starting upload for file dict_birds.json
100%|██████████████████████████████████████| 5.86k/5.86k [00:01<00:00, 3.59kB/s]
Upload successful: dict_birds.json (6KB)
Starting upload for file eff0_1.h5
100%|███████████████████████████████████████| 52.7M/52.7M [01:21<00:00, 675kB/s]
Upload successful: eff0_1.h5 (53MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Your private Dataset is being created. Please check progress at https://www.kaggle.com/nataliayurasova/Eff01Initial
