In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.generator import MEL_Generator, Mel_Provider
from src.prepare_dataset import prepare_dataset

In [2]:
# shutil.rmtree('/app/_data/npy/')

In [3]:
# --- Notebook-wide configuration -------------------------------------------
# Random seed reused for sampling, the train/valid split and both generators.
SEED = 42
# Used as the number of mel bins, the mel image side length and the model's
# square input size (IMG_SIZE, IMG_SIZE, 3).
IMG_SIZE = 260
# Passed as `sample_rate` to the mel provider and generators (presumably Hz).
SAMPLE_RATE = 32000
# FFT size handed to the mel provider.
N_FFT = 2048
# Passed as `signal_lenght`; clips are built as [end_sec - 5, end_sec], so this
# is presumably the clip length in seconds.
SIGNAL_LENGTH = 5
# Lower / upper frequency bounds handed to the mel provider (presumably Hz).
FREQ_MIN = 500
FREQ_MAX = 15000
# STFT window length. NOTE(review): the name is misspelled ("LENGHT") but is
# referenced under this spelling elsewhere in the notebook.
WIN_LENGHT = 1024
# Batch size for both generators; also used to derive steps per epoch.
BATCH_SIZE = 128
# Number of output units of the classifier and of the F1 metric.
NUM_CLASSES = 398
# FILL_NA = 'missing_values'

In [4]:
# Single Mel_Provider instance configured from the constants above; it is
# passed to both the training and validation generators as `mel_provider`.
# NOTE(review): `signal_lenght` is spelled this way in the keyword itself, so
# it presumably matches the Mel_Provider signature — do not "fix" it here.
mel_pr = Mel_Provider(
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    n_mels=IMG_SIZE,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    signal_lenght=SIGNAL_LENGTH,
)

In [5]:
def my_floor(a, precision=2):
    """Round ``a`` down (toward negative infinity) to ``precision`` decimals.

    The integer part and the fractional part are floored separately and then
    added back together. Only ``np.floor`` and plain arithmetic are used, so
    ``a`` may be a scalar or anything ``np.floor`` accepts.

    Note: the arithmetic runs in binary floating point, so an input that
    already looks rounded can still lose one unit in the last kept place
    (``my_floor(0.29)`` gives ``0.28`` because ``0.29 * 100`` evaluates to
    ``28.999999999999996``).

    Args:
        a: Value(s) to floor.
        precision: Number of decimal places to keep (default 2).

    Returns:
        ``a`` floored to ``precision`` decimal places.
    """
    scale = 10 ** precision
    whole = np.floor(a)
    # Floor only the fractional remainder at the requested precision.
    frac = np.floor((a - whole) * scale) / scale
    return whole + frac

In [6]:
# Load the bird dictionary from JSON. The file is read exactly once.
# NOTE(review): presumably maps species codes to label ids — confirm.
with open("/app/_data/dict_birds.json", "r") as birds:
    dict_birds = json.load(birds)

# Clip-level metadata tables. Both are passed through the project's
# prepare_dataset() before they are combined in the next cell.
rand_cut = pd.read_csv("/app/_data/random_cut1.csv")
# all_audio_init = pd.read_csv("/app/_data/all_audio_initial.csv")
train_sound = pd.read_csv("/app/_data/train_soundscapes_prepared.csv")
train_sound = prepare_dataset(train_sound)
rand_cut = prepare_dataset(rand_cut)

In [7]:
# Stack the random-cut clips and the soundscape clips into one table with a
# fresh 0..n-1 index.
all_audio = pd.concat([rand_cut, train_sound], axis=0, ignore_index=True)
# Floor every clip end time to two decimals, then derive the start time so
# each window is exactly SIGNAL_LENGTH long (kept in sync with the config
# cell instead of repeating the literal 5).
all_audio["end_sec"] = all_audio["end_sec"].apply(my_floor)
all_audio["start_sec"] = all_audio["end_sec"] - SIGNAL_LENGTH

In [8]:
# Keep only rows whose rating is 3 or higher.
all_audio = all_audio.loc[all_audio["rating"] >= 3]
# Draw 200000 rows (pandas samples without replacement by default), weighted
# by the "class_weights" column, and renumber the result from 0.
all_audio_sample = all_audio.sample(
    random_state=SEED, weights="class_weights", n=200000
).reset_index(drop=True)
# all_audio_sample = all_audio_sample.fillna(FILL_NA)
# all_audio_500 = pd.read_csv("/app/_data/my/all_audio_500.csv")

In [9]:
# Preview the weighted sample (pandas shows only the first and last rows).
all_audio_sample

Unnamed: 0,filename,primary_label,secondary_labels,label_id,secondary_labels_id,start_sec,end_sec,row_id,duration,rating,class_weights,year,file_path,frames,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude
0,XC499389.ogg,gilwoo,,144,,16.09,21.09,XC499389_21_1,47.530906,5.0,0.010000,2019,/app/_data/train_short_audio/gilwoo/XC499389.ogg,1520989.0,-1.000000e+00,-1.836970e-16,-0.922554,-0.385869,33.9346,0.688526
1,XC133908.ogg,woothr,blujay,379,48,7.36,12.36,XC133908_12_37,58.669000,3.5,0.004367,2013,/app/_data/train_short_audio/woothr/XC133908.ogg,1877408.0,5.000000e-01,-8.660254e-01,-0.998439,-0.055856,44.3850,0.746583
2,XC63303.ogg,rudpig,,289,,5.92,10.92,XC63303_10_93,16.527000,4.0,0.007463,2001,/app/_data/train_short_audio/rudpig/XC63303.ogg,528864.0,-1.000000e+00,-1.836970e-16,-0.947583,0.319510,-11.8667,0.434074
3,XC143680.ogg,orbspa1,wbwwre1,235,351,8.40,13.40,XC143680_13_41,22.009281,3.5,0.007194,2013,/app/_data/train_short_audio/orbspa1/XC143680.ogg,704297.0,8.660254e-01,5.000000e-01,-0.965545,0.260237,5.4309,0.530172
4,XC569949.ogg,brnthr,,60,,122.50,127.50,XC569949_127_48,183.733344,4.0,0.006711,2020,/app/_data/train_short_audio/brnthr/XC569949.ogg,5879467.0,1.224647e-16,-1.000000e+00,-0.998267,-0.058853,44.8349,0.749083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,XC401084.ogg,brbsol1,,55,,20.03,25.03,XC401084_25_03,154.789688,5.0,0.006667,2014,/app/_data/train_short_audio/brbsol1/XC401084.ogg,4953270.0,-2.449294e-16,1.000000e+00,-0.946892,-0.321552,27.0819,0.650455
199996,XC391221.ogg,grekis,,158,,23.01,28.01,XC391221_28_01,69.637000,4.0,0.002770,2017,/app/_data/train_short_audio/grekis/XC391221.ogg,2228384.0,-8.660254e-01,5.000000e-01,-0.679102,0.734044,-22.4508,0.375273
199997,XC11044.ogg,littin1,,198,,0.73,5.73,XC11044_5_73,10.071312,4.0,0.004902,2006,/app/_data/train_short_audio/littin1/XC11044.ogg,322282.0,-2.449294e-16,1.000000e+00,-0.981558,0.191165,-4.9548,0.472473
199998,XC336062.ogg,houwre,,180,,14.89,19.89,XC336062_19_89,53.773000,4.5,0.002000,2016,/app/_data/train_short_audio/houwre/XC336062.ogg,1720736.0,-1.000000e+00,-1.836970e-16,-0.687527,0.726158,-22.5939,0.374478


In [10]:
# Sanity check: the widest (end_sec - start_sec) window. start_sec was derived
# as end_sec - 5, so this is expected to be 5.0.
(all_audio_sample['end_sec'] - all_audio_sample['start_sec']).max()

5.0

In [11]:
# Peek at the '_'-separated components of row_id.
all_audio_sample['row_id'].str.split('_')

0           [XC499389, 21, 1]
1          [XC133908, 12, 37]
2           [XC63303, 10, 93]
3          [XC143680, 13, 41]
4         [XC569949, 127, 48]
                 ...         
199995     [XC401084, 25, 03]
199996     [XC391221, 28, 01]
199997       [XC11044, 5, 73]
199998     [XC336062, 19, 89]
199999     [XC351981, 35, 01]
Name: row_id, Length: 200000, dtype: object

In [12]:
# 75 % / 25 % random split of the sampled table (seeded for repeatability).
train, valid = train_test_split(
    all_audio_sample, random_state=SEED, train_size=0.75
)
# Renumber both partitions from 0 so positional and label indexing agree.
train, valid = (part.reset_index(drop=True) for part in (train, valid))

In [13]:
# Training callbacks. Every metric-driven callback watches the validation F1
# score and treats higher values as better (mode="max").
callbacks = [
    # Stop after 12 epochs without improvement and roll back to the best weights.
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        patience=12,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    # Save the full model (not just weights) whenever val_f1_score improves.
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/Eff22/eff2_2.h5",
        monitor="val_f1_score",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    # Multiply the learning rate by 0.9 after 5 stagnant epochs.
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        factor=0.9,
        patience=5,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=0.00000001,
    ),
    # NOTE(review): the log directory is named "eff0_2" while the checkpoint
    # is "eff2_2" — confirm this is the intended TensorBoard run name.
    keras.callbacks.TensorBoard(log_dir="/app/.tensorboard/eff0_2/", histogram_freq=0),
    # BackupAndRestore needs a directory of its own: its backup_dir must not
    # be shared with other callbacks such as ModelCheckpoint, so it gets a
    # dedicated sub-directory instead of the checkpoint folder itself.
    keras.callbacks.experimental.BackupAndRestore("/app/_data/models/Eff22/backup/"),
    # Abort training as soon as the loss becomes NaN.
    keras.callbacks.TerminateOnNaN(),
]

In [14]:
# Number of distinct primary labels in the validation split; it should equal
# NUM_CLASSES so that every class is represented during validation.
len(valid["primary_label"].unique())

398

In [15]:
def get_model():
    """Build and compile the EfficientNetB2 multi-label classifier.

    The network is an ImageNet-initialised ``EfficientNetB2`` backbone (without
    its classification top) applied to ``(IMG_SIZE, IMG_SIZE, 3)`` inputs,
    followed by global average pooling and a ``NUM_CLASSES``-unit sigmoid
    layer. It is compiled with binary cross-entropy, Adam (learning rate
    0.005) and accuracy / recall / precision / micro-F1 metrics.

    Returns:
        keras.Model: the compiled model.
    """
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB2(
        weights="imagenet", include_top=False
    )
    #     base_model.load_weights(
    #         "/app/_data/models/efficientnet-b5_noisy-student_notop.h5",
    #         by_name=True,
    #         skip_mismatch=True,
    #     )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    # The pooled tensor is already (batch, channels); Flatten is a no-op here
    # and is kept only so the layer list stays unchanged.
    x = keras.layers.Flatten(name="flatten")(x)
    # Force the output layer to float32: under a mixed_float16 policy the
    # sigmoid outputs (and the loss computed from them) would otherwise be
    # float16, which is numerically fragile. With the default float32 policy
    # this argument changes nothing.
    outputs = keras.layers.Dense(
        NUM_CLASSES, activation="sigmoid", dtype="float32"
    )(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `lr` is a deprecated alias; `learning_rate` is the supported name.
        optimizer=Adam(learning_rate=0.005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="micro"),
        ],
    )
    return model

In [16]:
# Switch Keras to the "mixed_float16" policy. This cell runs before
# get_model() is called, so the model's layers are created under this policy.
# NOTE(review): the `experimental` mixed-precision API is deprecated in newer
# TensorFlow releases (replaced by keras.mixed_precision.set_global_policy) —
# confirm against the installed version before migrating.
policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [21]:
# Delete the .npy directories that the generators below use as wave_dir,
# long_mel_dir and short_mel_dir, so their contents are rebuilt from scratch.
# Directories that do not exist are skipped; otherwise shutil.rmtree raises
# FileNotFoundError and the notebook cannot run top-to-bottom on a fresh
# environment.
for cache_dir in (
    "/app/_data/npy/waves_npy_02/",
    "/app/_data/npy/long_mels_02_1024_260/",
    "/app/_data/npy/short_mels_02_1024_260/",
):
    if os.path.isdir(cache_dir):
        shutil.rmtree(cache_dir)

In [18]:
# Settings shared by the training and validation generators. Keeping them in
# one place guarantees both generators are configured identically; the class
# count comes from NUM_CLASSES rather than a repeated literal.
generator_kwargs = dict(
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy_02/",
    long_mel_dir="/app/_data/npy/long_mels_02_1024_260/",
    short_mel_dir="/app/_data/npy/short_mels_02_1024_260/",
    convert_to_rgb=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
)

# The two generators differ only in their data frame and in shuffling:
# training batches are shuffled, validation batches keep a fixed order.
gen_train = MEL_Generator(df=train, shuffle=True, **generator_kwargs)

gen_valid = MEL_Generator(df=valid, shuffle=False, **generator_kwargs)

In [19]:
# Build and compile the network defined by get_model().
model = get_model()

In [20]:
# Whole batches per epoch; floor division leaves any trailing partial batch
# out of the step count.
train_steps = len(train) // BATCH_SIZE
valid_steps = len(valid) // BATCH_SIZE

# Train for up to 100 epochs; the callbacks decide about early stopping,
# checkpointing and learning-rate changes.
history = model.fit(
    x=gen_train,
    epochs=100,
    steps_per_epoch=train_steps,
    validation_data=gen_valid,
    validation_steps=valid_steps,
    callbacks=callbacks,
    workers=30,
    verbose=1,
)

Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_f1_score improved from -inf to 0.15651, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 2/100
Epoch 00002: val_f1_score improved from 0.15651 to 0.34262, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 3/100
Epoch 00003: val_f1_score improved from 0.34262 to 0.36368, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 4/100
Epoch 00004: val_f1_score improved from 0.36368 to 0.49132, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 5/100
Epoch 00005: val_f1_score improved from 0.49132 to 0.53057, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 6/100
Epoch 00006: val_f1_score did not improve from 0.53057
Epoch 7/100
Epoch 00007: val_f1_score improved from 0.53057 to 0.57789, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 8/100
Epoch 00008: val_f1_score improved from 0.57789 to 0.60675, saving model to /app/_data/models/Eff22/eff2_2.h5
Epoch 9/10

In [56]:
import kaggle 

/bin/bash: import: command not found


In [57]:
# Write a dataset-metadata.json template into the model folder (Kaggle CLI).
# NOTE(review): this targets .../models/Eff01, whereas the checkpoint callback
# in this notebook saves to /app/_data/models/Eff22/ — confirm which folder
# is meant to be published.
! kaggle datasets init -p /app/_data/models/Eff01

Data package template written to: /app/_data/models/Eff01/dataset-metadata.json


In [60]:
# Create the Kaggle dataset by uploading the files in the folder (sub-folders
# are skipped unless --dir-mode is given, as the CLI output below reports).
! kaggle datasets create -p /app/_data/models/Eff01

Starting upload for file dict_birds.json
100%|██████████████████████████████████████| 5.86k/5.86k [00:01<00:00, 3.59kB/s]
Upload successful: dict_birds.json (6KB)
Starting upload for file eff0_1.h5
100%|███████████████████████████████████████| 52.7M/52.7M [01:21<00:00, 675kB/s]
Upload successful: eff0_1.h5 (53MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Your private Dataset is being created. Please check progress at https://www.kaggle.com/nataliayurasova/Eff01Initial
