In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short import MEL_Generator_Short, Mel_Provider
from src.prepare_dataset import make_dict_birds, prepare_dataset, choose_ids, make_intervals

In [2]:
SEED = 42
IMG_SIZE = 260
SAMPLE_RATE = 32000
N_FFT = 2048
SIGNAL_LENGTH = 5
FREQ_MIN = 0
FREQ_MAX = 16000
WIN_LENGHT = 1024
BATCH_SIZE = 50

In [3]:
al_audio_init = pd.read_csv('/app/_data/all_audio_initial.csv')
train_sound = pd.read_csv("/app/_data/train_soundscapes_prepared.csv")
train_sound = prepare_dataset(train_sound)


In [4]:
_, list_filenames = choose_ids(500)
len(list_filenames)

21416

In [5]:
short_audio_df = al_audio_init[
    al_audio_init["rating"]!=6].query('filename in @list_filenames').reset_index(drop=True)
short_audio_df.shape

(21396, 24)

In [6]:
dict_weights = (1 / short_audio_df["primary_label"].value_counts()).to_dict()
short_audio_df["class_weights"] = short_audio_df["primary_label"].replace(dict_weights)

In [7]:
array_for_cut = short_audio_df[["filename", "duration", 'class_weights']].values

In [8]:
birds_intervals = make_intervals(array_for_cut, 5)
birds_intervals.shape

(691562, 3)

In [9]:
birds_intervals['row_id'] = birds_intervals.index

In [10]:
birds_intervals_sample = birds_intervals.sample(n=250000, weights='class_weights', random_state=SEED)

In [11]:
birds_intervals['end_sec']

XC127032_5_0         5.00
XC127032_7_0         7.00
XC127032_9_0         9.00
XC127032_11_0       11.00
XC127032_13_0       13.00
                    ...  
XC602701_99_0       99.00
XC602701_101_0     101.00
XC602701_101_01    101.01
XC602701_101_83    101.83
XC602701_100_93    100.93
Name: end_sec, Length: 691562, dtype: float64

In [12]:
birds_intervals['end_sec'].describe()

count    691562.000000
mean         61.579991
std         111.912144
min           4.980000
25%          13.112500
50%          30.510000
75%          67.000000
max        2739.620000
Name: end_sec, dtype: float64

In [13]:
short_audio_df = prepare_dataset(short_audio_df)

In [14]:
short_audio_df_concat = birds_intervals_sample.merge(short_audio_df[['filename', 'primary_label', 'secondary_labels', 'label_id',
       'secondary_labels_id', 'duration',
       'rating', 'year', 'file_path', 'frames', 'sin_month',
       'cos_month', 'sin_longitude', 'cos_longitude', 'latitude',
       'norm_latitude', 'date']], on='filename', how = 'left')

In [15]:
short_audio_df_concat['start_sec'] = short_audio_df_concat['end_sec']-5

In [16]:
all_audio = pd.concat([short_audio_df_concat, train_sound, train_sound,train_sound,train_sound,train_sound], axis=0, ignore_index=True)
# all_audio = all_audio[all_audio["rating"] >= 3]
all_audio.shape
all_audio = all_audio.sample(frac=1, random_state=SEED).reset_index(drop=True)

(262000, 21)

In [17]:
all_audio['end_sec'].describe()

count    262000.000000
mean         71.764552
std         131.468079
min           4.980000
25%          13.000000
50%          31.000000
75%          73.000000
max        2738.660000
Name: end_sec, dtype: float64

In [18]:
all_audio['end_sec'].sum()

18802312.53999999

In [19]:
all_audio['duration'].sum()

28163163.8900625

In [20]:
dict_birds, df = make_dict_birds(all_audio)

In [21]:
df.sample(3)

Unnamed: 0,end_sec,class_weights,filename,row_id,primary_label,secondary_labels,label_id,secondary_labels_id,duration,rating,...,file_path,frames,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude,date,start_sec
41698,183.0,0.012987,XC179676.ogg,XC179676_183_0,canwar,mouwar sonspa bnhcow amerob swathr,81,214 311 50 9 328,238.694969,3.5,...,/app/_data/train_short_audio/canwar/XC179676.ogg,7638239.0,0.5,-0.866025,-0.992638,0.121116,40.0168,0.722316,2014-05-24,178.0
169237,20.0,0.015625,XC212929.ogg,XC212929_20_0,killde,y00475,182,376,33.268031,4.5,...,/app/_data/train_short_audio/killde/XC212929.ogg,1064577.0,0.866025,0.5,-0.881874,-0.471486,34.7917,0.693287,2015-02-08,15.0
166372,57.0,0.019608,XC241226.ogg,XC241226_57_0,thswar1,,332,,65.173,5.0,...,/app/_data/train_short_audio/thswar1/XC241226.ogg,2085536.0,0.5,0.866025,-0.978449,0.206488,1.3001,0.507223,2014-01-22,52.0


In [22]:
file_json = open("/app/_data/models/Eff2_3/dict_birds_eff2_3.json", "w")
json.dump(dict_birds, file_json)
file_json.close()

In [23]:
df.loc[218614,'end_sec']=5

In [24]:
df[df['end_sec']<5]

Unnamed: 0,end_sec,class_weights,filename,row_id,primary_label,secondary_labels,label_id,secondary_labels_id,duration,rating,...,file_path,frames,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude,date,start_sec
143696,4.98,0.016129,XC217441.ogg,XC217441_4_98,littin1,,195,,5.970094,3.0,...,/app/_data/train_short_audio/littin1/XC217441.ogg,191043.0,0.866025,0.5,-0.982069,0.188523,0.1431,0.500795,2015-02-20,-0.02


In [25]:
df.to_csv('/app/_data/models/Eff2_3/df.csv')
df=pd.read_csv('/app/_data/models/Eff2_3/df.csv', index_col=[0])


In [26]:
train, valid = train_test_split(df, train_size=0.85, random_state=SEED)
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)

In [27]:
valid[valid['end_sec']>=valid['duration']]['rating'].mean()

6.0

In [28]:
df['end_sec'].isna().sum()

0

In [29]:
df[df['end_sec']>df['duration']]

Unnamed: 0,end_sec,class_weights,filename,row_id,primary_label,secondary_labels,label_id,secondary_labels_id,duration,rating,...,file_path,frames,sin_month,cos_month,sin_longitude,cos_longitude,latitude,norm_latitude,date,start_sec
4,600.0,0.000654,26709_SSW_20170701.ogg,26709_SSW_600,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/26709_SSW_2017070...,160000.0,-0.500000,-8.660254e-01,-0.972166,0.234294,42.47,0.735944,2017-07-01,595.0
78,80.0,0.000654,14473_SSW_20170701.ogg,14473_SSW_80,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/14473_SSW_2017070...,160000.0,-0.500000,-8.660254e-01,-0.972166,0.234294,42.47,0.735944,2017-07-01,75.0
109,390.0,0.000654,50878_COR_20191004.ogg,50878_COR_390,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/50878_COR_2019100...,160000.0,-0.866025,5.000000e-01,-0.995413,0.095672,10.12,0.556222,2019-10-04,385.0
125,365.0,0.003546,28933_SSW_20170408.ogg,28933_SSW_365,cangoo,sonspa,80,311,5.0,6.0,...,/app/_data/train_soundscapes/28933_SSW_2017040...,160000.0,0.866025,-5.000000e-01,-0.972166,0.234294,42.47,0.735944,2017-04-08,360.0
132,475.0,0.000654,14473_SSW_20170701.ogg,14473_SSW_475,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/14473_SSW_2017070...,160000.0,-0.500000,-8.660254e-01,-0.972166,0.234294,42.47,0.735944,2017-07-01,470.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261924,210.0,0.000654,18003_COR_20190904.ogg,18003_COR_210,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/18003_COR_2019090...,160000.0,-1.000000,-1.836970e-16,-0.995413,0.095672,10.12,0.556222,2019-09-04,205.0
261963,180.0,0.000654,14473_SSW_20170701.ogg,14473_SSW_180,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/14473_SSW_2017070...,160000.0,-0.500000,-8.660254e-01,-0.972166,0.234294,42.47,0.735944,2017-07-01,175.0
261973,350.0,0.000654,11254_COR_20190904.ogg,11254_COR_350,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/11254_COR_2019090...,160000.0,-1.000000,-1.836970e-16,-0.995413,0.095672,10.12,0.556222,2019-09-04,345.0
261988,235.0,0.000654,42907_SSW_20170708.ogg,42907_SSW_235,nocall,,217,,5.0,6.0,...,/app/_data/train_soundscapes/42907_SSW_2017070...,160000.0,-0.500000,-8.660254e-01,-0.972166,0.234294,42.47,0.735944,2017-07-08,230.0


In [30]:
mel_pr = Mel_Provider(
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    n_mels=IMG_SIZE,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    signal_lenght=SIGNAL_LENGTH,
)

In [31]:
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        patience=12,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/eff2_3/eff2_3.h5",
        monitor="val_f1_score",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        factor=0.9,
        patience=5,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=0.00000001,
    ),
    keras.callbacks.TensorBoard(log_dir="/app/.tensorboard/eff2_3/", histogram_freq=0),
    keras.callbacks.experimental.BackupAndRestore("/app/_data/models/eff2_3/backup/"),
    keras.callbacks.TerminateOnNaN(),
]

In [32]:
def get_model():
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB2(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    outputs = keras.layers.Dense(NUM_CLASSES, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(lr=0.005),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="micro"),
        ],
    )
    return model

In [33]:
policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [34]:
# for i in [
#     "/app/_data/npy/waves_npy/",
#     "/app/_data/npy/short_mels/",
# ]:
#     shutil.rmtree(i)

In [35]:
BATCH_SIZE = 128

In [36]:
NUM_CLASSES = len(dict_birds)

In [37]:
gen_train = MEL_Generator_Short(
    df=train,
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=True,
    convert_to_rgb=True,
    shuffle=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    augment=False
)

gen_valid = MEL_Generator_Short(
    df=valid,
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    n_classes=NUM_CLASSES,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=True,
    convert_to_rgb=True,
    shuffle=False,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    augment=False
)

In [38]:
# i

In [39]:
# for i in tqdm(range(55500, 155500)):
# #     print(i)
#     a = gen_train._get_one(i)


In [40]:
model = get_model()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb2_notop.h5


In [41]:
history = model.fit(
    gen_train,
    validation_data=gen_valid,
    epochs=100,
    steps_per_epoch=train.shape[0] // BATCH_SIZE,
    validation_steps=valid.shape[0] // BATCH_SIZE,
    verbose=1,
    workers=30,
    callbacks=callbacks,
)

Epoch 44/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00044: val_f1_score improved from -inf to 0.82669, saving model to /app/_data/models/Eff4_1/eff2_3.h5
Epoch 45/100
Epoch 00045: val_f1_score improved from 0.82669 to 0.83455, saving model to /app/_data/models/Eff4_1/eff2_3.h5
Epoch 46/100
Epoch 00046: val_f1_score improved from 0.83455 to 0.84455, saving model to /app/_data/models/Eff4_1/eff2_3.h5
Epoch 47/100
Epoch 00047: val_f1_score did not improve from 0.84455
Epoch 48/100
Epoch 00048: val_f1_score improved from 0.84455 to 0.84493, saving model to /app/_data/models/Eff4_1/eff2_3.h5
Epoch 49/100
Epoch 00049: val_f1_score did not improve from 0.84493
Epoch 50/100
Epoch 00050: val_f1_score improved from 0.84493 to 0.84963, saving model to /app/_data/models/Eff4_1/eff2_3.h5
Epoch 51/100
Epoch 00051: val_f1_score did not improve from 0.84963
Epoch 52/100
Epoch 00052: val_f1_score did not improve from 0.84963
Epoch 53/100
Epoch 00053: val_f1_score 

In [None]:
import kaggle

In [57]:
! kaggle datasets init -p /app/_data/models/Eff2_3

Data package template written to: /app/_data/models/Eff01/dataset-metadata.json


In [60]:
! kaggle datasets create -p /app/_data/models/Eff2_3

Starting upload for file dict_birds.json
100%|██████████████████████████████████████| 5.86k/5.86k [00:01<00:00, 3.59kB/s]
Upload successful: dict_birds.json (6KB)
Starting upload for file eff0_1.h5
100%|███████████████████████████████████████| 52.7M/52.7M [01:21<00:00, 675kB/s]
Upload successful: eff0_1.h5 (53MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Your private Dataset is being created. Please check progress at https://www.kaggle.com/nataliayurasova/Eff01Initial
