In [91]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

In [93]:
# NOTE(review): MEL_Generator is instantiated further down in this notebook, but
# its only visible import is the commented-out line below, so a fresh-kernel
# "Restart & Run All" will presumably stop with a NameError at that cell.
# Confirm where the class is meant to come from and re-enable the import.
# from src.generator import MEL_Generator

In [94]:
# --- Reproducibility -------------------------------------------------------
SEED = 42

# --- Audio / spectrogram settings -------------------------------------------
SAMPLE_RATE = 32_000
SIGNAL_LENGTH = 5  # seconds
N_FFT = 2_048
WIN_LENGHT = 128  # (sic) spelling kept: other cells reference this exact name
FREQ_MIN = 500
FREQ_MAX = 15_000
IMG_SIZE = 260  # used both as the model input side and as the number of mel bins

# --- Data loading / batching -------------------------------------------------
AUDIO_PATH = "/app/_data/train_short_audio/"
MAX_AUDIO_FILES = 100
BATCH_SIZE = 128

In [95]:
# Read the bird lookup dictionary that is stored as JSON next to the data.
with open("/app/_data/dict_birds.json", mode="r") as json_file:
    dict_birds = json.loads(json_file.read())

In [97]:
# Metadata tables used to build the training set:
#   df  - rows from random_cut1.csv
#   df1 - rows from all_audio_initial.csv (its first CSV column becomes the index)
df = pd.read_csv(filepath_or_buffer="/app/_data/random_cut1.csv")
df1 = pd.read_csv(
    filepath_or_buffer="/app/_data/all_audio_initial.csv",
    index_col=[0],
)

In [99]:
# Columns kept from both metadata tables; every other column is dropped.
col = [
    "primary_label",
    "filename",
    "row_id",
    "start_sec",
    "end_sec",
    "label_id",
    "secondary_labels_id",
    "class_weights",
    "duration",
    "rating",
    "year",
    "file_path",
    "frames",
    "sin_month",
    "cos_month",
    "sin_longitude",
    "cos_longitude",
    "latitude",
    "norm_latitude",
]

MIN_RATING = 4  # rows with a lower rating are discarded
N_SAMPLES = 200000  # number of rows drawn (weighted by "class_weights")
TRAIN_FRACTION = 0.75  # share of the sample that goes to the training split

# From the second table keep only rows whose file path contains "train_soundscapes".
df1 = df1[df1["file_path"].str.contains("train_soundscapes")].reset_index(drop=True)

# .copy() makes both frames independent of the frames they were sliced from, so
# the column assignments below cannot trigger chained-assignment problems
# (the corresponding warning is silenced globally in this notebook).
df = df[col].copy()
df1 = df1[col].copy()

# Down-cast to save memory.
# NOTE: float16 represents integers exactly only up to 2048, so very large
# start_sec / end_sec values are rounded by this cast.
df = df.astype(
    {
        "start_sec": "float16",
        "end_sec": "float16",
        "rating": "float16",
        "year": "int16",
        "label_id": "int16",
    }
)

# start_sec / end_sec of df1 are recomputed right below, so casting them first
# (as the previous version did) was dead work.
df1 = df1.astype({"rating": "float16", "year": "int16", "label_id": "int16"})

# The last "_"-separated token of row_id is taken as the end second; the start
# is placed 5 seconds before it.
df1["end_sec"] = df1["row_id"].apply(lambda x: int(x.split("_")[-1]))
df1["start_sec"] = df1["end_sec"] - 5

all_audio = pd.concat([df, df1], axis=0, ignore_index=True)

# Map latitude from [-90, 90] to [0, 1].
all_audio["norm_latitude"] = (all_audio["latitude"] + 90) / 180

all_audio = all_audio[all_audio["rating"] >= MIN_RATING]
all_audio_sample = all_audio.sample(
    n=N_SAMPLES, weights="class_weights", random_state=SEED
)

# regex=False: ".ogg_" must be matched literally. Without it the result depends
# on the installed pandas version (older versions default to regex=True, where
# "." matches any character).
all_audio_sample["row_id"] = all_audio_sample["row_id"].str.replace(
    ".ogg_", "_", regex=False
)

all_audio_sample = all_audio_sample.reset_index(drop=True)

train, valid = train_test_split(
    all_audio_sample, train_size=TRAIN_FRACTION, random_state=SEED
)
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)

In [114]:
# s = all_audio["primary_label"].value_counts()
# s.describe()

# fig = go.Figure(
#     data=[go.Bar(y=s.values, x=s.index)],
#     layout=go.Layout(margin=go.layout.Margin(l=0, r=0, b=10, t=50)),
# )

# fig.show()

# s1 = all_audio_sample["primary_label"].value_counts()
# s1.describe()

# all_audio_sample['label_id'].hist(bins=399, figsize=(15, 10))
# fig = go.Figure(
#     data=[go.Bar(y=s.values, x=s.index)],
#     layout=go.Layout(margin=go.layout.Margin(l=0, r=0, b=10, t=50)),
# )

# fig.show()

In [22]:
# shutil.rmtree("/app/_data/npy/")
# os.mkdir("/app/_data/npy/")

In [23]:
# Training callbacks. Early stopping, checkpointing and LR reduction all follow
# the validation F1 score ("val_f1_score", higher is better).
callbacks = [
    # Stop after 12 epochs without improvement and roll back to the best weights.
    keras.callbacks.EarlyStopping(
        monitor="val_f1_score",
        patience=12,
        restore_best_weights=True,
        verbose=1,
        mode="max",
    ),
    # Save the full model (not only the weights) whenever the score improves.
    keras.callbacks.ModelCheckpoint(
        "/app/_data/models/eff0_1.h5",
        monitor="val_f1_score",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
    ),
    # Multiply the learning rate by 0.9 after 8 epochs without improvement.
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_f1_score",
        factor=0.9,
        patience=8,
        verbose=1,
        mode="max",
        min_delta=1e-4,
        min_lr=1e-8,
    ),
    keras.callbacks.TensorBoard(log_dir="/app/.tensorboard/eff0_1/", histogram_freq=0),
    # BackupAndRestore needs a directory of its own: per the TensorFlow docs the
    # backup directory must not be shared with other callbacks (ModelCheckpoint
    # writes to /app/_data/models/) or with other training runs.
    keras.callbacks.experimental.BackupAndRestore("/app/_data/models/backup/eff0_1/"),
    keras.callbacks.TerminateOnNaN(),
]

In [24]:
NUM_CLASSES = 398


def get_model(num_classes=NUM_CLASSES, learning_rate=0.005):
    """Build and compile the EfficientNetB0 multi-label classifier.

    The network is an ImageNet-initialised EfficientNetB0 backbone (without its
    classification head), followed by global average pooling and a sigmoid
    dense layer with one unit per class.

    Args:
        num_classes: Number of output units / classes. Defaults to NUM_CLASSES.
        learning_rate: Initial learning rate for the Adam optimizer.

    Returns:
        A compiled ``keras.Model`` taking (IMG_SIZE, IMG_SIZE, 3) inputs.
    """
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model = keras.applications.EfficientNetB0(
        weights="imagenet", include_top=False
    )
    x = base_model(inputs)
    x = keras.layers.GlobalAveragePooling2D(name="avg_pool")(x)
    # The pooled tensor is already 2-D; the layer is kept so the architecture
    # (and layer names) stay the same as in previously saved models.
    x = keras.layers.Flatten(name="flatten")(x)
    # dtype="float32": when a mixed_float16 policy is active the output layer
    # should still compute in float32 for numerically stable probabilities and
    # loss. Without mixed precision this is simply the default dtype.
    outputs = keras.layers.Dense(num_classes, activation="sigmoid", dtype="float32")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        # `learning_rate` replaces the deprecated `lr` alias.
        optimizer=Adam(learning_rate=learning_rate),
        metrics=[
            "acc",
            keras.metrics.Recall(),
            keras.metrics.Precision(),
            # NOTE(review): no `threshold` is passed, so F1Score falls back to
            # its default binarisation -- confirm this is intended for
            # multi-label targets.
            tfa.metrics.F1Score(num_classes=num_classes, average="micro"),
        ],
    )
    return model

In [25]:
# Enable the "mixed_float16" dtype policy globally. It must be set before the
# model is built so that the layers pick it up.
if hasattr(keras.mixed_precision, "set_global_policy"):
    # Stable API; the experimental variant used before is deprecated.
    policy = keras.mixed_precision.Policy("mixed_float16")
    keras.mixed_precision.set_global_policy(policy)
else:
    # Older TensorFlow releases only ship the experimental API.
    policy = keras.mixed_precision.experimental.Policy("mixed_float16")
    keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [28]:
# Settings shared by the training and the validation generator. Keeping them in
# one place guarantees both splits are preprocessed identically; only the data
# frame and the shuffle flag differ.
generator_kwargs = dict(
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    n_mels=IMG_SIZE,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    batch_size=BATCH_SIZE,  # lower this for the validation generator when debugging
    n_classes=NUM_CLASSES,  # was a hard-coded 398 duplicating NUM_CLASSES
    signal_lenght=SIGNAL_LENGTH,  # (sic) keyword spelled as the generator expects; was a hard-coded 5
    wave_dir="/app/_data/npy/waves_npy/",
    long_mel_dir="/app/_data/npy/long_mels/",
    short_mel_dir="/app/_data/npy/short_mels/",
    norm_mel=False,
    convert_to_rgb=True,
    return_primary_labels=False,
    return_secondary_labels=False,
    return_concat_labels=True,
    seed=SEED,
)

# Training batches are shuffled; validation batches keep the frame order.
gen_train = MEL_Generator(df=train, shuffle=True, **generator_kwargs)
gen_valid = MEL_Generator(df=valid, shuffle=False, **generator_kwargs)

In [29]:
# Build and compile the network returned by get_model().
model = get_model()

In [30]:
# Number of full batches per epoch for each split (a trailing partial batch is
# not counted).
steps_train = len(train) // BATCH_SIZE
steps_valid = len(valid) // BATCH_SIZE

# Train for at most 100 epochs; the callbacks decide when to stop, what to
# checkpoint and when to lower the learning rate.
fit_options = dict(
    validation_data=gen_valid,
    epochs=100,
    steps_per_epoch=steps_train,
    validation_steps=steps_valid,
    verbose=1,
    workers=25,
    callbacks=callbacks,
)
history = model.fit(gen_train, **fit_options)

Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_f1_score improved from -inf to 0.25807, saving model to /app/_data/models/eff0_1.h5
Epoch 2/100
Epoch 00002: val_f1_score improved from 0.25807 to 0.54432, saving model to /app/_data/models/eff0_1.h5
Epoch 3/100
Epoch 00003: val_f1_score did not improve from 0.54432
Epoch 4/100
Epoch 00004: val_f1_score improved from 0.54432 to 0.63707, saving model to /app/_data/models/eff0_1.h5
Epoch 5/100
Epoch 00005: val_f1_score improved from 0.63707 to 0.66516, saving model to /app/_data/models/eff0_1.h5
Epoch 6/100
Epoch 00006: val_f1_score improved from 0.66516 to 0.71738, saving model to /app/_data/models/eff0_1.h5
Epoch 7/100
Epoch 00007: val_f1_score did not improve from 0.71738
Epoch 8/100
Epoch 00008: val_f1_score did not improve from 0.71738
Epoch 9/100
Epoch 00009: val_f1_score did not improve from 0.71738
Epoch 10/100
Epoch 00010: val_f1_score improved from 0.71738 to 0.75138, saving mod

In [56]:
! import kaggle

/bin/bash: import: command not found


In [57]:
# Write a dataset-metadata.json template into the folder that will be uploaded.
! kaggle datasets init -p /app/_data/models/Eff01

Data package template written to: /app/_data/models/Eff01/dataset-metadata.json


In [60]:
# Create the Kaggle dataset from the files in the folder.
# NOTE(review): presumably dataset-metadata.json (written as a template by
# `kaggle datasets init`) was filled in by hand before this step -- confirm.
! kaggle datasets create -p /app/_data/models/Eff01

Starting upload for file dict_birds.json
100%|██████████████████████████████████████| 5.86k/5.86k [00:01<00:00, 3.59kB/s]
Upload successful: dict_birds.json (6KB)
Starting upload for file eff0_1.h5
100%|███████████████████████████████████████| 52.7M/52.7M [01:21<00:00, 675kB/s]
Upload successful: eff0_1.h5 (53MB)
Skipping folder: .ipynb_checkpoints; use '--dir-mode' to upload folders
Your private Dataset is being created. Please check progress at https://www.kaggle.com/nataliayurasova/Eff01Initial
