In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short_nocall import Mel_Provider
from src.prepare_dataset import (
    choose_ids,
    make_dict_birds,
    make_intervals,
    make_intervals_upsampling,
    prepare_dataset,
)

In [2]:
# --- Reproducibility -------------------------------------------------------
SEED = 42

# --- Audio / spectrogram settings -------------------------------------------
SAMPLE_RATE = 32000  # passed as `sr` when audio is loaded
SIGNAL_LENGTH = 5  # window length; multiplied by SAMPLE_RATE when cropping
N_FFT = 2048
WIN_LENGHT = 1024  # (sic) name kept as-is because later cells reference it
FREQ_MIN = 0
FREQ_MAX = 16000
IMG_SIZE = 224  # side of the square image fed to the model; also used as n_mels

# --- Inference settings -----------------------------------------------------
BATCH_SIZE = 410

# File names to exclude; only referenced by the commented-out preparation cell.
list_drop = [
    f"XC{xc_id}.ogg"
    for xc_id in (
        509721,
        428067,
        523831,
        523960,
        237870,
        129924,
        576851,
        579430,
        590621,
    )
]

In [3]:
# def my_floor(a, precision=2):
#     dec = a - np.floor(a)
#     dec = dec * 10 ** precision
#     dec = np.floor(dec) / 10 ** precision
#     b = np.floor(a) + dec
#     return b

# def make_intervals(array, sig_lenght=5):
#     dict_intervals = {}

#     for row in array:
#         filename = row[0]
#         duration = row[1]
#         file_path = row[2]
#         if duration <= 10:
#             step = 0.5
#         else:
#             step = 1
#         for i in np.arange(sig_lenght, duration + 0.1, step):
#             start = i - sig_lenght
#             end = i
#             if end <= duration:
#                 row_id = filename[:-4] + "_" + "_".join(str(end).split("."))
#                 dict_intervals[row_id] = [end, filename]
#             dict_intervals[row_id] = [filename, start, end, file_path]
#     birds_intervals = pd.DataFrame(dict_intervals).T
#     birds_intervals.columns = ["filename", "start_sec", "end_sec", "file_path"]
#     birds_intervals["row_id"] = birds_intervals.index
#     return birds_intervals

# all_audio = pd.read_csv("/app/_data/all_audio_initial.csv")
# all_audio = all_audio.query("filename not in @list_drop")

# short_audio_df = all_audio[
#     ~all_audio["file_path"].str.contains("train_soundscapes")
# ].reset_index(drop=True)

# array_for_cut = short_audio_df[["filename", "duration", "file_path"]].values

# birds_intervals = make_intervals(array_for_cut, 5)

# birds_intervals.head()

In [4]:
# birds_intervals.to_csv('/app/_data/labels_nocall/birds_intervals_to_predict.csv')

In [5]:
# Load the pre-computed interval table (written by the commented-out cell above)
# and replace the stored index with a fresh 0..n-1 positional index.
birds_intervals = pd.read_csv(
    filepath_or_buffer="/app/_data/labels_nocall/birds_intervals_to_predict.csv",
    index_col=[0],
)
birds_intervals = birds_intervals.reset_index(drop=True)

In [6]:
class MEL_Generator_Short(keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of mel-spectrogram images for inference.

    Each row of ``df`` describes one audio window (``file_path``, ``end_sec``).
    For every row the audio is loaded (optionally through an ``.npy`` cache in
    ``wave_dir``), cropped to ``signal_lenght`` seconds ending at ``end_sec``,
    converted by ``mel_provider.msg`` and returned as a 3-channel ``uint8`` image.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``file_path``, ``row_id``, ``end_sec`` and
        ``filename``. The index is reset, so batches follow row order.
    n_mels, norm_mel, seed
        Stored on the instance; not used by the methods of this class.
    sample_rate : int
        Sample rate passed to ``librosa.load`` and used to convert seconds
        into sample offsets.
    mel_image_size : int
        Side of the square output image.
    signal_lenght : int or float
        Window length in seconds.
    mel_provider
        Object exposing ``msg(wave)``. NOTE(review): the default is the
        ``Mel_Provider`` *class*, not an instance - callers are expected to
        pass a configured instance.
    wave_dir : str or None
        Directory used to cache decoded waveforms as ``<name>.npy``.
        ``None`` disables caching.
    long_mel_dir, augment
        Accepted for signature compatibility; ignored.
    batch_size : int
        Number of rows per batch.
    drop_remainder : bool, default True
        ``True`` keeps the original behaviour: ``len(self)`` is
        ``len(df) // batch_size`` and the trailing ``len(df) % batch_size``
        rows are never yielded. ``False`` adds one final, shorter batch so
        that every row is covered in a single pass.
    """

    def __init__(
        self,
        df,
        n_mels,
        sample_rate,
        mel_image_size,
        signal_lenght,
        seed,
        mel_provider=Mel_Provider,
        norm_mel=True,
        wave_dir=None,
        long_mel_dir=None,
        batch_size=32,
        augment=None,
        drop_remainder=True,
    ):
        self.mel_provider = mel_provider
        self.df = df.reset_index(drop=True)
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.mel_image_size = mel_image_size
        self.signal_lenght = signal_lenght
        self.wave_dir = wave_dir
        self.norm_mel = norm_mel
        self.batch_size = batch_size
        self.seed = seed
        self.drop_remainder = drop_remainder
        # Column values are cached as arrays for cheap positional lookups.
        self.file_path = self.df["file_path"].values
        self.row_id = self.df["row_id"].values
        self.end_sec = self.df["end_sec"].values
        self.filename = self.df["filename"].values

    def __len__(self):
        """Return the number of batches (see ``drop_remainder``)."""
        n_rows = self.df.shape[0]
        if self.drop_remainder:
            return n_rows // self.batch_size
        # Ceiling division: include the final partial batch.
        return (n_rows + self.batch_size - 1) // self.batch_size

    def get_audio(
        self,
        file_path,
        end_sec=None,
    ):
        """Load a waveform and optionally crop it to the window ending at ``end_sec``.

        Parameters
        ----------
        file_path : str
            Path of the audio file, using ``/`` as separator.
        end_sec : float or None
            End of the window in seconds. ``None`` returns the whole waveform.

        Returns
        -------
        numpy.ndarray
            The (possibly cropped) waveform. It can be shorter than
            ``signal_lenght * sample_rate`` samples when the recording itself
            is shorter than the window.
        """
        wave_dir = self.wave_dir
        # Drops the last 4 characters, i.e. assumes a 3-letter extension such as
        # ".ogg". Kept unchanged so existing cache file names stay valid.
        file_name = file_path.split("/")[-1][:-4]
        signal_lenght = self.signal_lenght

        wave = None
        cache_path = None
        if wave_dir is not None:
            # os.path.join works whether or not wave_dir ends with a separator.
            cache_path = os.path.join(wave_dir, file_name + ".npy")
            if os.path.isfile(cache_path):
                try:
                    wave = np.load(cache_path)
                except Exception:
                    # Unreadable cache entry: fall back to decoding the audio.
                    # (Narrower than a bare except, so Ctrl-C still interrupts.)
                    wave = None

        if wave is None:
            wave, _ = librosa.load(file_path, sr=self.sample_rate)
            # As before, an existing (even unreadable) cache file is not
            # overwritten; only missing entries are written.
            if cache_path is not None and not os.path.isfile(cache_path):
                # exist_ok avoids FileExistsError when several workers race to
                # create the directory; makedirs also creates missing parents.
                os.makedirs(wave_dir, exist_ok=True)
                np.save(cache_path, wave)

        if end_sec is not None:
            # Windows ending before one full window length are moved to the start.
            if end_sec < signal_lenght:
                end_sec = signal_lenght
            end = int(end_sec * self.sample_rate)
            # NOTE(review): when the window reaches the end of the recording the
            # end is pulled back by 100 samples - the reason for that margin is
            # not visible here.
            end = end if end < len(wave) else len(wave) - 100
            start = int(end - (signal_lenght * self.sample_rate))
            if start < 0:
                start = 0
                # int() keeps the slice bound valid for a float signal_lenght.
                end = int(signal_lenght * self.sample_rate)
            wave = wave[start:end]
        return wave

    def __getitem__(self, batch_ix):
        """Build batch ``batch_ix``.

        Returns
        -------
        tuple
            ``(b_X, b_Y)`` where ``b_X`` is a ``uint8`` array of shape
            ``(n, mel_image_size, mel_image_size, 3)`` and ``b_Y`` is an
            all-zero array of shape ``(n, 1)`` (placeholder targets). ``n`` is
            ``batch_size``, except for the last batch when
            ``drop_remainder=False``.
        """
        first = self.batch_size * batch_ix
        if self.drop_remainder:
            n_rows = self.batch_size
        else:
            n_rows = min(self.batch_size, self.df.shape[0] - first)
            if n_rows <= 0:
                raise IndexError("batch index out of range")

        b_X = np.zeros(
            (n_rows, self.mel_image_size, self.mel_image_size, 3),
            dtype=np.uint8,
        )
        b_Y = np.zeros(
            (n_rows, 1),
        )

        for i in range(n_rows):
            b_X[i] = self._get_one(first + i)

        return b_X, b_Y

    def _get_one(self, ix):
        """Return the ``(size, size, 3)`` ``uint8`` image for row ``ix``."""
        wave = self.get_audio(self.file_path[ix], self.end_sec[ix])
        # Assumes msg() returns a 2-D float array scaled to [0, 1], given the
        # * 255 and uint8 cast below - TODO confirm against Mel_Provider.
        mel_spec = self.mel_provider.msg(wave)

        if mel_spec.shape != (self.mel_image_size, self.mel_image_size):
            mel_spec = Image.fromarray(mel_spec)
            mel_spec = mel_spec.resize(
                (self.mel_image_size, self.mel_image_size),
                Image.BICUBIC,
            )
            mel_spec = np.array(mel_spec)
        mel_spec = np.round(mel_spec * 255)
        # Replicate the single channel three times along a new last axis.
        mel_spec = np.repeat(np.expand_dims(mel_spec.astype(np.uint8), 2), 3, 2)

        return mel_spec

In [7]:
# Sanity check: (rows, columns) of the loaded interval table.
birds_intervals.shape

(3269758, 5)

In [8]:
# Split the table into two halves that are predicted in separate runs.
# Positional slicing (.iloc) is end-exclusive, so the two halves are disjoint.
# The previous label-based .loc slices are end-inclusive, which put row 1500000
# in both df1 and df2. df2 itself is unchanged by this fix.
df1 = birds_intervals.iloc[:1500000]
df2 = birds_intervals.iloc[1500000:]

In [9]:
# Spectrogram provider handed to the generators below; every argument comes
# from the constants defined in the configuration cell.
mel_pr = Mel_Provider(
    sample_rate=SAMPLE_RATE,
    signal_lenght=SIGNAL_LENGTH,
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    n_mels=IMG_SIZE,
    mel_image_size=IMG_SIZE,
)

In [10]:
# Load the trained model used for the predictions below.
# (Removed a stray empty string literal that was implicitly concatenated to the path.)
model = keras.models.load_model("/app/_data/models/nocall/eff0_nocall_1.h5")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [11]:
# Batch generator over the second half of the intervals (df2).
all_gen = MEL_Generator_Short(
    df=df2,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    sample_rate=SAMPLE_RATE,
    signal_lenght=SIGNAL_LENGTH,
    n_mels=IMG_SIZE,
    mel_image_size=IMG_SIZE,
    norm_mel=True,
    batch_size=BATCH_SIZE,
    seed=SEED,
)

In [12]:
# Label name -> class index mapping.
dict_birds = dict(nocall=0, call=1)

In [13]:
# Run inference over every full batch produced by all_gen.
pred2 = model.predict(
    all_gen,
    workers=30,
    max_queue_size=50,
    verbose=1,
)



In [17]:
# Attach the identifying columns to the predictions. The generator only yields
# full batches, so the id arrays are truncated to the number of predicted rows.
pred_df2 = pd.DataFrame(pred2)
pred_df2 = pred_df2.assign(
    row_id=all_gen.row_id[: len(pred_df2)],
    filename=all_gen.filename[: len(pred_df2)],
    end_sec=all_gen.end_sec[: len(pred_df2)],
)

In [18]:
# Rows of df2 that were not covered by the full batches above. all_gen keeps
# the row order of df2, so the leftover rows are simply the positional tail.
# Slicing by position avoids building a Python list and running a membership
# query over the whole frame, and stays correct if a row_id were duplicated.
df22 = df2.iloc[len(pred_df2):]

In [19]:
# Generator for the leftover rows: one single batch that holds all of df22.
all_gen22 = MEL_Generator_Short(
    df=df22,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    sample_rate=SAMPLE_RATE,
    signal_lenght=SIGNAL_LENGTH,
    n_mels=IMG_SIZE,
    mel_image_size=IMG_SIZE,
    norm_mel=True,
    batch_size=len(df22),
    seed=SEED,
)

In [20]:
# Run inference on the single leftover batch.
pred22 = model.predict(
    all_gen22,
    workers=30,
    max_queue_size=50,
    verbose=1,
)



In [21]:
# Attach the identifying columns to the leftover predictions.
pred_df22 = pd.DataFrame(pred22).assign(
    row_id=all_gen22.row_id,
    filename=all_gen22.filename,
    end_sec=all_gen22.end_sec,
)

In [22]:
# Stack the full-batch and leftover predictions row-wise under a fresh index.
final_pred2 = pd.concat(
    objs=[pred_df2, pred_df22],
    ignore_index=True,
)

In [None]:
# NOTE(review): stale, never-executed cell, disabled. `dict_pred` is not defined
# in the visible notebook, so running this raises NameError on
# "Restart & Run All" before the predictions are saved; it would also overwrite
# the `pred2` prediction array. Kept only as a record:
# pred2 = pd.DataFrame(dict_pred).T

In [23]:
# Persist the combined predictions for the second half of the intervals.
final_pred2.to_csv(path_or_buf="/app/_data/labels_nocall/predictions_2.csv")