In [1]:
import os
import shutil
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torchaudio

warnings.filterwarnings(action="ignore")
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import json

import geopandas
import librosa
import librosa.display
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
import soundfile
import tensorflow as tf
import tensorflow_addons as tfa
import torch

%matplotlib inline
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from torchaudio.transforms import MelSpectrogram
from tqdm import tqdm

from src.gen_short_nocall import Mel_Provider
from src.prepare_dataset import (
    choose_ids,
    make_dict_birds,
    make_intervals,
    make_intervals_upsampling,
    prepare_dataset,
)

In [2]:
SEED = 42
IMG_SIZE = 224
SAMPLE_RATE = 32000
N_FFT = 2048
SIGNAL_LENGTH = 5
FREQ_MIN = 0
FREQ_MAX = 16000
WIN_LENGHT = 1024
BATCH_SIZE = 410
list_drop = [
    "XC509721.ogg",
    "XC428067.ogg",
    "XC523831.ogg",
    "XC523960.ogg",
    "XC237870.ogg",
    "XC129924.ogg",
    "XC576851.ogg",
    "XC579430.ogg",
    "XC590621.ogg",
]

In [3]:
# def my_floor(a, precision=2):
#     dec = a - np.floor(a)
#     dec = dec * 10 ** precision
#     dec = np.floor(dec) / 10 ** precision
#     b = np.floor(a) + dec
#     return b

# def make_intervals(array, sig_lenght=5):
#     dict_intervals = {}

#     for row in array:
#         filename = row[0]
#         duration = row[1]
#         file_path = row[2]
#         if duration <= 10:
#             step = 0.5
#         else:
#             step = 1
#         for i in np.arange(sig_lenght, duration + 0.1, step):
#             start = i - sig_lenght
#             end = i
#             if end <= duration:
#                 row_id = filename[:-4] + "_" + "_".join(str(end).split("."))
#                 dict_intervals[row_id] = [end, filename]
#             dict_intervals[row_id] = [filename, start, end, file_path]
#     birds_intervals = pd.DataFrame(dict_intervals).T
#     birds_intervals.columns = ["filename", "start_sec", "end_sec", "file_path"]
#     birds_intervals["row_id"] = birds_intervals.index
#     return birds_intervals

# all_audio = pd.read_csv("/app/_data/all_audio_initial.csv")
# all_audio = all_audio.query("filename not in @list_drop")

# short_audio_df = all_audio[
#     ~all_audio["file_path"].str.contains("train_soundscapes")
# ].reset_index(drop=True)

# array_for_cut = short_audio_df[["filename", "duration", "file_path"]].values

# birds_intervals = make_intervals(array_for_cut, 5)

# birds_intervals.head()

In [4]:
# birds_intervals.to_csv('/app/_data/labels_nocall/birds_intervals_to_predict.csv')

In [5]:
birds_intervals = pd.read_csv(
    "/app/_data/labels_nocall/birds_intervals_to_predict.csv", index_col=[0]
).reset_index(drop=True)

In [6]:
class MEL_Generator_Short(keras.utils.Sequence):
    def __init__(
        self,
        df,
        n_mels,
        sample_rate,
        mel_image_size,
        signal_lenght,
        seed,
        mel_provider=Mel_Provider,
        norm_mel=True,
        wave_dir=None,
        long_mel_dir=None,
        batch_size=32,
        augment=None,
    ):
        self.mel_provider = mel_provider
        self.df = df.reset_index(drop=True)
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.mel_image_size = mel_image_size
        self.signal_lenght = signal_lenght
        self.wave_dir = wave_dir
        self.norm_mel = norm_mel
        self.batch_size = batch_size
        self.seed = seed
        self.file_path = self.df['file_path'].values
        self.row_id = self.df['row_id'].values
        self.end_sec = self.df['end_sec'].values
        self.filename = self.df['filename'].values
    def __len__(self):
        return self.df.shape[0] // self.batch_size

    def get_audio(
        self,
        file_path,
        end_sec=None,
    ):
        wave_dir = self.wave_dir
        file_name = file_path.split("/")[-1][:-4]
        signal_lenght = self.signal_lenght
        if wave_dir is not None:
            if os.path.isfile(wave_dir + file_name + ".npy"):
                try:
                    wave = np.load(wave_dir + file_name + ".npy")
                except:
                    wave, sr = librosa.load(file_path, sr=self.sample_rate)
            else:
                wave, sr = librosa.load(file_path, sr=self.sample_rate)
        else:
            wave, sr = librosa.load(file_path, sr=self.sample_rate)
        if wave_dir is not None:
            if not os.path.isfile(wave_dir + file_name + ".npy"):
                if not os.path.exists(wave_dir):
                    os.mkdir(wave_dir)
                np.save(wave_dir + file_name, wave)
        if end_sec is not None:
            if end_sec < signal_lenght:
                end_sec = signal_lenght
            end = int(end_sec * self.sample_rate)
            end = end if end < len(wave) else len(wave) - 100
            start = int(end - (signal_lenght * self.sample_rate))
            if start < 0:
                start = 0
                end = signal_lenght * self.sample_rate
            wave = wave[start:end]
        return wave

    def __getitem__(self, batch_ix):
        b_X = np.zeros(
            (self.batch_size, self.mel_image_size, self.mel_image_size, 3),
            dtype=np.uint8,
        )

        b_Y = np.zeros(
            (self.batch_size, 1),
        )

        for i in range(self.batch_size):
            b_X[i]= self._get_one(
                i + self.batch_size * batch_ix,
            )

        return b_X, b_Y

    def _get_one(self, ix):
        file_path = self.file_path[ix]
        end_sec = self.end_sec[ix]
        filename = self.filename[ix]
        row_id = self.row_id[ix]

        wave = self.get_audio(file_path, end_sec)
        mel_spec = self.mel_provider.msg(wave)

        if mel_spec.shape != (self.mel_image_size, self.mel_image_size):
            mel_spec = Image.fromarray(mel_spec)
            mel_spec = mel_spec.resize(
                (self.mel_image_size, self.mel_image_size),
                Image.BICUBIC,
            )
            mel_spec = np.array(mel_spec)
        mel_spec = np.round(mel_spec * 255)
        mel_spec = np.repeat(np.expand_dims(mel_spec.astype(np.uint8), 2), 3, 2)
#         mel_spec = np.expand_dims(mel_spec, axis=0)

        return mel_spec
# {
#             "img": mel_spec,
#             "row_id": row_id,
#             "file_name": filename,
#             "end_sec": end_sec,
#         }

In [7]:
df1 = birds_intervals.loc[:1500000]
df2 = birds_intervals.loc[1500000:]

In [8]:
mel_pr = Mel_Provider(
    n_fft=N_FFT,
    win_length=WIN_LENGHT,
    n_mels=IMG_SIZE,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    min_frequency=FREQ_MIN,
    max_frequency=FREQ_MAX,
    signal_lenght=SIGNAL_LENGTH,
)

In [9]:
all_gen = MEL_Generator_Short(
    df=df1,
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    norm_mel=True,
)

In [10]:
all_gen.filename[150:160]
all_gen.row_id[150:160]

array(['XC129981.ogg', 'XC129981.ogg', 'XC129981.ogg', 'XC129981.ogg',
       'XC129981.ogg', 'XC129981.ogg', 'XC129981.ogg', 'XC129981.ogg',
       'XC129981.ogg', 'XC129981.ogg'], dtype=object)

array(['XC129981_31_0', 'XC129981_32_0', 'XC129981_33_0', 'XC129981_34_0',
       'XC129981_35_0', 'XC129981_36_0', 'XC129981_37_0', 'XC129981_38_0',
       'XC129981_39_0', 'XC129981_40_0'], dtype=object)

In [11]:
model = keras.models.load_model("/app/_data/models/nocall/eff0_nocall_1.h5"'')

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6


In [12]:
pred = model.predict(all_gen, max_queue_size=50,verbose=1,
    workers=30)



In [19]:
pred_df = pd.DataFrame(pred)

pred_df['row_id'] = all_gen.row_id[:len(pred_df)]

pred_df['filename'] = all_gen.filename[:len(pred_df)]

pred_df['end_sec'] = all_gen.end_sec[:len(pred_df)]

In [29]:
l = list(all_gen.row_id[len(pred_df):])
df11 = df1.query('row_id in @l')

In [32]:
all_gen11 = MEL_Generator_Short(
    df=df11,
    n_mels=IMG_SIZE,
    seed=SEED,
    sample_rate=SAMPLE_RATE,
    mel_image_size=IMG_SIZE,
    batch_size=len(df11),
    signal_lenght=SIGNAL_LENGTH,
    mel_provider=mel_pr,
    wave_dir="/app/_data/npy/waves_npy/",
    norm_mel=True,)

In [33]:
pred11 = model.predict(all_gen11, max_queue_size=50,verbose=1,
    workers=30)



In [34]:
pred_df11 = pd.DataFrame(pred11)

pred_df11['row_id'] = all_gen11.row_id

pred_df11['filename'] = all_gen11.filename

pred_df11['end_sec'] = all_gen11.end_sec

In [35]:
final_pred= pd.concat([pred_df, pred_df11], axis=0, ignore_index=True)

In [36]:
final_pred

Unnamed: 0,0,1,row_id,filename,end_sec
0,1.000000,0.000106,XC109605_5_0,XC109605.ogg,5.0
1,0.998047,0.001275,XC109605_6_0,XC109605.ogg,6.0
2,0.941406,0.056549,XC109605_7_0,XC109605.ogg,7.0
3,1.000000,0.000000,XC109605_8_0,XC109605.ogg,8.0
4,1.000000,0.000000,XC109605_9_0,XC109605.ogg,9.0
...,...,...,...,...,...
1499996,0.000000,1.000000,XC187620_52_0,XC187620.ogg,52.0
1499997,0.000000,1.000000,XC187620_53_0,XC187620.ogg,53.0
1499998,0.000000,1.000000,XC187620_54_0,XC187620.ogg,54.0
1499999,0.000000,1.000000,XC187620_55_0,XC187620.ogg,55.0


In [37]:
# final_pred.to_csv('/app/_data/labels_nocall/predictions_1.csv')

In [40]:
final_pred2=pd.read_csv('/app/_data/labels_nocall/predictions_2.csv', index_col=[0])

In [61]:
final_pred2.columns=['nocall', 'bird', 'row_id', 'filename','end_sec']
final_pred.columns=['nocall', 'bird', 'row_id', 'filename','end_sec']

In [87]:
final= pd.concat([final_pred, final_pred2], axis=0, ignore_index=True)

In [88]:
final['nocall'] = final['nocall'].apply(lambda x: 1 if x>0.5 else 0)
final['bird'] = final['bird'].apply(lambda x: 1 if x>0.5 else 0)

In [89]:
final['nocall'].value_counts()

0    2561052
1     708707
Name: nocall, dtype: int64

In [90]:
final['bird'].value_counts()

1    2560597
0     709162
Name: bird, dtype: int64

In [91]:
final[(final['bird']==1)&(final['nocall']==1)]

Unnamed: 0,nocall,bird,row_id,filename,end_sec
4043,1,1,XC326699_117_0,XC326699.ogg,117.0
4091,1,1,XC328245_11_0,XC328245.ogg,11.0
7064,1,1,XC129244_35_0,XC129244.ogg,35.0
16316,1,1,XC616751_1097_0,XC616751.ogg,1097.0
19117,1,1,XC16966_10_0,XC16966.ogg,10.0
...,...,...,...,...,...
3263837,1,1,XC287302_356_0,XC287302.ogg,356.0
3264490,1,1,XC436170_39_0,XC436170.ogg,39.0
3267441,1,1,XC357422_15_0,XC357422.ogg,15.0
3267460,1,1,XC357422_34_0,XC357422.ogg,34.0


In [92]:
final.to_csv('/app/_data/labels_nocall/nocall_predictions.csv', index=False)

In [86]:
final

Unnamed: 0,nocall,bird,row_id,filename,end_sec
0,1,0,XC109605_5_0,XC109605.ogg,5.0
1,1,0,XC109605_6_0,XC109605.ogg,6.0
2,1,0,XC109605_7_0,XC109605.ogg,7.0
3,1,0,XC109605_8_0,XC109605.ogg,8.0
4,1,0,XC109605_9_0,XC109605.ogg,9.0
...,...,...,...,...,...
3269754,0,1,XC615888_73_0,XC615888.ogg,73.0
3269755,0,1,XC615888_74_0,XC615888.ogg,74.0
3269756,0,1,XC615888_75_0,XC615888.ogg,75.0
3269757,0,1,XC615888_76_0,XC615888.ogg,76.0
