In [None]:
import json
import pandas as pd
import numpy as np
import re
import librosa
import cv2
import pathlib
import os, shutil
from PIL import Image
from scipy import signal as sig
from sklearn.preprocessing import LabelEncoder, minmax_scale

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import image_dataset_from_directory as idfd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping

In [None]:

#now the goal is to create model for every bird in the json file.
def librosa_stft(signal):
    """Compute a dB-scaled STFT magnitude spectrogram and its frame times.

    The STFT uses a 2048-sample Tukey window (alpha=0.9); every other STFT
    setting is left at librosa's default (per the original author's note:
    n_fft=2048, win_length=n_fft, hop_length=win_length/4, center=True).

    Args:
        signal: audio samples handed straight to ``librosa.stft``.

    Returns:
        A ``(spectrogram_db, frame_times)`` pair, both cast to float16.
        ``frame_times`` is computed for a 32000 Hz sample rate.
    """
    tukey_window = sig.windows.tukey(M=2048, alpha=.9)
    spectrum = librosa.stft(signal, window=tukey_window)

    # Magnitude -> decibels; the phase of the complex STFT is discarded.
    magnitude_db = librosa.amplitude_to_db(np.abs(spectrum))

    # One time stamp per STFT frame, with the sample rate fixed at 32 kHz.
    frame_times = librosa.times_like(spectrum, sr=32000)

    return magnitude_db.astype('float16'), frame_times.astype('float16')

# Return the index of the first entry in the time array that lies at or just after `sec`
# (within 0.1 s). For example, with sec=3 that index is 188.
def get_mark(time, sec):
    """Return the index of the first entry of ``time`` inside [sec, sec + 0.1].

    Args:
        time: numpy array of time stamps.
        sec: lower bound of the 0.1-wide search window.

    Returns:
        The position of the first matching element.

    Raises:
        IndexError: if no element of ``time`` falls inside the window.
    """
    inside_window = (time >= sec) & (time <= sec + .1)
    matching_positions = np.nonzero(inside_window)[0]
    return matching_positions[0]

                
def _collect_loud_windows(spec_db, width, sec, norm):
    """Slice ``spec_db`` into ``width``-column windows and keep the loud ones.

    Returns two lists of equal length: the 3-channel images of the windows
    whose norm exceeds ``norm``, and the end time of each kept window.
    """
    images = []
    end_times = []
    n_frames = spec_db.shape[1]

    # Stepping the stop index up to n_frames + width also yields the trailing,
    # possibly shorter, window.
    for count, stop in enumerate(range(width, n_frames + width, width), start=1):
        window = np.exp(0.1 * spec_db[:, (stop - width):stop])

        # NOTE(review): when spec_db is float16 (the non-padded case) this
        # norm is evaluated in half precision - confirm the threshold was
        # tuned with that in mind.
        if np.linalg.norm(window) > norm:
            window = minmax_scale(window, feature_range=(0, 255))
            images.append(np.dstack((window, window, window)))
            # Record the time only for kept windows so images[i] and
            # end_times[i] always describe the same segment.
            end_times.append(count * sec)

    return images, end_times


def create_images_test(sec, norm, audio_path):
    """Build spectrogram images for the loud ``sec``-second windows of a file.

    The audio is loaded at 32 kHz, converted to a dB spectrogram and cut into
    consecutive windows of ``sec`` seconds. A window is kept when the norm of
    ``exp(0.1 * dB)`` over it exceeds ``norm``. Kept windows are min-max
    scaled to [0, 255] and stacked three times to form a 3-channel image.
    The last window of a file may have fewer columns than the others.

    If the recording is not longer than ``sec``, the spectrogram is first
    padded with -100 dB columns up to ``sec + 0.5`` seconds.

    Args:
        sec: window length in seconds.
        norm: loudness threshold; windows at or below it are discarded.
        audio_path: path of the audio file to process.

    Returns:
        ``(rgb_img, time_sec)``: the list of kept images and the list of
        their window end times (multiples of ``sec``). Both lists have the
        same length, so ``time_sec[i]`` is the end time of ``rgb_img[i]``.
    """
    audio = librosa.load(audio_path, sr=32000)[0]

    # get the spectrogram
    Xdb, time = librosa_stft(audio)

    if time[-1] <= sec:
        # Recording shorter than one window: extend the time axis past `sec`
        # and pad the spectrogram with very quiet (-100 dB) columns.
        interval = time[1]  # second element gives the interval since the first element is 0
        extended_time_arr = np.arange(time[-1] + interval, sec + .5, interval)
        columns_to_append = np.ones([Xdb.shape[0], len(extended_time_arr)]) * (-100)
        time = np.hstack((time, extended_time_arr))
        Xdb = np.hstack([Xdb, columns_to_append])

    # Number of spectrogram columns that make up `sec` seconds.
    mark = get_mark(time, sec)

    rgb_img, time_sec = _collect_loud_windows(Xdb, mark, sec, norm)
    return rgb_img, time_sec

In [None]:
# Restore the trained classifier from the attached dataset
# (presumably VGG19-based, judging by the dataset name - TODO confirm).
trained_model_dir = '../input/d/datasets/rumman18/vgg19-with-class-csv/trained_model'
model = tf.keras.models.load_model(trained_model_dir)

In [None]:
# Class table shipped with the model: `key_list` holds the 'Class' column and
# `val_list` the 'Encoder' column. key_list is later indexed by the position
# of a model output, so row order presumably matches the label encoding.
class_table_path = '../input/d/datasets/rumman18/vgg19-with-class-csv/Class_encoder.csv'
df = pd.read_csv(class_table_path)
key_list, val_list = (list(df[column]) for column in ('Class', 'Encoder'))

In [None]:
def get_bird_label(prediction, key_list):
    """Return the entry of ``key_list`` at the position of the highest score.

    Args:
        prediction: array-like of scores; the arg-max is taken over the
            flattened values.
        key_list: sequence of labels indexed by score position.

    Returns:
        The label whose score is the largest.
    """
    return key_list[np.asarray(prediction).argmax()]

submission = []

# The scored species never change during the run: read them once and close
# the file instead of re-opening it for every image.
with open('/kaggle/input/birdclef-2022/scored_birds.json') as scored_birds_file:
    scored_birds = json.load(scored_birds_file)
scored_bird_set = set(scored_birds)

for dirname, _, filenames in os.walk('/kaggle/input/birdclef-2022/test_soundscapes'):
    for filename in filenames:
        path = os.path.join(dirname, filename)
        file_stem = filename.replace('.ogg', '')

        # 5-second windows, loudness threshold 60.
        imgs, end_times = create_images_test(5, 60, path)

        for image, end_time in zip(imgs, end_times):

            # The model input is 313 columns wide; a shorter (trailing)
            # window is right-padded with zeros.
            if image.shape[1] != 313:
                rows_to_append = image.shape[0]
                columns_to_append = 313 - image.shape[1]
                mat = np.zeros([rows_to_append, columns_to_append])
                mat = np.dstack((mat, mat, mat))
                image = np.hstack((image, mat))

            image = image.astype('float16')
            prediction = model.predict(image.reshape((1, 1025, 313, 3)))

            # prediction is a list of lists; prediction[0] holds the
            # per-class scores. Every class scoring above 0.3 is predicted.
            pred_birds = [key_list[idx] for idx, score in enumerate(prediction[0]) if score > .3]
            predicted = set(pred_birds)

            # 'True' rows first, in prediction order. Classes that are not
            # scored birds are skipped: they have no row in the submission,
            # and removing them from the scored list used to raise ValueError.
            for pred_bird in pred_birds:
                if pred_bird in scored_bird_set:
                    submission.append({
                        "row_id": f"{file_stem}_{pred_bird}_{end_time}",
                        "target": 'True'
                    })

            # Every remaining scored bird gets a 'False' row for this window.
            for bird in scored_birds:
                if bird not in predicted:
                    submission.append({
                        "row_id": f"{file_stem}_{bird}_{end_time}",
                        "target": 'False'
                    })

In [None]:
# Pin the column names so the CSV still gets its "row_id,target" header when
# no rows were produced (e.g. no test files, or every window was too quiet).
df = pd.DataFrame(submission, columns=["row_id", "target"])
df.to_csv("submission.csv", index=False)