# Imports

In [None]:
import os
import pandas as pd
import numpy as np
import pickle 
import tensorflow as tf 
import librosa

from tensorflow_addons.metrics import F1Score

# Important Params

In [None]:
# Seed value; no other cell visible in this notebook reads it.
RANDOM_SEED = 1337

# Audio framing: recordings are loaded at SAMPLE_RATE and then cut into
# chunks that are SIGNAL_LENGTH seconds long.
SAMPLE_RATE = 32000
SIGNAL_LENGTH = 5

# Mel-spectrogram settings. SPEC_SHAPE is (height, width): the height is used
# as the number of mel bands and the width determines the hop length.
# FMIN / FMAX are handed to librosa as the fmin / fmax bounds.
SPEC_SHAPE = (48, 128)
FMIN, FMAX = 500, 12500


# Test data 

In [None]:
# Competition inputs: the folder holding the test soundscapes and the two
# CSV tables that ship alongside it.
test_dir = '../input/birdclef-2021/test_soundscapes'
test = pd.read_csv('../input/birdclef-2021/test.csv')
sample_sub = pd.read_csv('../input/birdclef-2021/sample_submission.csv')

# Last expression of the cell, so the notebook renders the frame.
test

In [None]:
# Render the sample submission frame read from sample_submission.csv.
sample_sub

In [None]:
#loading presaved pickle
def load_pickle(path):
    '''Unpickle and return the object stored in the file at `path`.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only use this on files you
    produced yourself.
    '''
    with open(path, 'rb') as handle:
        return pickle.load(handle)


**Labels from model training**

In [None]:
# Class labels pickled by the training notebook; the prediction step maps the
# index of the highest model output back to a label through this sequence.
LABELS = load_pickle('../input/birdclef2021-model-training/LABELS.pkl')

# Macro-averaged F1 metric with one class per label.
# NOTE(review): no later visible cell reads f1_score; presumably it mirrors the
# metric the saved models were trained with -- confirm before removing.
f1_score = F1Score(num_classes=len(LABELS), average='macro', name='f1_score')

# Loading pretrained models

In [None]:
# Restore the two trained Keras models from their saved .h5 files; the
# prediction step averages their outputs with equal weight.
model1 = tf.keras.models.load_model(
    '../input/birdclef2021-model-training/best_model.h5')
model2 = tf.keras.models.load_model(
    '../input/birdclef2021-model-training/best_model2.h5')


**Test audio data**

In [None]:
def list_files(path):
    '''Return the sorted paths of the .ogg sound files directly inside `path`.

    Args:
        path: directory to scan (not recursive).

    Returns:
        A list of `os.path.join(path, name)` strings, sorted so that the
        result does not depend on the arbitrary order of `os.listdir`.
        The extension check is case-insensitive and requires a real
        extension, so a file literally named "ogg" is not picked up.

    Raises:
        OSError: propagated from `os.listdir` if `path` cannot be listed.
    '''
    return sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if os.path.splitext(name)[1].lower() == '.ogg'
    )

test_audio = list_files(test_dir)

# The competition's test files are hidden, so fall back to the training
# soundscapes whenever the test folder yields no audio.
if not test_audio:
    test_audio = list_files('../input/birdclef-2021/train_soundscapes')

print('{} FILES IN TEST SET.'.format(len(test_audio)))

# Prediction

In [None]:
def _melspec_image(chunk, hop_length):
    '''Convert one audio chunk into a mel spectrogram in dB, rescaled to [0, 1].'''
    mel_spec = librosa.feature.melspectrogram(y=chunk,
                                              sr=SAMPLE_RATE,
                                              n_fft=1024,
                                              hop_length=hop_length,
                                              n_mels=SPEC_SHAPE[0],
                                              fmin=FMIN,
                                              fmax=FMAX)

    mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # Normalize to match the value range we used during training.
    # That's something you should always double check!
    mel_spec -= mel_spec.min()
    peak = mel_spec.max()
    # A constant spectrogram (e.g. digital silence) has peak == 0; dividing
    # would fill the array with NaN, so it is left at all zeros instead.
    if peak > 0:
        mel_spec /= peak

    return mel_spec


def predict(threshold, max_files=None):
    '''Run the two-model ensemble over `test_audio` and build the submission frame.

    Each file is loaded at SAMPLE_RATE and cut into consecutive chunks of
    SIGNAL_LENGTH seconds; a trailing partial chunk is dropped. Every chunk is
    turned into a normalized mel spectrogram, both models score it, and their
    outputs are averaged with equal weight.

    Args:
        threshold: the top averaged score must be strictly greater than this
            for the species to be reported; otherwise the row is 'nocall'.
        max_files: optional cap on how many entries of `test_audio` to process
            (handy for a quick smoke run). None, the default, processes all
            files so the submission covers the whole test set.

    Returns:
        A DataFrame with columns 'row_id' and 'birds', one row per chunk.
        'row_id' is the file name up to its last underscore, followed by
        '_' and the chunk's end time in seconds.

    NOTE(review): assumes both models return one score vector per input
    (output shape (batch, len(LABELS))) -- confirm against the saved models.
    '''
    chunk_len = int(SIGNAL_LENGTH * SAMPLE_RATE)
    # Loop-invariant, so computed once instead of once per chunk.
    hop_length = int(SIGNAL_LENGTH * SAMPLE_RATE / (SPEC_SHAPE[1] - 1))

    files = test_audio if max_files is None else test_audio[:max_files]

    row_id = []
    preds = []

    for file_path in files:
        # Open it with librosa
        sig, _ = librosa.load(file_path, sr=SAMPLE_RATE)

        # Only whole chunks are scored; files shorter than one chunk add no rows.
        n_chunks = len(sig) // chunk_len
        if n_chunks == 0:
            continue

        specs = [
            _melspec_image(sig[k * chunk_len:(k + 1) * chunk_len], hop_length)
            for k in range(n_chunks)
        ]

        # Stack into one batch and add the trailing channel axis, so each model
        # is called once per file rather than once per chunk.
        batch = np.stack(specs)[..., np.newaxis]

        probs = (0.5 * model1.predict(batch, verbose=0)
                 + 0.5 * model2.predict(batch, verbose=0))

        # Highest scoring species for every chunk
        best = probs.argmax(axis=1)

        prefix = os.path.basename(file_path).rsplit('_', 1)[0]
        for k, idx in enumerate(best, start=1):
            # End time of the k-th chunk, derived from SIGNAL_LENGTH instead
            # of a hard-coded 5.
            row_id.append(prefix + '_' + str(k * SIGNAL_LENGTH))

            # Decide if it's a "nocall" or a species by applying a threshold
            if probs[k - 1, idx] > threshold:
                preds.append(LABELS[idx])
            else:
                preds.append('nocall')

    return pd.DataFrame({'row_id': row_id, 'birds': preds})

In [None]:
# Build the submission with a 0.6 score cut-off, then preview its first rows.
submission = predict(threshold=0.6)

submission.head()

In [None]:
# Write the predictions to submission.csv, leaving out the DataFrame index.
submission.to_csv('submission.csv',index=False)