# Intro
Welcome to the [Rainforest Connection Species Audio Detection](https://www.kaggle.com/c/rfcx-species-audio-detection/data) competition. 
![](https://storage.googleapis.com/kaggle-competitions/kaggle/21669/logos/header.png)
We will first give you a short introduction to get you started. The next step is a short analysis before defining a model with Keras.

These are the features of the train data:
* recording_id: unique identifier for recording
* species_id: unique identifier for species
* songtype_id: unique identifier for songtype
* t_min: start second of annotated signal
* f_min: lower frequency of annotated signal
* t_max: end second of annotated signal
* f_max: upper frequency of annotated signal
* is_tp: [tfrecords only] an indicator of whether the label is from the train_tp (1) or train_fp (0) file.

We recommend [this notebook](https://www.kaggle.com/drcapa/esc-50-eda-pytorch) for handling audio data.

<span style="color: royalblue;">Please vote the notebook up if it helps you. Feel free to leave a comment above the notebook. Thank you. </span>

# Libraries
We load some standard libraries and packages of the keras library.

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import soundfile as sf
import librosa
import librosa.display
import IPython.display as display

from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPool1D, BatchNormalization
from keras.optimizers import RMSprop,Adam
from keras.applications import VGG19, VGG16, ResNet50

import warnings
warnings.filterwarnings("ignore")

# Path

In [None]:
# Root directory of the competition data; later cells build file names by
# appending to this string, so it keeps its trailing slash.
path = '/kaggle/input/rfcx-species-audio-detection/'
# List the entries of the data directory.
os.listdir(path)

# Functions
We define some helper functions for loading and visualizing the data.

In [None]:
def read_flac_file(path, file):
    """ Load one flac file and return its samples together with the samplerate.

    `path` and `file` are joined by plain string concatenation, so `path`
    has to end with a path separator.
    """

    full_name = path+file
    samples, rate = sf.read(full_name)
    return samples, rate

def plot_audio_file(data, samplerate, t_min, t_max, species):
    """ Draw the whole signal and highlight the annotated section in red.

    t_min and t_max are given in seconds and converted to sample positions
    with the samplerate. The highlighted section is labelled with the
    species in the legend.
    """

    plt.figure(figsize=(10, 6))

    # Complete signal, sample position on the x axis
    plt.plot(range(len(data)), data)

    # Annotated section drawn on top of the complete signal
    start, stop = int(t_min*samplerate), int(t_max*samplerate)
    plt.plot(range(start, stop), data[start:stop],
             color='red', label = 'species '+str(species))

    plt.legend(loc='upper center')
    plt.grid()
    
def plot_spectrogram(data, samplerate, t_min, t_max):
    """ Plot the mel-scaled spectrogram (in dB) of the section t_min..t_max.

    Args:
        data: audio samples as returned by read_flac_file.
        samplerate: samplerate of `data`.
        t_min: start of the section in seconds.
        t_max: end of the section in seconds.
    """

    start, stop = int(t_min*samplerate), int(t_max*samplerate)
    data_sub = data[start:stop]
    # The audio is passed by keyword: newer librosa releases no longer accept
    # it as a positional argument, while `y=` works with old and new versions.
    spectrogram = librosa.feature.melspectrogram(y=data_sub, sr=samplerate)
    # Convert power to dB relative to the strongest bin
    log_spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
    librosa.display.specshow(log_spectrogram, sr=samplerate, x_axis='time', y_axis='mel')
    
def _percentage_by_value(frame, name):
    """ Return the distinct values of column `name` (sorted) and their share of all rows in percent """

    counts = frame[name].value_counts().sort_index()
    percentages = 100*counts/len(frame.index)
    return list(counts.index), percentages.tolist()

def plot_bar_compare(data1, data2, name, rot=False):
    """ Compare the distribution between train_fp and train_tp data

    Args:
        data1: DataFrame shown in the left panel (titled 'train_fp').
        data2: DataFrame shown in the right panel (titled 'train_tp').
        name: column whose value distribution is plotted, in percent of rows.
        rot: if true, rotate the x tick labels by 45 degrees.
    """

    fig, axs = plt.subplots(1, 2, figsize=(9, 3), sharey=True)

    panels = (
        (axs[0], data1, 'train_fp', 'yellowgreen'),
        (axs[1], data2, 'train_tp', 'sandybrown'),
    )
    for axis, frame, title, color in panels:
        names, values = _percentage_by_value(frame, name)
        axis.bar(names, values, color=color)
        axis.grid()
        axis.set_title(title)
        if rot:
            # Rotate the existing tick labels only. Overwriting them with
            # set_xticklabels without fixing the tick positions first would
            # attach the names to whatever ticks the locator picked.
            axis.tick_params(axis='x', labelrotation=45)

    # The y axis is shared, so a single label on the left panel is enough
    axs[0].set_ylabel('%')
    plt.show()

# Load Data

csv File:

In [None]:
# Annotation tables; plot_bar_compare titles them 'train_fp' and 'train_tp'.
train_fp = pd.read_csv(path+'train_fp.csv')
train_tp = pd.read_csv(path+'train_tp.csv')
# Sample submission; its columns are reused later to name the prediction columns.
samp_subm = pd.read_csv(path+'sample_submission.csv')

audio File:

In [None]:
# os.listdir returns the entries in arbitrary order; sorting makes the file
# order (and everything derived from it) reproducible between runs.
train_audio_files = sorted(os.listdir(path+'train'))
test_audio_files = sorted(os.listdir(path+'test'))

Load example audio file:

In [None]:
# Read the first listed training file as an example and show its basic properties.
data, samplerate = read_flac_file(path+'train/', train_audio_files[0])
print('data array:', data)
print('samplerate:', samplerate) 
print('number of data values:', len(data))

# EDA

In [None]:
# Row counts of the loaded tables and number of audio files per folder.
print('number of false positive:', len(train_fp))
print('number of true positive:', len(train_tp))
print('number of samp_subm rows:', len(samp_subm))
print('number of train audio files:', len(train_audio_files))
print('number of test audio files:', len(test_audio_files))

Distribution of the feature species_id:

In [None]:
# Percentage of rows per species_id, train_fp (left) next to train_tp (right).
plot_bar_compare(train_fp, train_tp, 'species_id', rot=False)

Distribution of the feature songtype_id:

In [None]:
# Percentage of rows per songtype_id, train_fp (left) next to train_tp (right).
plot_bar_compare(train_fp, train_tp, 'songtype_id', rot=False)

There could be more than one species for one audio file:

In [None]:
# First three rows of train_fp.
train_fp[0:3]

Description of the features:

In [None]:
# Summary statistics of train_fp as computed by DataFrame.describe().
train_fp.describe()

# A Sample File

In [None]:
# Recording used as the running example in the following cells.
recording_id = '00204008d'
# Audio files are stored as <recording_id>.flac in the train folder.
data, samplerate = read_flac_file(path+'train/', recording_id+'.flac')

In [None]:
# Audio player for the sample recording.
display.Audio(path+'train/'+recording_id+'.flac')

In [None]:
# Select the annotation rows of the sample recording once and take the first
# match by position. Label-based `[0]` on the filtered column only works if
# the matching row happens to carry index label 0 and raises KeyError otherwise.
sample_rows = train_fp[train_fp['recording_id']==recording_id]
t_min = sample_rows['t_min'].iloc[0]
t_max = sample_rows['t_max'].iloc[0]
label = sample_rows['species_id'].iloc[0]
plot_audio_file(data, samplerate, t_min, t_max, label)

Plot [spectrogram](https://en.wikipedia.org/wiki/Spectrogram) with mel scaling:

In [None]:
# Spectrogram of the annotated section (t_min..t_max) of the sample recording.
plot_spectrogram(data, samplerate, t_min, t_max)

# Prepare Data For Model
## Train Labels

In [None]:
# Label matrix: one row per training recording (file name without extension),
# one column per species s0..s23, initialised with 0.
y_train_columns = ['s'+str(species) for species in range(24)]
y_train_index = [name.split('.')[0] for name in train_audio_files]
y_train = pd.DataFrame(0, index=y_train_index, columns=y_train_columns)

# Set a 1 for every annotated (recording, species) pair of both tables.
# NOTE(review): rows from train_fp are marked with 1 as well - confirm that
# false positive annotations are meant to count as positive labels.
for annotations in (train_fp, train_tp):
    pairs = zip(annotations['recording_id'], annotations['species_id'])
    for rec_id, species in pairs:
        y_train.loc[rec_id, 's'+str(species)] = 1

In [None]:
# First rows of the label matrix.
y_train.head()

## Audio Data Generator

In [None]:
class DataGenerator(Sequence):
    """ Keras Sequence that yields batches of scaled FFT magnitudes and labels.

    Each audio file is read with read_flac_file, reduced to the magnitude of
    the first half of its FFT, standardised and reshaped to
    (N_STEPS, N_FEATURES).

    Args:
        path: directory prefix handed to read_flac_file (concatenated with
            the file name, so it has to end with a path separator).
        list_IDs: audio file names including the extension.
        labels: DataFrame indexed by file name without extension that holds
            N_CLASSES label columns.
        batch_size: maximum number of files per batch.
    """

    N_CLASSES = 24
    # Length of one half spectrum; every audio file is expected to hold
    # exactly 2*N_SPECTRUM samples, otherwise the row assignment fails.
    N_SPECTRUM = 2880000//2
    # The half spectrum is folded into N_STEPS rows of N_FEATURES values
    N_STEPS = 1000
    N_FEATURES = 2880//2

    def __init__(self, path, list_IDs, labels, batch_size):
        super().__init__()
        self.path = path
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.indexes = np.arange(len(self.list_IDs))

    def __len__(self):
        """ Number of batches, counting a final partial batch """
        return (len(self.list_IDs) + self.batch_size - 1)//self.batch_size

    def __getitem__(self, index):
        """ Return the batch (X, y) at position `index` """
        start = index*self.batch_size
        batch_indexes = self.indexes[start:start+self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in batch_indexes]
        X, y = self.__data_generation(list_IDs_temp)
        # Use the real batch length: the last batch may hold fewer files than
        # batch_size and must not be padded with all-zero samples.
        X = X.reshape((len(list_IDs_temp), self.N_STEPS, self.N_FEATURES))
        return X, y

    def __data_generation(self, list_IDs_temp):
        """ Build the flat feature matrix and the label matrix for the given files """
        n_files = len(list_IDs_temp)
        X = np.zeros((n_files, self.N_SPECTRUM))
        y = np.zeros((n_files, self.N_CLASSES))
        for i, ID in enumerate(list_IDs_temp):
            audio_file, _ = read_flac_file(self.path, ID)
            # Magnitude of the first half of the spectrum
            audio_file_fft = np.abs(np.fft.fft(audio_file)[:len(audio_file)//2])
            # scale data; a constant spectrum has std 0, so divide by 1 there
            # instead of producing NaN
            std = audio_file_fft.std()
            audio_file_fft = (audio_file_fft-audio_file_fft.mean())/(std if std > 0 else 1.0)
            X[i, ] = audio_file_fft
            y[i, ] = self.labels.loc[ID.split('.')[0]]
        return X, y

# Define Model

In [None]:
# Number of training epochs passed to the fit call.
epochs = 15
# Learning rate handed to the Adam optimizer.
lernrate = 2e-3

In [None]:
# 1D convolutional network over the (1000, 1440) input produced by DataGenerator.
model = Sequential()
# Three blocks of convolution, batch normalisation and pooling shrink the step axis
model.add(Conv1D(128, input_shape=(1000, 2880//2,), kernel_size=5, strides=4, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool1D(pool_size=4))
model.add(Conv1D(128, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool1D(pool_size=4))
model.add(Conv1D(128, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool1D(pool_size=4))
model.add(Conv1D(256, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
# Dense head with one output per species
model.add(Flatten())
model.add(Dense(512, activation='relu'))
# A recording can contain several species and the model is trained with
# binary_crossentropy, so every output needs its own independent probability.
# Sigmoid gives that; softmax would force the 24 outputs to sum to 1.
model.add(Dense(24, activation='sigmoid'))

In [None]:
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(optimizer = Adam(learning_rate=lernrate),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])

In [None]:
# Print the layer overview of the model.
model.summary()

# Train Model

In [None]:
# Number of audio files per batch, used for the train and the test generator.
batch_size = 64

In [None]:
# Batches of training audio with the labels from y_train.
train_generator = DataGenerator(path+'train/', train_audio_files, y_train, batch_size)

In [None]:
# Model.fit accepts a Sequence directly; fit_generator is deprecated in its favour.
history = model.fit(train_generator,
                    epochs = epochs,
                    workers=4)

# Predict Test Data

In [None]:
# Sample submission indexed by its first column; handed to the test generator
# as label table so that it can look up a row for every test recording.
y_test = pd.read_csv(path+'sample_submission.csv', index_col=0)

In [None]:
# Batches of test audio; the order follows test_audio_files.
test_generator = DataGenerator(path+'test/', test_audio_files, y_test, batch_size)

In [None]:
# Model.predict accepts a Sequence directly; predict_generator is deprecated in its favour.
y_pred = model.predict(test_generator, verbose=1)

# Write Output

In [None]:
# The predictions follow the order of test_audio_files, not the row order of
# the sample submission, so the recording ids are derived from the file names.
# Rows beyond the number of test files (padding of the last batch, if any)
# are cut off.
test_ids = [file.split('.')[0] for file in test_audio_files]
output = pd.DataFrame(y_pred[:len(test_ids)], columns = samp_subm.columns[1:25])
output.insert(0, 'recording_id', test_ids)
output.dropna(inplace=True)

# Export Data

In [None]:
# Write the submission file without the DataFrame index column.
output.to_csv('submission.csv', index=False)