# Prepare Environment

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math

import os
import cv2

import IPython.display as ipd 

import librosa 
import librosa.display

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K

# Set TF_CPP_MIN_LOG_LEVEL to '3' for this process.
# NOTE(review): this runs after `import tensorflow` above; the variable is
# presumably read when TensorFlow's native runtime starts, so it may need to
# be set before that import to have any effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# Load DataFrame

In [None]:
# Directory that holds the raw training audio clips.
train_path = '../input/freesound-audio-tagging/audio_train/'

# Report how many entries that directory contains.
n_train_files = len(os.listdir(train_path))
print(n_train_files)

In [None]:
# Metadata table for the training clips (the fname, label and
# manually_verified columns are used further down).
train_csv = "../input/freesound-audio-tagging/train.csv"
train = pd.read_csv(train_csv)

print('The shape of the training data is: ', train.shape)

In [None]:
# Show the first rows of the training metadata as the cell output.
train.head()

# Unique Labels

In [None]:
# Distinct class names, in order of first appearance in the table.
uniq_labels = train['label'].unique()
n_uniq_labels = len(uniq_labels)

print('There are a total of', n_uniq_labels, 'unique labels.\n')
print(uniq_labels)

# Label Distribution

In [None]:
# Fraction of rows per manually_verified value, printed as a one-row table.
verified_counts = train.manually_verified.value_counts()
verified_share = verified_counts / len(train)
print(verified_share.to_frame().T)

In [None]:
# Bar chart of how many rows fall under each manually_verified value.
# The x-axis is labelled with the column actually being counted.
# The trailing semicolon suppresses the Axes repr in the notebook output.
train.manually_verified.value_counts().plot(
    kind='bar', xlabel='manually_verified', ylabel='Count',
    color=['#1E90FF', '#00C957'], edgecolor='black',
);

# Exploring Samples

## Sample 1

In [None]:
# Path of the first example clip from the training folder.
gunshot = '../input/freesound-audio-tagging/audio_train/0048fd00.wav'
# Render an audio player for the clip as the cell output.
ipd.Audio(gunshot)

In [None]:
# Decode the clip into a waveform and its sample rate, then show the
# Python type of each returned object.
signal, sr = librosa.load(gunshot)
for loaded in (signal, sr):
    print(type(loaded))

In [None]:
# Waveform shape, sample rate, and clip length in seconds
# (number of samples divided by samples per second).
duration_sec = len(signal) / sr
print(signal.shape)
print(sr)
print(duration_sec)

In [None]:
# Top panel: the full waveform. Bottom panel: samples 2000..2999 only.
interval = range(2000, 3000)

fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=[12, 3])
ax_full.plot(signal)
ax_zoom.plot(interval, signal[interval])

plt.tight_layout()
plt.show()

In [None]:
# Mel-scaled power spectrogram of the clip, converted to decibels relative
# to its maximum. The sample rate returned by librosa.load is reused rather
# than repeating the literal.
x1 = librosa.feature.melspectrogram(y=signal, sr=sr)
x2 = librosa.power_to_db(x1, ref=np.max)

print(x2.shape)

# The rows of x2 are mel bands, so the frequency axis is drawn on the mel
# scale; y_axis='hz' would label the rows as linearly spaced frequencies.
librosa.display.specshow(x2, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar()
plt.show()

## Sample 2

In [None]:
# Path of the second example clip from the training folder.
cello = '../input/freesound-audio-tagging/audio_train/0091fc7f.wav'
# Render an audio player for the clip as the cell output.
ipd.Audio(cello)

In [None]:
# Decode the second clip (replacing signal/sr from the first sample) and
# show the Python type of each returned object.
signal, sr = librosa.load(cello)
for loaded in (signal, sr):
    print(type(loaded))

In [None]:
# Waveform shape, sample rate, and clip length in seconds
# (number of samples divided by samples per second).
duration_sec = len(signal) / sr
print(signal.shape)
print(sr)
print(duration_sec)

In [None]:
# Top panel: the full waveform. Bottom panel: samples 2000..2999 only.
interval = range(2000, 3000)

fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=[12, 3])
ax_full.plot(signal)
ax_zoom.plot(interval, signal[interval])

plt.tight_layout()
plt.show()

In [None]:
# Mel-scaled power spectrogram of the clip, converted to decibels relative
# to its maximum. The sample rate returned by librosa.load is reused rather
# than repeating the literal.
x1 = librosa.feature.melspectrogram(y=signal, sr=sr)
x2 = librosa.power_to_db(x1, ref=np.max)

print(x2.shape)

# The rows of x2 are mel bands, so the frequency axis is drawn on the mel
# scale; y_axis='hz' would label the rows as linearly spaced frequencies.
librosa.display.specshow(x2, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar()
plt.show()

# Label Encoder

In [None]:
# Sorted array of class names; a label's position in it is its integer id.
labels = np.unique(train.label.values)
label_encoder = dict(zip(labels, range(len(labels))))

# Spot-check the ids assigned to two of the classes.
for probe in ('Cello', 'Gunshot_or_gunfire'):
    print(label_encoder[probe])

# Displaying Several Spectrogram Images

In [None]:
# Draw 20 random training clips as dB-scaled mel spectrograms on a 5x4 grid.
sample = train.sample(20)

plt.figure(figsize=[20, 9])

for pos, (clip_name, clip_label) in enumerate(zip(sample.fname, sample.label), start=1):
    # Load at 44.1 kHz and compute the spectrogram at the same rate.
    clip, sr = librosa.load(train_path + clip_name, sr=44100)
    S1 = librosa.feature.melspectrogram(y=clip, sr=44100)
    S2 = librosa.power_to_db(S1, ref=np.max)

    plt.subplot(5, 4, pos)
    librosa.display.specshow(S2)
    # Title carries the label, the spectrogram shape and the file name.
    plt.title(f'{clip_label} - {S2.shape[:2]} - {clip_name} ', color = "white")

plt.tight_layout()
plt.show()

# Data Generators

In [None]:
# Root folder of the precomputed spectrogram arrays; DataGenerator reads
# '<SPEC_PATH>/<train_spec|test_spec>/<name>.npy' from it.
SPEC_PATH = '../input/freesound-melpec-128-512-2sec/spectrograms'
# Size every spectrogram is resized to, as (rows, columns): index 0 is the
# first image axis of the batch array and index 1 the second.
IMG_SIZE = (128,32)

class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of precomputed spectrograms.

    For every row of ``df`` the ``.npy`` array matching ``fname`` is read from
    ``SPEC_PATH``, resized to ``IMG_SIZE`` and replicated into three identical
    channels. In training mode the ``label`` column is mapped to an integer id
    through the module-level ``label_encoder``.

    Args:
        df: DataFrame with an ``fname`` column and, when ``is_train`` is true,
            a ``label`` column.
        batch_size: Maximum number of samples per batch (the last batch may
            be smaller).
        shuffle: Whether the sample order is reshuffled at each epoch end.
        is_train: If true, read from ``train_spec`` and yield ``(X, y)``;
            otherwise read from ``test_spec`` and yield ``X`` only.
        **kwargs: Forwarded unchanged to ``keras.utils.Sequence.__init__``.
    """

    def __init__(self, df, batch_size=32, shuffle=True, is_train=True, **kwargs):
        # Let the base class initialise its own state before ours.
        super().__init__(**kwargs)
        self.df = df
        self.n = len(df)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_train = is_train
        # Build the initial sample order.
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indices = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches per epoch."""
        return math.ceil(self.n / self.batch_size)

    def __getitem__(self, batch_index):
        """Return batch number ``batch_index`` (``(X, y)`` or ``X``)."""
        start = batch_index * self.batch_size
        end = (batch_index + 1) * self.batch_size
        return self.__data_generation(self.indices[start:end])

    def __data_generation(self, batch_indices):
        """Load, resize and stack the spectrograms for ``batch_indices``.

        A file that cannot be read or resized is reported and left as an
        all-zero image; its label (in training mode) is still filled in.
        """
        batch_size = len(batch_indices)

        X = np.zeros(shape=(batch_size, IMG_SIZE[0], IMG_SIZE[1], 3))
        y = np.zeros(batch_size)

        subset = 'train_spec' if self.is_train else 'test_spec'
        # Pull the columns out once instead of once per sample; the label
        # column is only required in training mode.
        fnames = self.df.fname.values
        label_values = self.df.label.values if self.is_train else None

        for i, idx in enumerate(batch_indices):
            fname = fnames[idx]
            # Drop the 4-character extension of the file name and use .npy.
            path = f'{SPEC_PATH}/{subset}/{fname[:-4]}.npy'

            try:
                data_array = np.load(path)
                # cv2.resize takes (width, height), hence the swapped order.
                resized = cv2.resize(data_array, (IMG_SIZE[1], IMG_SIZE[0]))
                # Broadcast the single-channel image into all three channels.
                X[i] = resized[..., np.newaxis]
            except (OSError, EOFError, ValueError, cv2.error) as err:
                # Best effort: keep the zero image, but say which file failed.
                print(f'skipped {path}: {err}')

            if self.is_train:
                y[i] = label_encoder[label_values[idx]]

        if self.is_train:
            return X, y
        return X

    
# Switch for a quick smoke test of DataGenerator.
GENERATOR_TEST = True

if GENERATOR_TEST:
    # Fetch the first un-shuffled batch of 8 and inspect it.
    temp_gen = DataGenerator(train, batch_size=8, shuffle=False)
    X, y = temp_gen[0]

    print(X.shape)
    print(y)

    # Draw channel 0 of the first image in the batch.
    first_image = X[0, :, :, 0]
    librosa.display.specshow(first_image)

In [None]:
# 80/20 split with a fixed seed, stratified on the label column.
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,
    random_state=1,
    stratify=train.label,
)

for part in (train_df, valid_df):
    print(part.shape)

In [None]:
# Both loaders use batches of 64; only the training loader is shuffled.
loader_opts = dict(batch_size=64)
train_loader = DataGenerator(train_df, shuffle=True, **loader_opts)
valid_loader = DataGenerator(valid_df, shuffle=False, **loader_opts)

In [None]:
# Number of batches per epoch for each loader.
TR_STEPS, VA_STEPS = len(train_loader), len(valid_loader)

for n_steps in (TR_STEPS, VA_STEPS):
    print(n_steps)

# Building CNN

In [None]:
# ImageNet-initialised VGG16 without its classifier head. The input shape is
# derived from IMG_SIZE so it matches the batches DataGenerator produces.
VGG16_model = tf.keras.applications.VGG16(
    input_shape=(*IMG_SIZE, 3), include_top=False, weights='imagenet'
)
# Freeze the base so that only layers added on top of it are trained.
VGG16_model.trainable = False

In [None]:
# Classifier: the VGG16 base followed by a small dense head.
cnn = Sequential([
    VGG16_model,

    Flatten(),

    Dense(128, activation='relu'),
    Dropout(0.35),

    Dense(64, activation='relu'),
    Dropout(0.35),
    BatchNormalization(),

    Dense(32, activation='relu'),
    Dropout(0.35),
    BatchNormalization(),

    # One output unit per encoded class, taken from the label list so the
    # width always agrees with the ids produced by label_encoder.
    Dense(len(labels), activation='softmax')
])

cnn.summary()

# Train Network

## Training Run 1

In [None]:
# Adam at 1e-3; the sparse loss takes the integer class ids directly.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
cnn.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
%%time 

# First training run: 25 epochs, validated on valid_loader every epoch.
h1 = cnn.fit(
    train_loader,
    steps_per_epoch=TR_STEPS,
    epochs=25,
    validation_data=valid_loader,
    validation_steps=VA_STEPS,
    verbose=1,
)

In [None]:
def merge_history(hlist):
    """Concatenate the per-epoch metrics of several training runs.

    Args:
        hlist: Sequence of objects exposing a ``history`` dict that maps a
            metric name to a list of per-epoch values. The metric names are
            taken from the first element.

    Returns:
        A new dict mapping each metric name to one list holding the values
        of all runs in order. An empty ``hlist`` yields an empty dict.

    Raises:
        KeyError: If a later run lacks a metric present in the first one.
    """
    if not hlist:
        return {}

    # One flat pass per metric avoids the repeated list copies made by
    # sum(list_of_lists, []).
    return {
        key: [value for h in hlist for value in h.history[key]]
        for key in hlist[0].history
    }

In [None]:
def vis_training(h, start=1):
    """Plot training and validation curves for each metric in a history dict.

    Args:
        h: Dict mapping a metric name to a list of per-epoch values; must
            contain ``'loss'``. Validation values are looked up under
            ``'val_' + name`` and drawn only when that key exists.
        start: 1-based epoch at which the plotted curves begin.
    """
    # Select training metrics by name rather than by position, so the result
    # does not depend on key order or on every metric having a val_ twin.
    metrics = [k for k in h if not k.startswith('val_')]

    epoch_range = range(start, len(h['loss']) + 1)
    s = slice(start - 1, None)

    plt.figure(figsize=[14, 4])

    for i, k in enumerate(metrics):
        plt.subplot(1, len(metrics), i + 1)
        plt.plot(epoch_range, h[k][s], label='Training')
        if 'val_' + k in h:
            plt.plot(epoch_range, h['val_' + k][s], label='Validation')
        plt.xlabel('Epoch'); plt.ylabel(k); plt.title(k)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Curves for the first run only, starting at epoch 1.
history = merge_history([h1])
vis_training(history, start=1)

## Training Run 2

In [None]:
# Overwrite the learning rate of the optimizer already attached to cnn with
# 0.0001 (it was created with 0.001), without compiling the model again.
K.set_value(cnn.optimizer.learning_rate, 0.0001)

In [None]:
%%time 

# Second training run: another 25 epochs with the same loaders.
h2 = cnn.fit(
    train_loader,
    steps_per_epoch=TR_STEPS,
    epochs=25,
    validation_data=valid_loader,
    validation_steps=VA_STEPS,
    verbose=1,
)

In [None]:
# NOTE: this cell defines vis_training a second time; an earlier cell in this
# notebook defines a function with the same name.
def vis_training(h, start=1):
    """Plot training and validation curves for each metric in a history dict.

    Args:
        h: Dict mapping a metric name to a list of per-epoch values; must
            contain ``'loss'``. Validation values are looked up under
            ``'val_' + name`` and drawn only when that key exists.
        start: 1-based epoch at which the plotted curves begin.
    """
    # Select training metrics by name rather than by position, so the result
    # does not depend on key order or on every metric having a val_ twin.
    metrics = [k for k in h if not k.startswith('val_')]

    epoch_range = range(start, len(h['loss']) + 1)
    s = slice(start - 1, None)

    plt.figure(figsize=[14, 4])

    for i, k in enumerate(metrics):
        plt.subplot(1, len(metrics), i + 1)
        plt.plot(epoch_range, h[k][s], label='Training')
        if 'val_' + k in h:
            plt.plot(epoch_range, h['val_' + k][s], label='Validation')
        plt.xlabel('Epoch'); plt.ylabel(k); plt.title(k)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Curves for runs 1 and 2 combined, plotted from epoch 10 onwards.
completed_runs = [h1, h2]
history = merge_history(completed_runs)
vis_training(history, start=10)

## Training Run 3 (Fine-Tuning)

In [None]:
# Make the VGG16 base trainable again for the fine-tuning run (it was set to
# trainable = False when it was created).
VGG16_model.trainable = True

In [None]:
# Fresh Adam optimizer at 1e-4, then compile the model again with it.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
cnn.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
%%time 

# Third training run (fine-tuning): 25 epochs with the same loaders.
h3 = cnn.fit(
    train_loader,
    steps_per_epoch=TR_STEPS,
    epochs=25,
    validation_data=valid_loader,
    validation_steps=VA_STEPS,
    verbose=1,
)

In [None]:
# Curves for all three runs combined, plotted from epoch 10 onwards.
completed_runs = [h1, h2, h3]
history = merge_history(completed_runs)
vis_training(history, start=10)

# Saving Model

In [None]:
# Write the trained model to an HDF5 file in the working directory.
# The string has no placeholders, so it is a plain literal.
cnn.save('Freesound_Audio_VGG16_v01.h5')

# Load Test DataFrame

In [None]:
# The sample submission supplies the list of test file names.
submission_csv = '../input/freesound-audio-tagging/sample_submission.csv'
test = pd.read_csv(submission_csv)

# Test mode: no shuffling, and batches contain images only.
test_loader = DataGenerator(test, batch_size=64, shuffle=False, is_train=False)

# One row of class probabilities per test row.
probs = cnn.predict(test_loader)
print(probs.shape)

In [None]:
# Class probabilities of the first test row, rounded to two decimals.
first_row_probs = probs[0, :]
print(np.round(first_row_probs, 2))

## Submit Top 1 Prediction

In [None]:
submission_top1 = test.copy()

# Most probable class per row, mapped back to its label string.
# The column is assigned in one step: element-wise `df.label[i] = ...` is
# chained assignment, which pandas warns about and which does not modify the
# frame when copy-on-write is active.
top1_idx = np.argmax(probs, axis=1)
submission_top1['label'] = labels[top1_idx]

submission_top1.to_csv('submission_top1.csv', index=False, header=True)

submission_top1.head()

## Submit Top 3 Predictions

In [None]:
submission_top3 = test.copy()

# Indices of the three most probable classes per row, best first.
top3_idx = np.argsort(-probs, axis=1)[:, :3]

# Join the three label strings with spaces and assign the whole column in
# one step: element-wise `df.label[i] = ...` is chained assignment, which
# pandas warns about and which does not modify the frame when copy-on-write
# is active.
submission_top3['label'] = [' '.join(labels[row]) for row in top3_idx]

submission_top3.to_csv('submission_top3.csv', index=False, header=True)
submission_top3.head()