# Prepare Environment

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import math
import cv2
import librosa 
import librosa.display
import IPython.display as ipd 
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K

In [None]:
def merge_history(hlist):
    """Concatenate the per-epoch metrics of several training runs.

    Parameters
    ----------
    hlist : sequence
        Objects exposing a ``history`` dict that maps a metric name to a
        sequence of per-epoch values (e.g. the objects returned by
        ``model.fit``), in the order the runs were performed.

    Returns
    -------
    dict
        One entry per metric found in the first element; each value is a new
        list holding that metric's values from every run, in order.
        An empty ``hlist`` yields an empty dict.

    Raises
    ------
    KeyError
        If a later element lacks a metric that the first element has.
    """
    # Nothing to merge: avoid indexing into an empty sequence.
    if not hlist:
        return {}

    # A flat comprehension is linear in the total number of epochs, unlike
    # repeated list concatenation through ``sum(..., [])``.
    return {
        k: [value for h in hlist for value in h.history[k]]
        for k in hlist[0].history
    }

In [None]:
def vis_training(h, start=1):
    """Plot one subplot per metric of a training history.

    Metrics that have a ``'val_' + name`` counterpart in ``h`` are drawn with
    both a training and a validation curve.  If ``h`` holds no validation
    series at all, every key not starting with ``'val_'`` is drawn with a
    training curve only.

    Parameters
    ----------
    h : dict
        Maps a metric name to a list of per-epoch values; must contain
        ``'loss'``, whose length defines the number of epochs.
    start : int, default 1
        First epoch (1-based) to include in the plots.
    """
    epoch_range = range(start, len(h['loss']) + 1)
    s = slice(start - 1, None)

    # Select metrics by name rather than by "first half of the keys", so the
    # result does not depend on key order or on extra, unpaired entries.
    metrics = [k for k in h if 'val_' + k in h]
    if not metrics:
        metrics = [k for k in h if not k.startswith('val_')]
    n = len(metrics)

    plt.figure(figsize=[14, 4])

    for i, k in enumerate(metrics):
        plt.subplot(1, n, i + 1)
        plt.plot(epoch_range, h[k][s], label='Training')
        val_key = 'val_' + k
        if val_key in h:
            plt.plot(epoch_range, h[val_key][s], label='Validation')
        plt.xlabel('Epoch')
        plt.ylabel(k)
        plt.title(k)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    plt.show()

# Load Data

In [None]:
# Read the training metadata table from the competition input directory.
train = pd.read_csv('../input/freesound-audio-tagging/train.csv')
# Report (rows, columns), followed by a blank line.
print(train.shape, '\n')
# Last expression of the cell: preview of the first rows.
train.head()

# Explore Sample

In [None]:
# Path of a single training clip used as the running example below.
harmonica = '../input/freesound-audio-tagging/audio_train/86881793.wav'
# Last expression of the cell: audio object built from that file.
ipd.Audio(harmonica)

In [None]:
# Decode the example clip with librosa's default settings; the call returns
# the samples and the sampling rate.
signal, sr = librosa.load(harmonica)
# Show which Python types librosa handed back.
print(type(signal))
print(type(sr))

In [None]:
# Shape of the sample array.
print(signal.shape)
# Sampling rate returned by librosa.load.
print(sr)
# Number of samples divided by the sampling rate, i.e. clip length in seconds.
print(len(signal) / sr)

In [None]:
# Two stacked panels: the complete waveform on top, samples 2000-2999 below.
fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=[12, 3])

ax_full.plot(signal)

# Same sample indices are used for the x-axis and for selecting the values.
interval = range(2000, 3000)
ax_zoom.plot(interval, signal[interval])

fig.tight_layout()
plt.show()

In [None]:
# Mel-scaled power spectrogram of the example clip.  The sampling rate comes
# from librosa.load instead of being repeated as a literal, so the analysis
# stays correct if the clip is ever loaded at another rate.
x1 = librosa.feature.melspectrogram(y=signal, sr=sr)
# Convert power to decibels relative to the loudest bin (maximum maps to 0 dB).
x2 = librosa.power_to_db(x1, ref=np.max)

print(x2.shape)

# The rows of x2 are mel bands, so the frequency axis must use the mel scale;
# 'hz' would label them as linearly spaced frequencies.
librosa.display.specshow(x2, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
plt.show()

# Label Encoder

In [None]:
# Sorted array of the distinct class names found in the training table.
labels = np.unique(train['label'].values)

# Class name -> position of that name in ``labels``.
label_encoder = dict(zip(labels, range(len(labels))))

# Spot-check two classes.
for name in ('Bark', 'Knock'):
    print(label_encoder[name])

# Data Generators

In [None]:
# Root directory of the saved spectrogram arrays; DataGenerator reads
# '<SPEC_PATH>/train_spec/*.npy' and '<SPEC_PATH>/test_spec/*.npy' from it.
SPEC_PATH = '../input/freesound-mel-spectrograms-128-512/spectrograms'
# (rows, columns) every spectrogram is resized to before entering a batch.
IMG_SIZE = (128,32)

class DataGenerator(keras.utils.Sequence):
    """Batch loader that serves saved spectrogram arrays to a Keras model.

    For every row of ``df`` the array stored at
    ``{SPEC_PATH}/{train_spec|test_spec}/{fname without its last 4 chars}.npy``
    is loaded, resized to ``IMG_SIZE`` and placed in a single-channel batch
    tensor.  Samples that cannot be loaded or resized are left as all-zero
    images and reported on stdout.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an ``fname`` column; a ``label`` column whose values are
        keys of ``label_encoder`` is required when ``is_train`` is true.
    batch_size : int, default 32
        Number of samples per batch (the last batch may be smaller).
    shuffle : bool, default True
        Draw a new random sample order at construction and after each epoch.
    is_train : bool, default True
        If true, read from ``train_spec`` and return ``(X, y)``; otherwise
        read from ``test_spec`` and return ``X`` only.
    """

    def __init__(self, df, batch_size=32, shuffle=True, is_train=True):
        # Let the Keras base class set up its own state (newer Keras versions
        # warn when a Sequence subclass skips this call).
        super().__init__()
        self.df = df
        self.n = len(df)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_train = is_train
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is set."""
        self.indices = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches per epoch, counting a partial one."""
        return math.ceil(self.n / self.batch_size)

    def __getitem__(self, batch_index):
        """Return batch number ``batch_index`` (0-based)."""
        start = batch_index * self.batch_size
        end = start + self.batch_size
        return self.__data_generation(self.indices[start:end])

    def __data_generation(self, batch_indices):
        """Build the arrays for the given positional row indices.

        Returns ``(X, y)`` when ``is_train`` is true, otherwise ``X``.
        ``X`` has shape ``(len(batch_indices), IMG_SIZE[0], IMG_SIZE[1], 1)``
        and ``y`` holds the integer class id of each sample.
        """
        batch_size = len(batch_indices)
        height, width = IMG_SIZE

        X = np.zeros(shape=(batch_size, height, width, 1))
        y = np.zeros(batch_size)

        # Pull the columns out once per batch instead of once per sample.
        fnames = self.df.fname.values
        subset = 'train_spec' if self.is_train else 'test_spec'
        # Labels are only needed (and only required to exist) for training.
        label_values = self.df.label.values if self.is_train else None

        for i, idx in enumerate(batch_indices):
            # Drop the last four characters of the file name (presumably the
            # '.wav' extension) to obtain the stem of the saved array.
            path = f'{SPEC_PATH}/{subset}/{fnames[idx][:-4]}.npy'

            try:
                data_array = np.load(path)
                # cv2.resize takes the target size as (width, height).
                X[i, :, :, 0] = cv2.resize(data_array, (width, height))
            except Exception as err:
                # Best effort: keep the zero image for this sample, but say
                # which file failed and why.  Unlike a bare ``except``, this
                # lets KeyboardInterrupt / SystemExit stop a long run.
                print(f'skipped {path}: {err!r}')

            if self.is_train:
                y[i] = label_encoder[label_values[idx]]

        if self.is_train:
            return X, y
        return X

    
# Toggle for a quick smoke test of DataGenerator on the full training table.
GENERATOR_TEST = True

if GENERATOR_TEST:
    temp_gen = DataGenerator(train, batch_size=8, shuffle=False)

    # Indexing the generator fetches a batch; take the first one.
    X, y = temp_gen[0]

    print(X.shape)
    print(y)

    # Draw the single channel of the first sample in the batch.
    librosa.display.specshow(X[0, ..., 0])

In [None]:
# Hold out 20% of the rows for validation, stratified on the class label,
# with a fixed random state.
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,
    random_state=1,
    stratify=train['label'],
)

for part in (train_df, valid_df):
    print(part.shape)

In [None]:
# Batches of 64; the training order is reshuffled, the validation order is not.
train_loader = DataGenerator(train_df, batch_size=64, shuffle=True)
valid_loader = DataGenerator(valid_df, batch_size=64, shuffle=False)

# Build CNN

In [None]:
# Seed NumPy (used for shuffling in DataGenerator) and TensorFlow (used for
# weight initialisation and dropout); seeding NumPy alone does not make the
# network's initial weights repeatable.
np.random.seed(1)
tf.random.set_seed(1)

cnn = Sequential()

# Input matches what DataGenerator produces: IMG_SIZE plus one channel.
cnn.add(keras.Input(shape=(*IMG_SIZE, 1)))

# Two convolutional stages: conv-conv-pool, then dropout and batch norm.
for n_filters in (16, 32):
    cnn.add(Conv2D(n_filters, (3,3), activation = 'relu', padding = 'same'))
    cnn.add(Conv2D(n_filters, (3,3), activation = 'relu', padding = 'same'))
    cnn.add(MaxPooling2D(2,2))
    cnn.add(Dropout(0.50))
    cnn.add(BatchNormalization())

cnn.add(Flatten())

# Two fully connected stages with the same dropout / batch-norm tail.
for n_units in (128, 64):
    cnn.add(Dense(n_units, activation='relu'))
    cnn.add(Dropout(0.50))
    cnn.add(BatchNormalization())

# One output unit per class in ``labels`` rather than a hard-coded count, so
# the head always agrees with the ids produced by ``label_encoder``.
cnn.add(Dense(len(labels), activation='softmax'))

cnn.summary()

# Train Network

In [None]:
%%time

opt = tf.keras.optimizers.Adam(0.001)
cnn.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

h1 = cnn.fit(train_loader, epochs=10, validation_data=valid_loader, verbose=1)

In [None]:
# Combine the training runs (only one so far) into a single metrics dict
# and plot it.
history = merge_history([h1])
vis_training(history)

# Save Model


In [None]:
# Write the trained network to a file in the working directory.
cnn.save('my_model_v01.h5')

# Test Predictions

In [None]:
# The sample submission file supplies the list of test clips (fname column).
test = pd.read_csv('../input/freesound-audio-tagging/sample_submission.csv')

# is_train=False: batches contain inputs only and are read from 'test_spec';
# shuffle=False keeps predictions aligned with the rows of ``test``.
test_loader = DataGenerator(test, batch_size=64, shuffle=False, is_train=False)

# Model output for every test clip.
probs = cnn.predict(test_loader)
print(probs.shape)

In [None]:
# Model output for the first test clip, rounded to two decimals.
print(np.round(probs[0], 2))


## Submit Top 1 Prediction

In [None]:
submission_top1 = test.copy()

# Most probable class per clip, mapped back to its name.  The whole column is
# assigned at once: element-wise ``submission_top1.label[i] = ...`` is chained
# assignment, which raises SettingWithCopyWarning and does not modify the
# frame when pandas copy-on-write is active.
submission_top1['label'] = labels[np.argmax(probs, axis=1)]

submission_top1.to_csv('submission_top1.csv', index=False, header=True)

submission_top1.head()

## Submit Top 3 Predictions

In [None]:
submission_top3 = test.copy()

# Indices of the three most probable classes per clip, best first.
top3_idx = np.argsort(-probs, axis=1)[:, :3]

# Space-separated class names per clip.  The whole column is assigned at once:
# element-wise ``submission_top3.label[i] = ...`` is chained assignment, which
# raises SettingWithCopyWarning and does not modify the frame when pandas
# copy-on-write is active.
submission_top3['label'] = [' '.join(labels[row]) for row in top3_idx]

submission_top3.to_csv('submission_top3.csv', index=False, header=True)
submission_top3.head()