# Import Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import librosa, librosa.display
from sklearn.model_selection import train_test_split
import os
import json

import tensorflow as tf
from tensorflow import keras

# Gather Data

In [2]:
sr = 22050
# Absolute dataset root: other cells in this notebook call os.chdir(), so a
# path relative to the working directory stops resolving when this cell is re-run.
DATA_DIR = '/content/drive/My Drive/freesound-audio-tagging'
train_csv = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
train_csv2 = pd.read_csv(os.path.join(DATA_DIR, 'train_post_competition.csv'))

test_csv = pd.read_csv(os.path.join(DATA_DIR, 'test_post_competition.csv'))

In [3]:
# Preview the first three rows of the post-competition training metadata
train_csv2.head(3)

Unnamed: 0,fname,label,manually_verified,freesound_id,license
0,00044347.wav,Hi-hat,0,28739,Attribution
1,001ca53d.wav,Saxophone,1,358827,Attribution
2,002d256b.wav,Trumpet,0,10897,Creative Commons 0


In [4]:
# Label of every training clip, in CSV row order
labels = train_csv2['label'].to_numpy()
# Sorted label vocabulary; its order fixes the column order of the one-hot rows
unique_labels = np.unique(labels)
print(unique_labels.shape)
# Compare each clip label against the whole vocabulary in one broadcast,
# then split the result back into one boolean row per clip
boolean_labels = list(labels[:, None] == unique_labels)
print(len(boolean_labels))
print(boolean_labels[0].astype(int))
print(type(boolean_labels))


(41,)
9473
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
<class 'list'>


# Load Audio files and preprocess

In [27]:
def save_mfcc(dataset, n_mfcc=13, n_fft=2048, hop_length=512, train=True,
              base_dir='/content/drive/My Drive/freesound-audio-tagging'):
  '''
  Compute the MFCC matrix of every clip listed in dataset['fname'] and dump
  them all into one JSON file of the form {'mfcc': [matrix, matrix, ...]}.

  dataset = table with an 'fname' column of audio file names
  n_mfcc = number of MFCC coefficients per frame
  n_fft = FFT window size passed to librosa
  hop_length = hop between successive frames passed to librosa
  train = truthy: read from <base_dir>/audio_train, write saved_mfcc1.json
          falsy:  read from <base_dir>/audio_test,  write test_mfcc.json
  base_dir = dataset root holding the audio folders and receiving the JSON

  Each stored matrix is transposed to (frames, n_mfcc) and converted to nested
  lists. Files are addressed by full path, so the working directory is left
  untouched.
  '''
  # Pick the input folder and output file from the same truthiness test so the
  # two can never disagree.
  audio_dir = os.path.join(base_dir, 'audio_train' if train else 'audio_test')
  out_name = 'saved_mfcc1.json' if train else 'test_mfcc.json'

  all_mfcc = []
  for count, fname in enumerate(dataset['fname'], start=1):
    signal, sample_rate = librosa.load(os.path.join(audio_dir, fname))
    # Forward the caller's settings (they used to be overridden by literals);
    # the signal is passed as y= because newer librosa releases make it keyword-only.
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate,
                                n_mfcc=n_mfcc,
                                n_fft=n_fft,
                                hop_length=hop_length)
    all_mfcc.append(mfcc.T.tolist())
    # Lightweight progress report every 500 clips
    if count % 500 == 0:
      print(count)

  with open(os.path.join(base_dir, out_name), 'w') as json_file:
    json.dump({'mfcc': all_mfcc}, json_file)

In [6]:
# # Estimated run time: 3.5 hours. Output is written to saved_mfcc1.json
# save_mfcc(train_csv2)

# Load Data and Prepare data

In [7]:
# # Load completed MFCC data (note: save_mfcc writes 'saved_mfcc1.json'; adjust the file name below if needed)
# os.chdir('/content/drive/My Drive/freesound-audio-tagging')
# with open('saved_mfcc.json', 'r') as fp:
#     updated_data = json.load(fp)

In [8]:
# Find the longest instance for padding
def longest(l):
  '''
  l = list of matrices (nested lists)

  Returns the largest len() found among the lists nested anywhere inside l.
  The length of l itself is not counted. Anything that is not a list, and any
  list containing no nested list, yields 0.

  NOTE: every nesting level is inspected, so for a list of (frames x coeffs)
  matrices the result is the larger of the maximum frame count and the
  maximum row width.
  '''
  if not isinstance(l, list):
    return 0
  best = 0
  for subl in l:
    # Scalars contribute nothing, so only descend into real sub-lists; this
    # also keeps empty lists from reaching max() with no arguments.
    if isinstance(subl, list):
      best = max(best, len(subl), longest(subl))
  return best

In [9]:
# max_len = longest(updated_data['mfcc'])
# print(max_len) # Output: 1292

In [10]:
# Prepare data by padding it
def pad_data(data, longest, n_mfcc=13,
             out_path='/content/drive/My Drive/freesound-audio-tagging/mfcc_padded.json'):
  '''
  data = mfcc data, a dict whose 'mfcc' entry is a list of matrices (lists of rows)
  longest = number of rows every matrix should be padded up to
  n_mfcc = row width used only for a matrix that has no rows to measure
  out_path = JSON file the padded data is written to

  Appends all-zero rows to every matrix shorter than `longest`; matrices that
  are already at least that long are kept as they are (never truncated).
  Writes {'mfcc': padded} to out_path and returns that dict.
  The matrices inside `data` are not modified.
  '''
  new_data = []
  for mfcc in data['mfcc']:
    # Zero rows must match the width of the real rows
    width = len(mfcc[0]) if mfcc else n_mfcc
    missing = longest - len(mfcc)  # <= 0 means nothing to add; range() is then empty
    # Concatenation builds a new list, so the caller's matrix stays untouched
    new_data.append(mfcc + [[0.] * width for _ in range(missing)])
  new_js = {'mfcc': new_data}

  # Write by full path rather than changing the working directory
  with open(out_path, 'w') as json_file:
    json.dump(new_js, json_file)
  return new_js

In [11]:
# # Pad Data Func Call. est 1 min
# new_js = pad_data(updated_data, max_len)

# Read the cached, already padded MFCC data back from Drive
os.chdir('/content/drive/My Drive/freesound-audio-tagging')
with open('mfcc_padded.json', 'r') as padded_file:
    new_js = json.loads(padded_file.read())

In [12]:
# check to make sure all data is of the same size
def size_check(new_data):
  '''
  new_data = dict whose 'mfcc' entry is a list of matrices

  Returns the set of distinct matrix lengths; a single-element set means
  every matrix has the same number of rows.
  '''
  return {len(matrix) for matrix in new_data['mfcc']}

# # Run the Check
# size_check(new_js)  # output 1292

In [13]:
# Features (padded MFCC matrices) and one-hot targets as arrays for the classifier
X = np.asarray(new_js['mfcc'])
y = np.asarray(boolean_labels)

In [14]:
# Hold out 20% of the labelled clips as a stand-in test set until the real test folder is used.
# random_state pins the shuffle so the split, and any metric computed on it, is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and Fit model

In [21]:
# relu - Rectified Linear Unit
# One output unit per column of the one-hot label matrix, so the network
# always matches the label encoding instead of a hand-typed class count.
n_classes = y.shape[1]
model = keras.Sequential([
        # input layer: flatten each (frames, coefficients) matrix into one vector
        keras.layers.Flatten(input_shape=X.shape[1:]),

        # 3 dense layers
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dense(256, activation='relu'),

        # output layer. Softmax normalizes the scores into class probabilities
        keras.layers.Dense(n_classes, activation='softmax')
])

In [22]:
# Compile with Adam at a small step size (1e-4), categorical cross-entropy
# for the one-hot targets, and accuracy as the reported metric.
optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 16796)             0         
_________________________________________________________________
dense_9 (Dense)              (None, 1024)              17200128  
_________________________________________________________________
dense_10 (Dense)             (None, 512)               524800    
_________________________________________________________________
dense_11 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_12 (Dense)             (None, 41)                10537     
Total params: 17,866,793
Trainable params: 17,866,793
Non-trainable params: 0
_________________________________________________________________


In [23]:
#Fit the model
#model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=50)
# model.fit(X, y, batch_size=32, epochs=50) # 50th Epoch - loss: 0.6385 - accuracy: 0.8322

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f5bf0520b38>

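Training accuracy is okay (about 0.83 after 50 epochs), but the loss is still high (about 0.64).
Both figures were measured on the training data, so the next step is to test the model against the test data.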

In [24]:
# # Save model
# model.save('/content/drive/My Drive/freesound-audio-tagging/trained_model_1.h5')

In [25]:
# Restore the previously trained network from its saved .h5 file on Drive
trained_model_path = '/content/drive/My Drive/freesound-audio-tagging/trained_model_1.h5'
new_model = keras.models.load_model(trained_model_path)

# Evaluating Model against Test Data (Unfinished)

In [29]:
# Labels of the test clips, in CSV row order
test_labels = test_csv['label'].to_numpy()
unique_test_labels = np.unique(test_labels)
# Encode against the TRAINING vocabulary (unique_labels) so the columns line up
# with the model's outputs. A test label missing from it yields an all-False row.
boolean_test_labels = list(test_labels[:, None] == unique_labels)

In [31]:
# save_mfcc(test_csv, train=False)