In [13]:
import datetime
import time
import warnings
import os

import numpy as np
import random
import pandas as pd
from collections import Counter
    
from glob import glob

from scipy.io import wavfile
from scipy.signal import stft

from skimage import io, transform
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import keras
from keras.utils import to_categorical, plot_model
from keras.models import load_model
from keras import optimizers
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping

from simple_models_0911 import deep, deep_cnn, light_cnn, elvinouyang


warnings.filterwarnings("ignore")
map_file = '../../01_preprocess/train_map.csv'
map_df=pd.read_csv(map_file, index_col=0)

print(">>> Loading the data mapping files...")
map_df = pd.read_csv(map_file, index_col=0)
targets_to_keep = ['yes', 'no', 'up', 'down', 'left',
                   'right', 'on', 'off', 'stop', 'go', 'silence']
map_df['target'] = map_df['label']
map_df['target'] = map_df['target'].apply(
    lambda x: x if x in targets_to_keep else 'unknown')
label_to_ix = {
    'unknown': 11,
    'silence': 1,
    'down': 2,
    'go': 3,
    'left': 4,
    'no': 5,
    'off': 6,
    'on': 7,
    'right': 8,
    'stop': 9,
    'up': 10,
    'yes': 0}
map_df['label'] = map_df['target'].apply(lambda x: label_to_ix[x])

random_state=2018
test_size=0.3
val_size=0.2

INPUT_SHAPE = (99,161,3)
BATCH = 32
EPOCHS = 15

NUM_CLASSES=len(label_to_ix)

>>> Loading the data mapping files...


In [2]:
map_df.head()

Unnamed: 0,path,label,pict_path,target
0,../train/audio/bed/c245d3d7_nohash_0.wav,11,../train/picts/log_spec/bed/c245d3d7_nohash_0.png,unknown
1,../train/audio/bed/a1a59b72_nohash_0.wav,11,../train/picts/log_spec/bed/a1a59b72_nohash_0.png,unknown
2,../train/audio/bed/89f3ab7d_nohash_1.wav,11,../train/picts/log_spec/bed/89f3ab7d_nohash_1.png,unknown
3,../train/audio/bed/35c8fa78_nohash_1.wav,11,../train/picts/log_spec/bed/35c8fa78_nohash_1.png,unknown
4,../train/audio/bed/1706c35f_nohash_0.wav,11,../train/picts/log_spec/bed/1706c35f_nohash_0.png,unknown


In [3]:
x_train = []
y_train= []
y_train_copy=[]
for i in range(map_df.shape[0]):
    pict_path='../../'+map_df.iloc[i,2]
    x_train.append(io.imread(pict_path)[:,:,:3])
    y_train.append(map_df.iloc[i,1])   
x_train = np.array(x_train)
y_train_copy=y_train
y_train=to_categorical(np.array(y_train), num_classes = 12)

In [5]:
classes = ['yes', 'no', 
           'up', 'down', 
           'left', 'right', 
           'on', 'off', 
           'stop', 'go', 
           'silence', 'unknown']

all_classes = [x for x in classes[:11]]
for ind, cl in enumerate(os.listdir('../../../train/audio/')):
    if cl not in classes:
        all_classes.append(cl)
print(all_classes)

['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go', 'silence', 'bed', 'one', 'sheila', 'two', 'wow', 'marvin', 'four', 'house', 'nine', 'bird', 'eight', 'three', 'dog', '_background_noise_', 'seven', 'cat', 'happy', 'six', 'five', 'tree', 'zero']


In [6]:
def get_class_weights(y):
    counter = Counter(y)
    majority = max(counter.values())
    return  {cls: float(majority/count) for cls, count in counter.items()}  

class_weights = get_class_weights(y_train_copy)

In [8]:
x_train,x_val,y_train,y_val=train_test_split(x_train, y_train, test_size=0.1, random_state=2017)

In [22]:
def training_model(model_name="deep_cnn", optimizer="Adam", imbalanced=True, EPOCHS=10, learning_rate=0.005,
                   momentum=0.95,BATCH=16):
    """
     Keep adding new models first in the python script and then add them below
    """ 
    
    if model_name=="deep_cnn":
        model = deep_cnn(INPUT_SHAPE, NUM_CLASSES)
    elif model_name=="light_cnn":
        model = light_cnn(INPUT_SHAPE, NUM_CLASSES)
    elif model_name== "elvinouyang":
        model = elvinouyang(INPUT_SHAPE, NUM_CLASSES)
        
    if optimizer== 'Adam':    
        model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
    elif optimizer=='SGD':
        sgd = optimizers.SGD(lr=0.005, momentum=0.95)
        model.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])
    callbacks = [EarlyStopping(monitor='val_acc', patience=4, verbose=1, mode='max')]
    if imbalanced==False:
        history = model.fit(x_train, 
                   y_train, 
                   batch_size = BATCH, 
                   epochs = 20, 
                   verbose = 1, shuffle = True, 
                   validation_data = (x_val, y_val) 
                   )
    else:
        history = model.fit(x_train, 
                   y_train, 
                   batch_size = BATCH, 
                   epochs = 20, 
                   verbose = 1, shuffle = True, 
                   class_weight = class_weights,
                   validation_data = (x_val, y_val) 
                   )
    return model

#model.save('0912_simple_cnn_model.h5')
#simple_cnn_model = load_model('0912_simple_cnn_model.h5')

def predictions_val(model):
    y_pred_proba = model.predict(x_val)
    y_pred = np.argmax(y_pred_proba, axis=1)
    _,_,_,y_val_true=train_test_split(map_df["path"], map_df["label"], test_size=0.1, random_state=2017)
    y_true = y_val_true.values
    acc_score = accuracy_score(y_true, y_pred)
    #confusion_matrix=confusion_matrix(y_true,y_pred)
    return y_pred_proba, y_pred, y_true, acc_score

In [17]:
model_deep_cnn=training_model(model_name="light_cnn", imbalanced=True, EPOCHS=10)

W0922 02:51:37.718171 140178720864000 deprecation.py:323] From /home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 99, 161, 3)        0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 99, 161, 3)        12        
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 98, 160, 16)       208       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 97, 159, 16)       1040      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 48, 79, 16)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 48, 79, 16)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 46, 77, 32)        4640      
__________

In [18]:
y_pred_proba,y_pred, y_true, acc_score=predictions_val(model_deep_cnn)

In [19]:
acc_score

0.8754655146730225

In [20]:
print(confusion_matrix(y_true,y_pred))

[[ 217    1    1    0    7    1    0    2    0    0    1    3]
 [   0  257    0    0    1    0    0    0    0    0    0    0]
 [   1    0  217   11    1    7    2    1    0    0    1    3]
 [   1    0    1  231    1    9    0    1    0    2    0    3]
 [   2    0    1    2  252    0    0    0    2    1    0    0]
 [   1    0    0    4    1  218    1    0    0    0    3    5]
 [   1    0    0    2    1    0  209    1    1    1    4    2]
 [   0    0    1    2    0    0    8  244    1    3    2    1]
 [   0    0    0    0    2    1    0    1  239    0    2    1]
 [   1    0    1    1    0    0    2    0    0  226    2    2]
 [   0    0    1    2    2    0    9    1    0    1  208    3]
 [  30    5   51   82  131   37   69  102  102   32   44 3359]]


In [None]:
model_elvin=training_model(model_name="elvinouyang", optimizer='SGD', imbalanced=True, EPOCHS=20)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 99, 161, 3)        0         
_________________________________________________________________
batch_normalization_13 (Batc (None, 99, 161, 3)        12        
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 96, 158, 24)       1176      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 48, 79, 24)        0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 48, 79, 24)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 46, 77, 48)        10416     
_________________________________________________________________
zero_padding2d_4 (ZeroPaddin (None, 46, 78, 48)        0         
__________

In [23]:
y_pred_proba,y_pred, y_true, acc_score=predictions_val(model_elvin)

In [24]:
print(confusion_matrix(y_true,y_pred))

[[ 217    0    2    1    3    0    0    2    2    2    3    1]
 [   0  257    0    1    0    0    0    0    0    0    0    0]
 [   0    0  209   18    1   10    0    4    0    0    1    1]
 [   1    0    4  221    1   14    0    2    2    1    1    2]
 [   3    0    0    1  248    1    0    0    4    1    2    0]
 [   0    1    3   10    1  210    1    0    2    1    1    3]
 [   0    0    0    1    0    1  201    3    0    9    7    0]
 [   0    0    0    2    1    1    5  243    0    2    5    3]
 [   0    0    0    0    2    2    0    1  236    0    1    4]
 [   0    0    1    1    0    0    1    0    0  229    1    2]
 [   0    0    0    2    0    0    4    1    2    7  211    0]
 [  17    8   68  136   30   94   31  117  115   78   52 3298]]


In [11]:
model_elvin_cw=training_model(model_name="elvinouyang", imbalanced=True, EPOCHS=10)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 99, 161, 3)        0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 99, 161, 3)        12        
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 96, 158, 24)       1176      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 48, 79, 24)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 48, 79, 24)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 46, 77, 48)        10416     
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 46, 78, 48)        0         
__________

In [12]:
y_pred_proba,y_pred, y_true, acc_score=predictions(model_elvin_cw)

chum
20137
test
test
test
test
test
test
test
test
test
  1/630 [..............................] - ETA: 5:00test
  4/630 [..............................] - ETA: 1:24test
  7/630 [..............................] - ETA: 53s test
 10/630 [..............................] - ETA: 40stest
 13/630 [..............................] - ETA: 34stest
test
 15/630 [..............................] - ETA: 32stest
test
 17/630 [..............................] - ETA: 31stest
test
 19/630 [..............................] - ETA: 30stest
test
 21/630 [>.............................] - ETA: 29stest
test
 23/630 [>.............................] - ETA: 29stest
test
 25/630 [>.............................] - ETA: 28stest
test
 27/630 [>.............................] - ETA: 28stest
test
 29/630 [>.............................] - ETA: 27stest
test
 31/630 [>.............................] - ETA: 27stest
test
 33/630 [>.............................] - ETA: 27stest
test
 35/630 [>.............................] - ETA

test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
test
20137
test


In [13]:
print(confusion_matrix(y_true,y_pred))

[[7330    1    1    0    0   58  102 1559 1586 1727    0    0]
 [ 414    0    0    0    0    4    9  100   96  110    0    0]
 [ 419    0    0    0    0    4    9   81   88   92    0    0]
 [ 422    0    0    0    0    2    6   96   94   82    0    0]
 [ 408    0    0    0    0    1    6   94   99   96    0    0]
 [ 414    0    1    0    0    4    9   88   95  108    0    0]
 [ 422    0    0    0    0    3   11   92  105   86    0    0]
 [ 419    0    0    0    0    4    7  102   75   87    0    0]
 [ 386    0    0    0    0    5   11   97  109   96    0    0]
 [ 381    0    0    0    0    3    6  100   88  113    0    0]
 [ 411    0    0    0    0    3    9   98  110   79    0    1]
 [ 422    0    0    0    0    3    3   94   96   85    0    0]]


In [15]:
acc_score

0.38084123752296767

In [21]:
model_elvin_cw_sgd=training_model(model_name="elvinouyang", optimizer="SGD", imbalanced=True, EPOCHS=30)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 99, 161, 3)        0         
_________________________________________________________________
batch_normalization_16 (Batc (None, 99, 161, 3)        12        
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 96, 158, 24)       1176      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 48, 79, 24)        0         
_________________________________________________________________
dropout_16 (Dropout)         (None, 48, 79, 24)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 46, 77, 48)        10416     
_________________________________________________________________
zero_padding2d_6 (ZeroPaddin (None, 46, 78, 48)        0         
__________

In [19]:
y_pred_proba,y_pred, y_true, acc_score=predictions_val(model_elvin_cw_sgd)

NameError: name 'model_elvin_cw_sgd' is not defined