# Part 03 - Modeling

### 01 - Import libraries

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import IPython.display as ipd
import librosa
import librosa.display

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Flatten

### 02 - Unpickle the features

In [13]:
# Load the pre-computed train / test / hold-out feature tables in one pass.
# NOTE: unpickling can execute arbitrary code; only load pickles that were
# produced by a trusted source.
df_tr, df_ts, df_ho = map(
    pd.read_pickle,
    ('../pickles/train.pkl', '../pickles/test.pkl', '../pickles/hold_out.pkl'),
)

### 03 - Transform into TensorFlow-friendly format

In [17]:
# Use a label encoder to convert the language labels into a numeric target.
# The encoder is fitted on the labels of all three loaded splits. The cell
# previously fitted on `df`, a name that is never defined in this notebook,
# so it raised a NameError on a fresh kernel (Restart & Run All). Fitting on
# the union of the splits also guarantees that every label passed to
# `transform` below is known to the encoder.
encoder = LabelEncoder()
encoder.fit(pd.concat([df_tr['lang'], df_ts['lang'], df_ho['lang']], ignore_index=True))
# classes_ holds the sorted unique labels; the index of a label is its encoded id
labels = encoder.classes_
y_tr = encoder.transform(df_tr['lang'])
y_ts = encoder.transform(df_ts['lang'])
y_ho = encoder.transform(df_ho['lang'])

In [18]:
# Convert each split's 'features' column into a 4-D array of shape
# (rows, 128, 256, 1); the trailing axis is the single channel that the
# Conv2D input layer expects.
def _as_cnn_input(frame):
    """Return frame['features'] stacked and reshaped to (len(frame), 128, 256, 1)."""
    stacked = np.array(frame['features'].tolist())
    return stacked.reshape(stacked.shape[0], 128, 256, 1)


X_tr = _as_cnn_input(df_tr)
X_ts = _as_cnn_input(df_ts)
X_ho = _as_cnn_input(df_ho)

In [19]:
# Print the shape of each feature array followed by its label vector
# (train, test, hold-out) to confirm the row counts line up.
for split in (X_tr, y_tr, X_ts, y_ts, X_ho, y_ho):
    print(split.shape)

(5924, 128, 256, 1)
(5924,)
(740, 128, 256, 1)
(740,)
(740, 128, 256, 1)
(740,)


### 04 - CNN Model

In [44]:
# Two conv/pool stages followed by a dense classifier head.
# 'same' padding keeps the spatial size through each convolution, so only the
# 2x2 pooling layers halve height and width.
model = Sequential([
    Conv2D(16, (5, 5), activation='relu', input_shape=(128, 256, 1), padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(32, (5, 5), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    # two output units with softmax: one probability per class
    Dense(2, activation='softmax'),
])

In [45]:
# Print the layer-by-layer output shapes and parameter counts of the model
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 128, 256, 16)      416       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 64, 128, 16)       0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 64, 128, 32)       12832     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 32, 64, 32)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 65536)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               8388736   
_________________________________________________________________
dense_7 (Dense)              (None, 2)                

In [46]:
# Sparse categorical cross-entropy takes the integer class ids produced by
# the label encoder directly, so the targets are not one-hot encoded.
model.compile(
    loss='SparseCategoricalCrossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [47]:
# Train for 10 epochs; the test split is used as the per-epoch validation set
history = model.fit(
    x=X_tr,
    y=y_tr,
    validation_data=(X_ts, y_ts),
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [48]:
# Score the trained model on the hold-out split, which was not passed to fit().
# NOTE(review): despite the `val_` prefix, these are hold-out metrics, not the
# per-epoch validation metrics recorded in `history`; names kept for compatibility.
val_loss, val_acc = model.evaluate(x=X_ho, y=y_ho, verbose=1)



In [49]:
# Persist the trained model to disk under a date-stamped directory
model.save(filepath='../saved_model/20200603')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ../saved_model/20200603/assets
