In [1]:
import os

from time import perf_counter
import numpy as np

import soundfile as sf
from collections import Counter
import matplotlib.pyplot as plt

import tensorflow as tf
import keras
from keras.layers import Conv1D, BatchNormalization, Dense, Flatten, Activation
from tensorflow.keras.layers.experimental import preprocessing
from keras.models import Sequential
from keras.callbacks import History, EarlyStopping
history = History()

#data_normalized=0

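Download the dataset from https://www.kaggle.com/toponowicz/spoken-language-identification and extract it so that its `train/` and `test/` folders sit under `../datasets/langid/` (the `DATA_FOLDER` used below).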

In [2]:
# Data generator for serving up batches for training the NN
# ref https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
from keras.utils import Sequence
import pywt

class langidDataGenerator(Sequence):
    """Keras ``Sequence`` that serves batches of wavelet features read from audio files.

    Each audio file is read with ``soundfile``, decomposed with a multi-level
    discrete wavelet transform (``pywt.wavedec``), and the coefficient arrays
    are concatenated end-to-end into a single feature vector, optionally
    truncated to ``truncate_len`` values.

    Parameters
    ----------
    list_IDs : sequence of str
        Paths of the audio files to serve. Must not be empty.
    labels : mapping
        Maps each path in ``list_IDs`` to its integer class index.
    wavelet : str
        Wavelet name passed to ``pywt.wavedec``.
    truncate_len : int or None
        Number of leading coefficients kept per file; ``None`` keeps them all.
    batch_size : int
        Number of files per batch.
    n_channels : int
        Stored on the instance; not used by the feature extraction itself.
    n_classes : int
        Width of the one-hot encoded targets.
    shuffle : bool
        Whether to reshuffle the sample order at the end of every epoch.

    Attributes
    ----------
    dim : tuple
        Shape of one sample (batch axis removed), determined by generating
        the first batch during construction.
    """

    def __init__(self, list_IDs, labels, wavelet='rbio3.1', truncate_len=None,
                 batch_size=32, n_channels=1, n_classes=6, shuffle=True):
        """Store the configuration and probe the first batch for the input shape."""
        # Recent Keras versions expect Sequence/PyDataset subclasses to call
        # the base initializer; on older versions this is a harmless no-op.
        super().__init__()

        if len(list_IDs) == 0:
            # Without this guard the shape probe below fails inside
            # np.expand_dims with an unhelpful axis error.
            raise ValueError('list_IDs must contain at least one file')

        self.wvlt = wavelet
        self.trunc_len = truncate_len
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        # Builds self.indexes, which __getitem__ relies on.
        self.on_epoch_end()

        # Determine the dimensions of the input by generating one batch.
        # Note that this reads up to batch_size files from disk.
        X, _ = self.__getitem__(0)
        self.dim = X.shape[1:]

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Generate batch number ``index``.

        Returns
        -------
        X : numpy.ndarray
            Features with a trailing singleton axis added at position 2.
        y : numpy.ndarray
            One-hot encoded targets with ``n_classes`` columns.
        """
        # Positions (into list_IDs) of the samples belonging to this batch
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]

        # Translate positions into file IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data and add the trailing axis expected by Conv1D
        X, y = self.__data_generation(list_IDs_temp)
        X = np.expand_dims(X, 2)

        return X, y

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def wavelet_features(self, list_IDs_temp):
        """Compute the wavelet feature vector and label for each file.

        Parameters
        ----------
        list_IDs_temp : sequence of str
            Paths of the audio files to process.

        Returns
        -------
        X : numpy.ndarray
            One row of DWT coefficients per file.
        y : list
            Integer class index of each file, looked up in ``self.labels``.
        """
        features = []
        y = []
        for ID in list_IDs_temp:
            # The sample rate returned by sf.read is not needed here.
            signal, _ = sf.read(ID)
            list_coeff = pywt.wavedec(signal, self.wvlt, mode='per')
            # String the coefficient arrays end-to-end in the order wavedec
            # returns them. The previous implementation carried a second
            # branch meant to reverse some arrays, but its guard flag was
            # never set, so plain forward concatenation is what always ran;
            # that behavior is kept.
            dwt_local_coeff = np.concatenate(list_coeff)
            # Slicing with None keeps the whole vector.
            features.append(dwt_local_coeff[:self.trunc_len])
            y.append(self.labels[ID])
        X = np.array(features)
        return X, y

    def __data_generation(self, list_IDs_temp):
        """Generate the features and one-hot targets for the given file IDs."""
        X, y = self.wavelet_features(list_IDs_temp)
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [3]:
DATA_FOLDER = '../datasets/langid/'
train_path = DATA_FOLDER+'/train/'
test_path = DATA_FOLDER+'/test/'

# List each directory exactly once so that the label lists, the file lists and
# the filename -> label dicts below are all built from the same snapshot.
# The order returned by os.listdir is deliberately kept (not sorted): the
# generators further down take leading slices of these lists, and a sorted
# list would group files by their label prefix.
train_names = os.listdir(train_path)
test_names = os.listdir(test_path)

# The label is the first four characters of the filename, e.g. 'es_f'.
train_labels = [filename[:4] for filename in train_names]
test_labels = [filename[:4] for filename in test_names]

# Create numeric labels. Sorting makes the label -> index mapping independent
# of the (arbitrary) directory listing order, so class indices are the same
# on every run and every machine.
labeld = {label: index for index, label in enumerate(sorted(set(train_labels)))}
# number of classes
num_classes = len(labeld)

# Create list of training/test filenames and dict {filename : label}
train_files = [train_path + filename for filename in train_names]
train_labels2 = {train_path + filename: labeld[filename[:4]]
                 for filename in train_names}

# A test file whose label never occurs in the training set raises KeyError here.
test_files = [test_path + filename for filename in test_names]
test_labels2 = {test_path + filename: labeld[filename[:4]]
                for filename in test_names}



In [9]:
epochs = 128
no_train=2000
no_test=200

# Parameters
# n_classes is taken from num_classes (computed from the training filenames)
# so the one-hot targets always match the width of the softmax output layer.
params = {'wavelet' :'rbio3.1',
          'truncate_len' : 1024,
          'batch_size' : 64,
          'n_classes' : num_classes,
          'n_channels' : 1,
          'shuffle' : True}

# Generators
training_generator = langidDataGenerator(train_files[:no_train], train_labels2, **params)
validation_generator = langidDataGenerator(test_files[:no_test], test_labels2, **params)

# Convolutional front end: (filters, kernel_size, strides) per stage, each
# followed by batch normalization and ReLU.
conv_stages = [(32, 9, 3), (16, 7, 2), (16, 5, 1), (8, 3, 1)]
# Dense stages: (units, regularization strength used for both l1 and l2),
# each followed by batch normalization and ReLU.
dense_stages = [(128, 5e-5), (96, 5e-4), (64, 5e-5), (16, 1e-5)]

model = Sequential()
for stage, (filters, kernel_size, strides) in enumerate(conv_stages):
    # Only the first layer declares the input shape probed by the generator.
    extra = {'input_shape': training_generator.dim} if stage == 0 else {}
    model.add(Conv1D(filters, kernel_size=kernel_size, strides=strides, **extra))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
# The Dense stack comes before Flatten, so it operates on the last axis of
# the convolutional output; flattening happens just before the classifier.
for units, reg in dense_stages:
    model.add(Dense(units, kernel_regularizer=keras.regularizers.l1_l2(l1=reg, l2=reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(num_classes, kernel_regularizer=keras.regularizers.l1_l2(l1=5e-5,l2=5e-5),
                activation='softmax'))

model.summary()

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

# Early stopping watches the *training* loss, not the validation loss.
es = EarlyStopping(monitor='loss', verbose=0, patience=8)
t_start = perf_counter()
# Train model on dataset.
# Model.fit accepts Sequence generators directly; Model.fit_generator is
# deprecated in TensorFlow 2.x. The remaining arguments are unchanged.
model.fit(training_generator,
          validation_data=validation_generator, epochs=epochs,
          use_multiprocessing=False, verbose=1,
          workers=0, callbacks=[history,es])

t_stop = perf_counter()
t_diff = t_stop-t_start
print ('Time to train the network {} seconds'.format(t_diff))

train_score = model.evaluate(training_generator, verbose=0)
print('Train loss: {}, Train accuracy: {}'.format(train_score[0], train_score[1]))
test_score = model.evaluate(validation_generator, verbose=0)
print('Test loss: {}, Test accuracy: {}'.format(test_score[0], test_score[1]))


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_20 (Conv1D)           (None, 339, 32)           320       
_________________________________________________________________
batch_normalization_30 (Batc (None, 339, 32)           128       
_________________________________________________________________
activation_30 (Activation)   (None, 339, 32)           0         
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 167, 16)           3600      
_________________________________________________________________
batch_normalization_31 (Batc (None, 167, 16)           64        
_________________________________________________________________
activation_31 (Activation)   (None, 167, 16)           0         
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 163, 16)          

In [None]:
# Learning curves: accuracy on the left panel, loss on the right panel.
# Each entry: (train key, validation key, train label, test label, y-axis label, y-limits or None)
panels = [
    ('accuracy', 'val_accuracy', 'train accuracy', 'test accuracy', 'Accuracy', [0.5,1]),
    ('loss', 'val_loss', 'train loss', 'test loss', 'Loss', None),
]

fig, axarr = plt.subplots(figsize=(14,7), ncols=2)
for ax, (train_key, val_key, train_label, test_label, ylabel, ylim) in zip(axarr, panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=test_label)
    ax.set_xlabel('Number of Epochs', fontsize=18)
    ax.set_ylabel(ylabel, fontsize=18)
    if ylim is not None:
        # Only the accuracy panel uses a fixed vertical range.
        ax.set_ylim(ylim)
    ax.legend()

plt.show()