In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

import keras
from keras import layers
from keras.layers import Conv1D, GRU, Dense, BatchNormalization, Dropout, MaxPool1D,Activation, regularizers
from keras import Sequential
from keras.preprocessing import text
from keras.layers import regularizers
from keras.optimizers import Adam, RMSprop

import scipy.io.wavfile as wav

from sklearn.preprocessing import LabelEncoder

import shutil
import os
from pathlib import Path 
import re
from collections import OrderedDict

Using TensorFlow backend.


In [2]:
# Google Colab only: mounts Google Drive. Ignore when running locally.
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# Root folder of the dataset, given relative to the notebook's working directory.
path_data = Path('../data/')
# Last expression of the cell, so the folder's entries are displayed as the cell output.
os.listdir(path_data)

['wav',
 'Schuller_SpeechAnalysis.pdf',
 'readme.docx',
 'ComParE2018_AtypicalAffect.txt']

In [5]:
# Tab-separated label table; later code reads its `file_name` and `emotion` columns.
df_labs = pd.read_csv(path_data/'ComParE2018_AtypicalAffect.txt', sep='\t')

def fetch_data(directory='wav/', subset='train', length=50_000, pad_item=0):
    """Load the wav files of one data split as fixed-length sequences.

    Rows of the module-level ``df_labs`` table whose ``file_name`` contains
    ``subset`` are selected. Each selected file is read from ``directory``,
    left-padded with ``pad_item`` when it has fewer than ``length`` samples
    and cut to its first ``length`` samples when it has more.

    Parameters
    ----------
    directory : str or path-like
        Folder that contains the wav files.
    subset : str
        Substring identifying the split in the file name (e.g. ``'train'``).
    length : int
        Number of samples every returned clip has.
    pad_item : scalar
        Value placed in front of clips shorter than ``length``.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(n_files, length, 1)``, one row per selected label row.
    y : array-like
        The ``emotion`` values of the selected rows, in the same order as ``X``.

    Raises
    ------
    ValueError
        If no file name in ``df_labs`` contains ``subset``.
    """
    print(subset)
    in_subset = df_labs['file_name'].str.contains(subset, regex=False, na=False)
    df_lab_sub = df_labs[in_subset]
    if df_lab_sub.empty:
        raise ValueError(f'no file name in df_labs contains {subset!r}')
    y = df_lab_sub['emotion'].values

    # A list (not a dict keyed by file name) keeps exactly one clip per label
    # row, so X and y stay aligned even if a file name is listed twice.
    clips = []
    for file_name in df_lab_sub['file_name']:
        # wav.read returns (sample_rate, samples); only the samples are used.
        wv = wav.read(os.path.join(directory, file_name))[1]
        n = wv.shape[0]
        if n < length:
            # Pad at the front so the recorded signal ends up at the end of the clip.
            wv = np.hstack([np.full(length - n, pad_item), wv])
        clips.append(wv[:length].reshape(1, -1, 1))

    # np.vstack needs a real sequence; a dict view is rejected by recent NumPy.
    return np.vstack(clips), y


def normalize_data(tr_X, val_X = None, ts_X=None):
    """Standardize arrays with the global mean and std of the training data.

    The mean and standard deviation are computed over all elements of
    ``tr_X`` only and then applied to every array that is passed in, so
    validation/test data never influence the statistics.

    Parameters
    ----------
    tr_X : numpy.ndarray
        Training data; source of the statistics.
    val_X, ts_X : numpy.ndarray, optional
        Further arrays to scale with the training statistics.

    Returns
    -------
    numpy.ndarray or tuple of numpy.ndarray
        Only the standardized ``tr_X`` when it is the sole argument (as
        before). Otherwise a tuple starting with the standardized ``tr_X``
        followed by each supplied optional array, in signature order.
    """
    m = tr_X.mean()
    sd = tr_X.std()
    if sd == 0:
        # Constant training data: avoid 0/0 -> NaN and return all zeros instead.
        sd = 1.0

    scaled = tuple((arr - m) / sd for arr in (tr_X, val_X, ts_X) if arr is not None)
    return scaled[0] if len(scaled) == 1 else scaled

In [6]:
# Number of samples per clip; passed to fetch_data, which pads or truncates every file to it.
length = 50_000
# Only the training split is loaded; the devel and test loads below are disabled.
(tr_X, tr_y) = fetch_data(directory=path_data/'wav', subset='train', length=length)
# (dv_X, dv_y) = fetch_data(directory=path_data/'wav', subset='devel', length=length)
# (ts_X, ts_y) = fetch_data(directory=path_data/'wav', subset='test', length=length)

train


In [8]:
# Standardize the training waveforms with their own global mean and standard deviation.
# NOTE(review): this cell overwrites tr_X and tr_y in place; if it is re-run,
# `le` is re-fitted on the already encoded labels — re-run the loading cell first.
tr_X = normalize_data(tr_X) #, dv_X, ts_X #, val_X, ts_X 

# Encode the emotion labels as integer class ids, the label format used with the
# 'sparse_categorical_crossentropy' loss configured in get_model.
le = LabelEncoder()
tr_y = le.fit_transform(tr_y)
# Encoding of the devel/test labels with the same encoder is currently disabled.
# val_y = le.transform(dv_y)
# ts_y = le.transform(ts_y)

In [0]:
def conv_block(n, input_layer=False, input_shape=None):
    """Build one convolutional block: Conv1D -> ReLU -> Dropout -> MaxPool1D.

    Parameters
    ----------
    n : sequence
        ``(filters, kernel_size, strides)`` of the Conv1D layer.
    input_layer : bool
        When True the Conv1D layer is given an ``input_shape`` so the block
        can be the first layer of a model.
    input_shape : tuple, optional
        Shape of one input sample, used only when ``input_layer`` is True.
        Defaults to ``tr_X.shape[1:]`` (the module-level training array).

    Returns
    -------
    keras.Sequential
        The block, with L2(0.01) kernel regularization on the convolution,
        a dropout rate of 0.5 and max pooling with pool size 2 and stride 2.
    """
    filters, kernel_size, strides = n[0], n[1], n[2]
    conv_kwargs = {
        'strides': strides,
        'kernel_regularizer': regularizers.l2(0.01),
    }
    if input_layer:
        # Only the first block of a model declares the input shape.
        conv_kwargs['input_shape'] = tr_X.shape[1:] if input_shape is None else input_shape

    return Sequential([
        Conv1D(filters, kernel_size, **conv_kwargs),
        # BatchNormalization(),  # disabled experiment
        Activation(activation='relu'),
        Dropout(0.5),
        MaxPool1D(pool_size=2, strides=2),
    ])


def get_model(conv_activations=[(15, 20, 3), (32, 10, 2), (64, 5, 1)], n_classes=4):
    """Build and compile the 1D-convolutional classifier.

    Parameters
    ----------
    conv_activations : sequence of (filters, kernel_size, strides)
        One entry per convolutional block, in order; each entry is passed to
        ``conv_block``. The default list is only read, never modified.
    n_classes : int
        Number of units of the final softmax layer (default 4).

    Returns
    -------
    keras.Sequential
        The model, compiled with RMSprop (lr=1e-4), the
        'sparse_categorical_crossentropy' loss and the 'acc' metric. The
        layer summary is printed as a side effect.
    """
    model = Sequential()

    for i, n in enumerate(conv_activations):
        # Only the first block declares the input shape.
        model.add(conv_block(n, input_layer=(i == 0)))

    # model.add(GRU(100))  # disabled experiment
    model.add(layers.Flatten())

    # model.add(Dense(10, activation='relu', kernel_regularizer=regularizers.l2(0.01)))  # disabled experiment
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer=RMSprop(lr=.0001), loss='sparse_categorical_crossentropy', metrics=['acc'])

    # summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()

    return model

In [29]:
# Each tuple is read by conv_block as (Conv1D filters, kernel size, strides).
# The trailing comment keeps two extra (64, 3, 1) blocks that are currently disabled.
model = get_model(conv_activations=[(15, 30, 5), (32, 15, 3), (64, 10, 1)]) #, (64, 3, 1), (64, 3, 1)
# Train for 10 epochs with batches of 50, holding out 20% of the samples for validation.
# NOTE(review): Keras takes the validation_split slice from the end of the arrays
# before shuffling — confirm the file order does not bias the validation set.
model.fit(tr_X, tr_y, epochs=10, batch_size=50, validation_split=0.2)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_14 (Sequential)   (None, 4997, 15)          465       
_________________________________________________________________
sequential_15 (Sequential)   (None, 830, 32)           7232      
_________________________________________________________________
sequential_16 (Sequential)   (None, 410, 64)           20544     
_________________________________________________________________
flatten_3 (Flatten)          (None, 26240)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 104964    
Total params: 133,205
Trainable params: 133,205
Non-trainable params: 0
_________________________________________________________________
None
Train on 2673 samples, validate on 669 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8

<keras.callbacks.History at 0x7f94e7f36128>

In [30]:
# Second fit call on the same `model` object: training continues from the
# current weights for 10 more epochs (the model is not rebuilt in this cell).
model.fit(tr_X, tr_y, epochs=10, batch_size=50, validation_split=0.2)

Train on 2673 samples, validate on 669 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f94e8905278>

In [9]:
# Class counts over every row of the label table. The column is selected by
# name (as in fetch_data) rather than by position, so a reordered file cannot
# silently count the wrong column.
labs = df_labs['emotion']
labs.value_counts()

neutral    5002
happy      1669
sad         498
angry       172
Name: emotion, dtype: int64