# 1. Nils Ackermann, "Introduction to 1D Convolutional Neural Networks in Keras for Time Sequences", sep 4,2018,


https://blog.goodaudience.com/introduction-to-1d-convolutional-neural-networks-in-keras-for-time-sequences-3a7ff801a2cf

- Conv1D layer -> Fully connected layer with softmax

## 0. Import

In [36]:
from emotion_recognition import utils
from emotion_recognition import features

import numpy as np

import warnings

import keras
from keras.models import Sequential
from keras import layers

warnings.filterwarnings(action='ignore') 

## 1.1 Data preparation

- Setting parameters

In [37]:
window_size = 1000 # 특징 추출을 위한 윈도우 크기
frame_size = 500 # 음성구간 찾기 위한 프레임 크기 
class_size = 4 #class 크기
sr = 16000

# 0 = neutral, 1 = anger, 2 = happiness, 3 = sadness
emotion = ["neutral","anger","happiness","sadness"]

- Loading filelist 

In [38]:
# 파일 이름 읽어오기
path = "../data/"
text_filename = "filelist_wav.txt"

filelist_wav = []
emotionlist = []

f = open(path+text_filename, 'r')

while True:
    line = f.readline()
    if not line: break
    
    filename, label = line.split()
    
    filelist_wav.append(filename)
    emotionlist.append(label)
    
f.close()

- Splitting the data into trainnnig and test
- For test, subjects 1,6,7,11 
- For training, subjects 2,3,4,5,8,9,10,12,13,14,15,16,17,18,19,20

In [39]:
Training_filename = []
Training_emotionlist = []
Test_filename = []
Test_emotionlist = []

for i in range(len(filelist_wav)):
    if (filelist_wav[i].split("_")[0][1:] == '1'):
        Test_filename.append(filelist_wav[i])
        Test_emotionlist.append(emotionlist[i])
    elif (filelist_wav[i].split("_")[0][1:] == '6'):
        Test_filename.append(filelist_wav[i])
        Test_emotionlist.append(emotionlist[i])
    elif (filelist_wav[i].split("_")[0][1:] == '7'):
        Test_filename.append(filelist_wav[i])
        Test_emotionlist.append(emotionlist[i])
    elif (filelist_wav[i].split("_")[0][1:] == '11'):
        Test_filename.append(filelist_wav[i])
        Test_emotionlist.append(emotionlist[i])
    else:
        Training_filename.append(filelist_wav[i])
        Training_emotionlist.append(emotionlist[i])

In [40]:
print (np.shape(Training_filename))
print (np.shape(Test_filename))

(614,)
(226,)


In [41]:
Training_vector = []
Training_label = []

for ix in range(len(Training_filename)):
    # 파일 읽어오기
    if Training_emotionlist[ix] == "excitement":
        continue
        
    if Training_emotionlist[ix] == "fear":
        continue
        
    print (str(ix)+ "\t" + Training_emotionlist[ix] + "\t" + path+'wav/' + Training_filename[ix])
    
    y,sr = utils.loadwav(path+'wav/' + Training_filename[ix])
    
    temp = emotion.index(Training_emotionlist[ix])    
    label = np.zeros(class_size)
    label[temp] = 1
    
    #파일 전체 길이
    length = len(y)
    
    idx = 0
    while(idx != length):
        #voice 구간 구하기
        IAV_th = utils.get_IAV_threshold(y,length,frame_size)
        th = IAV_th/frame_size*2
        
        start_point, end_point = utils.search_voicearea(y,frame_size,length,idx,th,IAV_th)
        
        if(start_point == -1):
            break
            
        idx = end_point
        
        segment_length = end_point-start_point+1
        
        # 음성 구간이 time_step 보다 짧으면 해당 음성 구간에서 특징 추출 x
        if(segment_length-window_size < 7000 ):
            continue
        
        # 세그먼트 추출
        for i in range(start_point,(end_point-window_size), window_size ):
            y_sub = y[i:i+window_size]
            
            Training_vector.append(y_sub)
            Training_label.append(label)

0	sadness	../data/wav/s19_sadness_M_s17.wav
1	sadness	../data/wav/s18_sadness_F_s1.wav
4	anger	../data/wav/s15_anger_F_a11.wav
6	sadness	../data/wav/s19_sadness_M_s8.wav
8	neutral	../data/wav/s18_neutral_F_n14.wav
9	sadness	../data/wav/s20_sadness_F_s20.wav
11	happiness	../data/wav/s3_happiness_M_h10.wav
13	happiness	../data/wav/s5_happiness_M_h8.wav
14	sadness	../data/wav/s8_sadness_M_s17.wav
15	happiness	../data/wav/s5_happiness_M_h16.wav
16	happiness	../data/wav/s4_happiness_M_h10.wav
17	neutral	../data/wav/s9_neutral_M_n10.wav
20	anger	../data/wav/s19_anger_M_a18.wav
21	neutral	../data/wav/s15_neutral_F_n11.wav
22	anger	../data/wav/s14_anger_M_a17.wav
23	neutral	../data/wav/s12_neutral_F_n5.wav
24	neutral	../data/wav/s15_neutral_F_n16.wav
25	happiness	../data/wav/s19_happiness_M_h7.wav
28	happiness	../data/wav/s10_happiness_M_h10.wav
29	happiness	../data/wav/s20_happiness_F_h8.wav
30	happiness	../data/wav/s19_happiness_M_h8.wav
31	sadness	../data/wav/s12_sadness_F_s12.wav
32	neutra

279	happiness	../data/wav/s19_happiness_M_h10.wav
281	sadness	../data/wav/s18_sadness_F_s13.wav
282	happiness	../data/wav/s19_happiness_M_h9.wav
284	neutral	../data/wav/s9_neutral_M_n13.wav
285	happiness	../data/wav/s12_happiness_F_h4.wav
287	anger	../data/wav/s13_anger_F_a12.wav
288	happiness	../data/wav/s10_happiness_M_h14.wav
289	happiness	../data/wav/s2_happiness_M_h18.wav
290	neutral	../data/wav/s19_neutral_M_n13.wav
291	neutral	../data/wav/s13_neutral_F_n13.wav
292	sadness	../data/wav/s15_sadness_F_s5.wav
294	anger	../data/wav/s5_anger_M_a8.wav
296	sadness	../data/wav/s4_sadness_M_s18.wav
298	sadness	../data/wav/s4_sadness_M_s17.wav
299	sadness	../data/wav/s9_sadness_M_s1.wav
301	sadness	../data/wav/s10_sadness_M_s12.wav
302	happiness	../data/wav/s8_happiness_M_h5.wav
303	anger	../data/wav/s15_anger_F_a17.wav
304	neutral	../data/wav/s12_neutral_F_n12.wav
305	sadness	../data/wav/s15_sadness_F_s15.wav
306	anger	../data/wav/s15_anger_F_a16.wav
307	neutral	../data/wav/s14_neutral_M_n

551	sadness	../data/wav/s10_sadness_M_s13.wav
552	sadness	../data/wav/s10_sadness_M_s6.wav
553	neutral	../data/wav/s18_neutral_F_n9.wav
554	happiness	../data/wav/s15_happiness_F_h13.wav
559	happiness	../data/wav/s3_happiness_M_h1.wav
560	sadness	../data/wav/s9_sadness_M_s19.wav
561	neutral	../data/wav/s12_neutral_F_n8.wav
563	sadness	../data/wav/s2_sadness_M_s18.wav
564	happiness	../data/wav/s9_happiness_M_h16.wav
565	anger	../data/wav/s9_anger_M_a11.wav
566	neutral	../data/wav/s3_neutral_M_n16.wav
567	anger	../data/wav/s19_anger_M_a19.wav
568	sadness	../data/wav/s13_sadness_F_s5.wav
569	sadness	../data/wav/s13_sadness_F_s12.wav
574	sadness	../data/wav/s18_sadness_F_s17.wav
576	neutral	../data/wav/s14_neutral_M_n14.wav
577	happiness	../data/wav/s9_happiness_M_h4.wav
580	neutral	../data/wav/s3_neutral_M_n8.wav
582	happiness	../data/wav/s12_happiness_F_h12.wav
583	sadness	../data/wav/s10_sadness_M_s17.wav
585	sadness	../data/wav/s19_sadness_M_s2.wav
586	sadness	../data/wav/s17_sadness_M_

In [42]:
Test_vector = []
Test_label = []

for ix in range(len(Test_filename)):
    # 파일 읽어오기
    if Test_emotionlist[ix] == "excitement":
        continue
        
    if Test_emotionlist[ix] == "fear":
        continue
        
    print (str(ix)+ "\t" + Test_emotionlist[ix] + "\t" + path+'wav/' + Test_filename[ix])
    
    y,sr = utils.loadwav(path+'wav/' + Test_filename[ix])
    
    temp = emotion.index(Test_emotionlist[ix])    
    label = np.zeros(class_size)
    label[temp] = 1
    
    #파일 전체 길이
    length = len(y)
    
    idx = 0
    while(idx != length):
        #voice 구간 구하기
        IAV_th = utils.get_IAV_threshold(y,length,frame_size)
        th = IAV_th/frame_size*2
        
        start_point, end_point = utils.search_voicearea(y,frame_size,length,idx,th,IAV_th)
        
        if(start_point == -1):
            break
            
        idx = end_point
        
        segment_length = end_point-start_point+1
        
        # 음성 구간이 time_step 보다 짧으면 해당 음성 구간에서 특징 추출 x
        if(segment_length-window_size < 7000 ):
            continue
        
        # 세그먼트 추출
        for i in range(start_point,(end_point-window_size), window_size ):
            y_sub = y[i:i+window_size]
            
            Test_vector.append(y_sub)
            Test_label.append(label)

1	sadness	../data/wav/s7_sadness_M_s1.wav
2	happiness	../data/wav/s11_happiness_F_h20.wav
3	sadness	../data/wav/s11_sadness_F_s1.wav
4	sadness	../data/wav/s11_sadness_F_s9.wav
9	neutral	../data/wav/s1_neutral_M_n20.wav
10	sadness	../data/wav/s6_sadness_F_s18.wav
11	anger	../data/wav/s1_anger_M_a4.wav
13	happiness	../data/wav/s6_happiness_F_h14.wav
14	happiness	../data/wav/s11_happiness_F_h15.wav
16	anger	../data/wav/s6_anger_F_a12.wav
18	sadness	../data/wav/s11_sadness_F_s17.wav
19	happiness	../data/wav/s6_happiness_F_h1.wav
21	sadness	../data/wav/s6_sadness_F_s10.wav
24	happiness	../data/wav/s6_happiness_F_h15.wav
26	neutral	../data/wav/s6_neutral_F_n15.wav
27	sadness	../data/wav/s7_sadness_M_s15.wav
28	happiness	../data/wav/s6_happiness_F_h4.wav
29	sadness	../data/wav/s7_sadness_M_s11.wav
30	sadness	../data/wav/s6_sadness_F_s12.wav
31	anger	../data/wav/s7_anger_M_a17.wav
32	neutral	../data/wav/s1_neutral_M_n13.wav
33	anger	../data/wav/s11_anger_F_a16.wav
34	sadness	../data/wav/s7_sad

In [43]:
print (np.shape(Training_vector), np.shape(Test_vector))

(20250, 1000) (7835, 1000)


In [44]:
import math

for i in range(len(Training_vector)):
    Training_vector[i] = Training_vector[i] * 2 / math.pow(2,16)
    
for i in range(len(Test_vector)):
    Test_vector[i] = Test_vector[i] * 2 / math.pow(2,16)

In [45]:
Training_vector = np.expand_dims(Training_vector,axis=2)
Test_vector = np.expand_dims(Test_vector,axis=2)

In [46]:
X_train = np.array(Training_vector)
Y_train = np.array(Training_label)
X_test = np.array(Test_vector)
Y_test = np.array(Test_label)

In [47]:
random_index = np.arange(np.shape(X_train)[0])
np.random.shuffle(random_index)

In [48]:
X_train = X_train[random_index]
Y_train = Y_train[random_index]

In [49]:
print (np.shape(X_train), np.shape(X_test))

(20250, 1000, 1) (7835, 1000, 1)


In [50]:
print (np.shape(Y_train), np.shape(Y_test))

(20250, 4) (7835, 4)


In [51]:
model_m = Sequential()
model_m.add(layers.Conv1D(100, 10, activation='relu', input_shape=(window_size, 1)))
model_m.add(layers.Conv1D(100, 10, activation='relu'))
model_m.add(layers.MaxPooling1D(3))
model_m.add(layers.Conv1D(160, 10, activation='relu'))
model_m.add(layers.Conv1D(160, 10, activation='relu'))
model_m.add(layers.GlobalAveragePooling1D())
model_m.add(layers.Dropout(0.5))
model_m.add(layers.Dense(4, activation='softmax'))
print(model_m.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_9 (Conv1D)            (None, 991, 100)          1100      
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 982, 100)          100100    
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 327, 100)          0         
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 318, 160)          160160    
_________________________________________________________________
conv1d_12 (Conv1D)           (None, 309, 160)          256160    
_________________________________________________________________
global_average_pooling1d_3 ( (None, 160)               0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 160)              

In [52]:
callbacks_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', save_best_only=True),
    keras.callbacks.EarlyStopping(monitor='acc', patience=1)
]

model_m.compile(loss='categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])

BATCH_SIZE = 200
EPOCHS = 100

history = model_m.fit(X_train,
                      Y_train,
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS,
                      validation_data=(X_test,Y_test),
                      verbose=1)

Train on 20250 samples, validate on 7835 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100


Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [53]:
import matplotlib.pyplot as plt

acc = history.history["acc"]
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs= range(1,len(acc)+1)

plt.plot(epochs, acc, 'bo', label = 'Training acc')
plt.plot(epochs, val_acc, 'b', label = 'Validation a cc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label = 'Training loss')
plt.plot(epochs, val_loss,'b',label = 'Validation loss')
plt.title('Training an validation loss')
plt.legend()

plt.show()

KeyError: 'acc'

In [None]:
acc =model_m.evaluate(X_test,Y_test)

In [None]:
print (acc[1])

In [None]:
model_m.save("Conv1D_DNN_subject_windowing7000_class4.h5")