# Project: Speech Emotional Recognition

## Requirements
1. OpenSmile

## 1. Preprocessing input data

### 1.1. Copying input wav files to data_path/input

Import the required modules.

In [5]:
import os
import shutil
from sklearn.model_selection import train_test_split
import csv
import numpy as np

Assuming all required files are within the "data" directory

In [6]:
data_path = "/work/peiyun/data"
input_path = os.path.join(data_path, "input")

Create input directory (data_path/input) if not exist.

In [17]:
if not os.path.exists(input_path):
    os.makedirs(input_path)

Copying all input files to input_path. (10,039 utterances in total)

In [18]:
print input_path

/work/peiyun/data/input


In [19]:
# for each session
for session in range(1,6):
    path = os.path.join(data_path, "IEMOCAP", "Session" + str(session), "sentences", "wav")
    
    # for each dialog
    for dialog in os.listdir(path):
        dialog_path = os.path.join(path, dialog)
            
        # for each utterance (file)
        for filename in os.listdir(dialog_path):
            if filename.endswith(".wav"):
                shutil.copy(os.path.join(dialog_path, filename), os.path.join(input_path, filename))

### 1.2. Getting a dictionary of utterance labels

In [20]:
label_dict = {}

# for each session
for session in range(1,6):
    path = os.path.join(data_path, "IEMOCAP/Session" + str(session), "dialog", "EmoEvaluation")
    
    # for file in the session
    for filename in os.listdir(path):
        
        # only interested in "summary" txt files
        if filename.endswith(".txt"):
            f = open(os.path.join(path, filename), "r")
            for line in f.readlines():
                if line[0] == "[":
                    name, label = line.split("\t")[1:3]
                    label_dict[name] = label
            f.close()

### 1.3. Split files into training and test sets (70% training, 15% test, 15% dev, seed = 100)

In [21]:
# get filename list
filename_ls = []
for filename in os.listdir(input_path):
    if filename.endswith(".wav"):
        filename_ls.append(filename[:-4])  # [:-4] for removing .wav
        
# get corresponding label list
label_ls = []
for filename in filename_ls:
    label_ls.append(label_dict[filename])
        
# splitting into train and test
filename_train, filename_remain, label_train, label_remain = train_test_split(filename_ls, label_ls, 
                                                                          train_size=0.7, random_state=100)

# splitting into train and test
filename_dev, filename_test, label_dev, label_test = train_test_split(filename_remain, label_remain, 
                                                                          test_size=0.5, random_state=100)

### 1.4. Writing labels into csv file (7027 train instances, 1506 test instances, 1506 dev instances)

In [22]:
with open(os.path.join(data_path, "label.csv") , mode='w') as label_file:
    writer = csv.writer(label_file, delimiter=",")
    
    # training instances
    for filename in filename_train:
        writer.writerow([filename, "train", label_dict[filename]])
            
    # test instances
    for filename in filename_test:
        writer.writerow([filename, "test", label_dict[filename]])
        
    # test instances
    for filename in filename_dev:
        writer.writerow([filename, "dev", label_dict[filename]])
                
    label_file.close()

### 1.5. Move files into test, dev, and train directories

Create test and train directory for input instances if not exist.

In [23]:
for data_type in ["train", "test", "dev"]:
    path = os.path.join(input_path, data_type)
    if not os.path.exists(path):
        os.makedirs(path)

Moving files to its directory.

In [24]:
for filename in filename_train:
    shutil.move(os.path.join(input_path, filename + ".wav"), os.path.join(input_path, "train", filename + ".wav"))
    
for filename in filename_test:
    shutil.move(os.path.join(input_path, filename + ".wav"), os.path.join(input_path, "test", filename + ".wav"))
    
for filename in filename_dev:
    shutil.move(os.path.join(input_path, filename + ".wav"), os.path.join(input_path, "dev", filename + ".wav"))

## 2. Feature Extraction with openSMILE

File for feature extractions.

In [82]:
%%file feature.py

# Import the required modules
import argparse
import os
from subprocess import call
import csv
import sys
import numpy as np
from time import gmtime, strftime, time

# Global variables
data_path = "/work/peiyun/data"

# Get the ground_truth label number of the file
def get_label(label_file, filename):
    
    with open(label_file, mode = "r") as f:
        reader = csv.reader(f)
        for row in reader:
            name, data_type, label = row
            if name == filename:
                return label

# Check input and output directories
def check_dirs(args):
    
    # Check input directory (if not exist -> error)
    if not os.path.exists(os.path.join(data_path, args.input_dir)):
        print "Error: input directory not exist"
        return False
    for data_type in ["train", "test"]:
        path = os.path.join(data_path, args.input_dir, data_type)
        if not (os.path.exists(path)):
            print "Error: input directory missing train or test directories"
            return False
    
    # Check output directory (if not exist -> create one)
    if not os.path.exists(os.path.join(data_path, args.output_dir)):
        os.makedirs(os.path.join(data_path, args.output_dir))
    for data_type in ["train", "test"]:
        path = os.path.join(data_path, args.output_dir, data_type)
        if not os.path.exists(path):
            os.makedirs(path)
    
    return True

# Function for extracting features with openSMILE (Return whether successed)
def extract_features(args):
    if not check_dirs(args):
        return False   # failed to read inputs
            
    # Iterate over wav audio files in input directory
    for data_type in ["train", "test", "dev"]:
        path_in = os.path.join(data_path, args.input_dir, data_type)
        
        for filename in os.listdir(path_in):
            
            # Only interested in wav files
            if filename.endswith(".wav"):
                # in
                file_in = os.path.join(path_in, filename)
                config = os.path.join(data_path, "config", args.config)
                
                filename = filename[:-4]  # [:-4] for removing .wav
                
                # out
                path_out = os.path.join(data_path, args.output_dir, data_type)
                csv_out = os.path.join(path_out, filename + "_" + args.config[:-5] + ".csv")
                arff_out = os.path.join(path_out, filename + "_" + args.config[:-5] + ".arff")  # [:-5] for removing .conf
                label = get_label(os.path.join(data_path, args.label), filename)
                
                # use openSMILE
                call(["SMILExtract", "-l", "0", "-noconsoleoutput", "-I", file_in, 
                      "-C", config, "-D", csv_out, "-O", arff_out, "-instname", filename, "-label", label])
                
    return True

# Obtaining args from terminal
def get_args():
    
    parser = argparse.ArgumentParser(description='Extract features for files in the directory using openSMILE')
    
    parser.add_argument("-i",                   # optional argument (no "-" for positional)
                        "--input_dir",   # name of the attribute (dest)
                        action = "store",       # can be "store", "store_const", "store_true", etc.
                        # nargs = N for associating N args with a single action
                        # const = ... to hold constant values
                        # default = ... to set default value
                        type = str,             # check arg type
                        # choice = [.., .., ..] # restrict set of values
                        required = True,        # make an option required
                        # metavar = "XXX" for changing display name
                        help = "The directory of input audio files (wav)")
    
    parser.add_argument("-o", "--output_dir", type = str, required = True, help = "The directory of results")
    parser.add_argument("-c", "--config", type = str, required = True, help = "Configuration filename")
    parser.add_argument("-l", "--label", type = str, required = True, help = "Label filename")
    args = parser.parse_args()
    
    return args

def main():
    # Obtaining terminal args
    args = get_args()
    
    start_time = time()
    
    # Extracting features according to args
    if not extract_features(args):
        print "Failed to extract features"
    else:
        end_time = time()
        print("Time taken for extracting features:", strftime("%H:%M:%S", gmtime(end_time - start_time)))
        print "Successfully extracted features"

# If running the file directly
if __name__ == "__main__":
    main()

Overwriting feature.py


Running script for extracting features. 

In [None]:
%%!
python feature.py -i "input" -o "output" -c "IS09_emotion.conf" -l "label.csv"

In [83]:
%%!
python feature.py -i "input" -o "output" -c "IS10_paraling.conf" -l "label.csv"

["('Time taken for extracting features:', '00:35:29')",
 'Successfully extracted features']

## 3. Deep Neural Network for emotion recognition

Import the required modules.

In [94]:
from keras.models import Sequential
import pandas as pd

Using TensorFlow backend.


Preparing training, development, and test sets in the format that is suitable for neural network.

In [105]:
# Get the ground_truth label number of the file
def get_label(filename):
    label_file = os.path.join(data_path, "label.csv")
    
    with open(label_file, mode = "r") as f:
        reader = csv.reader(f)
        for row in reader:
            name, data_type, label = row
            if name == filename:
                return label

In [162]:
def extract_Xy(file_path):
    
    # Obtain dataframe for the csv file (one utterance/instance)
    df = pd.read_csv(file_path)
    
    print type(df)
    
    print df.columns[1]

    # Get filename
    filename = df.loc["name",]
    
    print filename
    
    # Clean unnecessary columns
    df = df.drop(columns = ["name", "frameTime"])
    
    label = get_label(filename)
    
    print df
    
    print label

In [163]:
file_path = os.path.join(data_path, "output", "train", "Ses05M_script01_2_F000_IS10_paraling.csv")
extract_Xy(file_path)

<class 'pandas.core.frame.DataFrame'>


IndexError: index 1 is out of bounds for axis 0 with size 1

In [None]:
df = pd.DataFrame(np.arange(12).reshape(3,4),columns=['A', 'B', 'C', 'D'])

In [125]:
df["A"].values[1]

4

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


Import the required modules.

In [93]:
from keras.models import Sequential

Using TensorFlow backend.


In [94]:
model = Sequential()

In [None]:
def training(model_name, fold,
             x_train, y_train,
             x_dev, y_dev,
             x_test, y_test,
             batch_size,
             nb_epoch,
             validation_split,
             params_path):
    params = GCNN_Params(params_path)
    if fold > 0:
        model_name = os.path.join(model_name, str(fold))

    """ Training process of GCNN """
    print('\nBuilding GCNN...')
    all_model = generate_model(params.is_dilated,
                               params.nb_filter,
                               params.dilation_rate,
                               params.n_stack,
                               x_train.shape)

    print('\nCompile all_model...')
    all_model.compile(loss='binary_crossentropy',
            optimizer='adam',
            metrics=['accuracy', precision, recall, fscore])
    all_model.summary()

    best_model_filepath = os.path.join("models", model_name +".best.hdf5")
    if not os.path.exists(os.path.dirname(best_model_filepath)):
        os.makedirs(os.path.dirname(best_model_filepath))
    tensorboard_path = os.path.join("logs", "tensorboard", model_name)
    if not os.path.exists(tensorboard_path):
        os.makedirs(tensorboard_path)
    csv_logpath = os.path.join("logs", "csv", model_name + '.log.csv')
    if not os.path.exists(os.path.dirname(csv_logpath)):
        os.makedirs(os.path.dirname(csv_logpath))

    # Create all callback instances
    # Early stop is not used
    early_stop = EarlyStopping(monitor='val_acc', patience=5, mode='max', verbose=1)
    checkpoint_best = ModelCheckpoint(best_model_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    csv_logger = CSVLogger(csv_logpath)
    tensorboard = TensorBoard(log_dir=tensorboard_path, histogram_freq=0, write_graph=True, write_images=True)
    callbacks_list = [tensorboard, metrics, checkpoint_best, csv_logger] # [metrics, checkpoint_best, tensorboard, early_stop]

    print('\nFitting all_model...')

    if validation_split > 0:
        print("Fitting all_model with validation split...   ", validation_split)
        history = all_model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=nb_epoch,
                                validation_split=validation_split,
                                callbacks=callbacks_list,
                                shuffle=True)
    elif x_dev is not None and y_dev is not None:
        print("Fitting all_model with validation set...")
        history = all_model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=nb_epoch,	
                                validation_data=(x_dev, y_dev),
                                callbacks=callbacks_list,
                                shuffle=True)
    elif x_test is not None and y_test is not None:
        print("Fitting all_model using test set...")
        history = all_model.fit(x_train, y_train,
                                batch_size=batch_size,
                                epochs=nb_epoch,
                                validation_data=(x_test, y_test),
                                callbacks=callbacks_list,
                                shuffle=True)

    last_model_filepath = os.path.join("models", model_name +".last.hdf5")
    all_model.save(last_model_filepath)

    # Save model plot
    if fold > 0:
        model_plot_name = os.path.join("models", "/".join(model_name.split("/")[:-1]), "structure.png") # take only model name, without fold
    else:
        model_plot_name = os.path.join("models", model_name + ".structure.png") # take only model name, without fold
    if not os.path.exists(os.path.dirname(model_plot_name)):
        os.makedirs(os.path.dirname(model_plot_name))
    plot_model(all_model, to_file= model_plot_name)

    acc = 0
    if x_test is not None and y_test is not None:
        print("\nEvaluate all_model")
        print("\# Best all_model based on val set")
        all_model.load_weights(best_model_filepath)
        score = all_model.evaluate(x_test, y_test, batch_size=batch_size)
        print(score)
        y_pred = np.round(all_model.predict(x_test, batch_size=batch_size))
        print(y_test.shape)
        print(np.squeeze(y_pred).shape)
        _val_f1 = f1_score(y_test, y_pred)
        _val_recall = recall_score(y_test, y_pred)
        _val_precision = precision_score(y_test, y_pred)
        TN, FP, FN, TP = confusion_matrix(y_test, y_pred, labels=[0,1]).ravel()

        acc = (TP + TN) / (TP + TN + FP + FN)
        print_cm(TP, FP, FN, TN, ["Control","Dementia"])
        UAR, WAR, fscore_0, fscore_1, uaf, waf = get_other_metrics(TP, FP, FN, TN)
        print("test_acc: %.4f | test_uar: %.4f - test_war: %.4f | - test_fscore0: %.4f - test_fscore1: %.4f - test_uaf: %.4f - test_waf: %.4f" % (acc, UAR, WAR, fscore_0, fscore_1, uaf, waf))
        log = TrainingLog(csv_logpath)
        log.print()
        return np.array([y_test.tolist(), np.squeeze(y_pred).tolist()]).transpose().tolist(), acc, uaf, waf
    else:
        return None, None, None, None
    print("Training finished. Log can be found in folder", csv_logpath)

In [None]:
# Parameters
MAX_LEN = 22500 # 22499 
DEF_BATCH_SIZE = 4
DEF_NB_EPOCH = 32
DEF_VALIDATION_SPLIT = 0
SHUFFLE = True
DEFAULT_THRESHOLD = 0.5

