#### Convert the original dataset files (extension .mat) to CSV format.
 - The dataset comprises 480 .mat files (MATLAB files).

#### Import the necessary libraries.

In [1]:
import scipy.io
import pandas as pd
import numpy as np

#### Function that returns the scale (stress level) that a given individual assigned to a given task.
- The first parameter is the DataFrame loaded from the spreadsheet that contains all the classifications assigned by the individuals.
- The second parameter is the name of the file being processed. Each file holds the recording of one execution of one task, so all samples in a file share the same scale.

In [2]:
def getScale(data_frame, file_name) :
    """Return the scale a subject assigned to the task recorded in a file.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Table of ratings. It must contain a 'Subject' column and the rating
        columns named '<prefix><n>', where the prefix is 'Maths', 'Symmetry'
        or 'Stroop' and <n> is the last character of the trial field.
    file_name : str
        Recording name split on '_', e.g. 'Arithmetic_sub_1_trial1.mat' or
        'Mirror_image_sub_1_trial1.mat'.

    Returns
    -------
    The value stored in the subject's row under the matching rating column,
    or 0 when the task (first field of the name) is not Arithmetic, Mirror
    or Stroop.

    Raises
    ------
    IndexError
        If no row has the requested subject, or the name has too few fields.
    KeyError
        If the expected rating column is not present in ``data_frame``.
    ValueError
        If the subject field of the name is not an integer.
    """
    file_itens = file_name.split('_')
    task_desc = file_itens[0]

    # Rated tasks mapped to the prefix of their rating columns.
    column_prefixes = {"Arithmetic": "Maths", "Mirror": "Symmetry", "Stroop": "Stroop"}

    # Any other task has no rating column; answer 0 without touching the table.
    if task_desc not in column_prefixes:
        return 0

    # "Mirror_image_..." contains one extra '_'-separated field, which shifts
    # the subject and trial fields one position to the right.
    field_offset = 1 if task_desc == "Mirror" else 0
    sub_number = file_itens[2 + field_offset]
    try_number = file_itens[3 + field_offset].split('.')[0]

    column_name = column_prefixes[task_desc] + try_number[-1]

    # Select by boolean mask instead of mixing an index *label* with the
    # positional .iloc accessor: this stays correct for any DataFrame index.
    subject_mask = (data_frame['Subject'] == int(sub_number)).to_numpy()
    subject_values = data_frame.loc[subject_mask, column_name]

    # First matching row; raises IndexError when the subject is unknown.
    return subject_values.iloc[0]

#### Function that converts the list of .mat files into CSV format.
- The results are combined into a single CSV file.

In [3]:
def join_files(filesList):
    """Merge the listed .mat recordings into a single '|'-separated CSV file.

    Every file name in ``filesList`` is loaded from '../dataset/'. Each numpy
    array found in a file is handled as a (channels x samples) matrix: row i
    is appended to output column i, and one label per sample, obtained from
    ``getScale`` with the table read from '../dataset/scales.xls', is
    appended to the 'Scale' column. The combined table is written to
    '../dataset/original-sam-dataset.csv'.

    Parameters
    ----------
    filesList : iterable of str
        Names (not paths) of the .mat files, in the order they should appear
        in the output.

    Raises
    ------
    ValueError
        If an array in a file is not 2-D with exactly one row per channel.
        Accepting it would misalign the feature and label columns.
    """
    print("Start function join_files.")

    # Output column order; row i of each recording goes to channel_names[i].
    channel_names = ['CZ', 'FZ', 'Fp1', 'F7', 'F3', 'FC1', 'C3', 'FC5', 'FT9', 'T7',
                     'CP5', 'CP1', 'P3', 'P7', 'PO9', 'O1', 'PZ', 'OZ', 'O2',
                     'PO10', 'P8', 'P4', 'CP2', 'CP6', 'T8', 'FT10', 'FC6',
                     'C4', 'FC2', 'F4', 'F8', 'Fp2']
    n_channels = len(channel_names)

    # Collect per-file pieces and join them once at the end. Calling
    # np.append inside the loop re-copies the whole column on every call.
    feature_chunks = [[] for _ in range(n_channels)]
    label_chunks = []

    all_scales = pd.read_excel('../dataset/scales.xls')

    for fileName in filesList:
        fileContent = scipy.io.loadmat('../dataset/' + fileName)
        for x in fileContent:
            data = fileContent[x]
            # Consider only the dictionary elements that are numpy.ndarray
            if not isinstance(data, np.ndarray):
                continue

            if data.ndim != 2 or data.shape[0] != n_channels:
                raise ValueError(
                    "{}: variable '{}' has shape {}, expected ({}, n_samples)".format(
                        fileName, x, data.shape, n_channels))

            for channel, samples in enumerate(data):
                feature_chunks[channel].append(samples)

            # All samples of a file share one label: look it up once and
            # repeat it, instead of calling getScale for every sample.
            label_value = getScale(all_scales, fileName)
            label_chunks.append(np.full(data.shape[1], label_value))

    print("Start writing CSV file.")

    # The leading empty arrays keep the dtype promotion of the original
    # accumulators (float for features, int for labels) and make the
    # concatenation valid even when no data was collected.
    columns = {}
    for channel, name in enumerate(channel_names):
        columns[name] = np.concatenate([np.empty(0)] + feature_chunks[channel])
    columns['Scale'] = np.concatenate([np.empty(0, dtype=int)] + label_chunks)

    df = pd.DataFrame(columns)

    df.to_csv('../dataset/original-sam-dataset.csv', index = False,  sep='|')
    print("Finish writing CSV file.")

#### Generate the list with the names of the .mat files to be converted to CSV and call the conversion function.

In [5]:
# Recordings are named "<task>_sub_<subject>_trial<trial>.mat", with subjects
# numbered 1..40 and trials 1..3: 4 tasks x 40 subjects x 3 trials = 480 names.
tasksList = ["Arithmetic", "Mirror_image", "Relax", "Stroop"]

# Same ordering as nested loops: task (outer), then subject, then trial (inner).
filesList = [
    task + "_sub_" + str(subject) + "_trial" + str(trial) + ".mat"
    for task in tasksList
    for subject in range(1, 41)
    for trial in range(1, 4)
]

join_files(filesList)

Start function join_files.
Start writing CSV file.
Finish writing CSV file.
