In [2]:
import os
import csv
import wave
import sys
import numpy as np
import pandas as pd
import glob
from sklearn.preprocessing import label_binarize

# Make the parent of the current working directory importable by appending
# it to the module search path.
sys.path.append(os.path.dirname(os.path.realpath('.')))


In [3]:
# Show the parent of the current working directory; Constants builds its
# dataset paths from this same expression.
print(os.path.dirname(os.path.realpath('.')))

/Users/stella-yj/workspace/notebook


In [7]:
class Constants:
    """Configuration used to locate and decode the IEMOCAP dataset.

    All attributes are set per instance in ``__init__``. Dataset paths are
    built from the parent of the current working directory and end with a
    trailing slash, because callers append file names by plain string
    concatenation.
    """

    def __init__(self):
        # Parent of the current working directory, resolved at construction time.
        real_path = os.path.dirname(os.path.realpath('.'))
        # Emotion labels that read_iemocap_data keeps; other labels are dropped.
        self.available_emotions = np.array(['ang', 'exc', 'neu', 'sad'])
        self.path_to_data = real_path + "/datasets/IEMOCAP_full_release/"
        self.path_to_features = real_path + "/datasets/IEMOCAP_full_release/features/"
        self.sessions = ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']
        self.conf_matrix_prefix = 'iemocap'
        self.framerate = 16000
        # Maps a WAV sample width in bytes to the numpy dtype used to decode it.
        # NOTE(review): 8-bit PCM WAV data is conventionally unsigned, so
        # np.uint8 may be the right dtype for width 1 -- confirm before relying
        # on 8-bit input.
        self.types = {1: np.int8, 2: np.int16, 4: np.int32}

    def __str__(self):
        """Return a two-column table (attribute name, value) framed by dashes.

        Only non-dunder, non-callable attributes are listed. Each row is
        truncated/padded so the name column ends at 30 characters and the
        whole row is 100 characters wide.
        """
        def display(objects, positions):
            # Append each object, then cut/pad the running line so that it is
            # exactly `positions[i]` characters long after the i-th object.
            line = ''
            for obj, pos in zip(objects, positions):
                line = (line + str(obj))[:pos].ljust(pos)
            return line

        line_length = 100
        ans = '-' * line_length
        # Test the attribute *value* for callability: the name is a str and is
        # never callable, so checking the name would let methods through.
        members = [attr for attr in dir(self)
                   if not attr.startswith("__") and not callable(getattr(self, attr))]
        for field in members:
            objects = [field, getattr(self, field)]
            positions = [30, 100]
            ans += "\n" + display(objects, positions)
        ans += "\n" + '-' * line_length
        return ans

In [8]:
def get_audio(path_to_wav, filename, params=Constants()):
    """Read a WAV file and return its header parameters and decoded samples.

    Args:
        path_to_wav: Directory prefix. It is concatenated directly with
            ``filename``, so it must already end with a path separator.
        filename: Name of the WAV file.
        params: Configuration object; only ``params.types`` (sample width in
            bytes -> numpy dtype) is read.

    Returns:
        A tuple ``(header, samples)`` where ``header`` is
        ``(nchannels, sampwidth, framerate, nframes, comptype, compname)`` and
        ``samples`` is a writable 1-D array decoded from all frames of the
        file (channels are not separated here).

    Raises:
        KeyError: If the file's sample width has no entry in ``params.types``.
    """
    # The context manager releases the file handle even if decoding fails.
    with wave.open(path_to_wav + filename, mode="r") as wav:
        (nchannels, sampwidth, framerate, nframes, comptype, compname) = wav.getparams()
        content = wav.readframes(nframes)
    # np.fromstring's binary mode is deprecated in favour of np.frombuffer.
    # frombuffer returns a read-only view of `content`; copy() keeps the
    # result writable and independent, as the fromstring result was.
    samples = np.frombuffer(content, dtype=params.types[sampwidth]).copy()
    return (nchannels, sampwidth, framerate, nframes, comptype, compname), samples


def get_transcriptions(path_to_transcriptions, filename, params=Constants()):
    """Parse a transcription file into a mapping of utterance id -> text.

    Every non-empty line is split at the first ``' ['`` (end of the id) and
    at the first ``': '`` (the text starts two characters later). Empty lines
    are skipped, and the last line is parsed whether or not the file ends
    with a newline.

    Args:
        path_to_transcriptions: Directory prefix, concatenated directly with
            ``filename``.
        filename: Name of the transcription file.
        params: Not used by this function; kept so the signature matches the
            other reader functions.

    Returns:
        dict mapping the part of each line before ``' ['`` to the part after
        ``': '``.
    """
    transcription = {}
    with open(path_to_transcriptions + filename, 'r') as fh:
        for raw_line in fh:
            line = raw_line.rstrip('\n')
            if not line:
                continue
            id_end = line.find(' [')
            text_start = line.find(': ') + 2
            # As before, a line missing a delimiter is not rejected: find()
            # returns -1 and the slices below fall back accordingly.
            transcription[line[:id_end]] = line[text_start:]
    return transcription

In [9]:
def get_emotions(path_to_emotions, filename, params=Constants()):
    """Parse an emotion-evaluation file into one record per utterance block.

    The file is treated as blocks of lines separated by empty lines. The text
    before the first empty line is skipped, and the last two empty-line
    positions are not used as block starts. In each block the first line is
    the summary line and the following lines starting with ``"C"`` are
    per-evaluator categorical labels.

    Args:
        path_to_emotions: Directory prefix, concatenated directly with
            ``filename``.
        filename: Name of the evaluation file. Its last four characters are
            dropped to obtain the dialog id used inside the file.
        params: Not used by this function; kept so the signature matches the
            other reader functions.

    Returns:
        list of dicts with keys ``'start'``, ``'end'`` (floats taken from the
        ``[start - end]`` prefix), ``'id'`` (dialog id + ``'_'`` + the four
        characters following the dialog id in the summary line), ``'v'``,
        ``'a'``, ``'d'`` (the three floats in the trailing bracket),
        ``'emotion'`` (the three characters before that bracket) and
        ``'emo_evo'`` (one list of 3-letter lower-case labels per ``"C"``
        line).
    """
    with open(path_to_emotions + filename, 'r') as fh:
        lines = fh.read().split('\n')

    dialog_id = filename[:-4]
    blank_positions = [pos for pos, text in enumerate(lines) if text == '']

    emotion = []
    for k in range(len(blank_positions) - 2):
        block = lines[blank_positions[k] + 1:blank_positions[k + 1]]
        head = block[0]

        # "[start - end]" time span at the beginning of the summary line.
        dash = head.find(' - ')
        start_time = float(head[head.find('[') + 1:dash])
        end_time = float(head[dash + 3:head.find(']')])

        # Four characters that follow "<dialog_id>" plus one separator char.
        id_end = head.find(dialog_id) + len(dialog_id)
        actor_id = head[id_end + 1:id_end + 5]

        # The label is the three characters right before the tab that
        # introduces the trailing "[v, a, d]" bracket.
        vad_tab = head.find('\t[')
        emo = head[vad_tab - 3:vad_tab]
        vad = head[vad_tab + 1:]

        # Split on the commas rather than fixed character offsets so the
        # values do not have to be exactly six characters wide.
        v, a, d = (float(part) for part in vad[1:vad.find(']')].split(','))

        # Collect the labels of every leading "C..." evaluator line. The
        # bounds check stops cleanly when the block ends with such a line.
        emos = []
        j = 1
        while j < len(block) and block[j].startswith("C"):
            rater_line = block[j]
            labels_part = rater_line[rater_line.find("\t") + 1:]
            # Only ';'-terminated tokens count; whatever follows the last
            # ';' is dropped.
            emos.append([label.strip().lower()[:3]
                         for label in labels_part.split(";")[:-1]])
            j += 1

        emotion.append({'start': start_time,
                        'end': end_time,
                        'id': dialog_id + '_' + actor_id,
                        'v': v,
                        'a': a,
                        'd': d,
                        'emotion': emo,
                        'emo_evo': emos})
    return emotion

In [10]:
def split_wav(wav, emotions, params=Constants()):
    """Cut the per-utterance audio segments out of a decoded recording.

    Args:
        wav: ``(header, samples)`` as returned by ``get_audio``, where
            ``header`` is ``(nchannels, sampwidth, framerate, nframes,
            comptype, compname)``.
        emotions: Iterable of dicts with ``'start'`` and ``'end'`` times in
            seconds. Each dict is modified in place: ``'right'`` and
            ``'left'`` entries holding the cut segments are added.
        params: Not used by this function (the frame rate comes from the WAV
            header); kept so the signature matches the other functions.

    Returns:
        list of ``{'left': ..., 'right': ...}`` dicts, one per entry of
        ``emotions`` and in the same order.
    """
    (nchannels, sampwidth, framerate, nframes, comptype, compname), samples = wav

    # Every nchannels-th sample starting at 0 / 1 gives the first / second channel.
    left = samples[0::nchannels]
    # A mono recording has no second channel: samples[1::1] would only be the
    # same signal shifted by one sample, so reuse the single channel instead.
    right = samples[1::nchannels] if nchannels > 1 else left

    frames = []
    for e in emotions:
        first = int(e['start'] * framerate)
        last = int(e['end'] * framerate)

        e['right'] = right[first:last]
        e['left'] = left[first:last]

        frames.append({'left': e['left'], 'right': e['right']})
    return frames

In [46]:
def read_iemocap_data(params=Constants()):
    """Load every dialog of every session and return the labelled utterances.

    For each ``*.wav`` file under ``<path_to_data><session>/dialog/wav/`` the
    audio, the transcription file and the emotion-evaluation file with the
    same stem are read. Each utterance record gets a ``'signal'`` (left
    channel segment), a ``'transcription'`` and a ``'path_to_wav'`` entry.
    Only records whose ``'emotion'`` is in ``params.available_emotions`` are
    kept.

    Args:
        params: Configuration object. ``sessions``, ``path_to_data`` and
            ``available_emotions`` are read here, and the same object is
            forwarded to the reader helpers so a custom configuration applies
            to the whole pipeline.

    Returns:
        1-D numpy object array of record dicts, sorted by ``'id'``.

    Raises:
        KeyError: If an utterance id has no entry in the transcription file.
    """
    data = []
    for session in params.sessions:
        dialog_dir = params.path_to_data + session + '/dialog/'
        path_to_wav = dialog_dir + 'wav/'
        path_to_emotions = dialog_dir + 'EmoEvaluation/'
        path_to_transcriptions = dialog_dir + 'transcriptions/'

        # Skip hidden entries (for example "._*.wav" side files that some
        # archive tools leave behind): they end in ".wav" but are not audio.
        stems = [name[:-4] for name in os.listdir(path_to_wav)
                 if name.endswith(".wav") and not name.startswith(".")]
        for stem in stems:
            # Forward `params` explicitly; otherwise the helpers fall back to
            # their own default Constants() and ignore the caller's settings.
            wav = get_audio(path_to_wav, stem + '.wav', params=params)
            transcriptions = get_transcriptions(path_to_transcriptions, stem + '.txt', params=params)
            emotions = get_emotions(path_to_emotions, stem + '.txt', params=params)
            sample = split_wav(wav, emotions, params=params)

            for e, segment in zip(emotions, sample):
                e['signal'] = segment['left']
                # split_wav stored both channels on the record; only the left
                # one is kept, under 'signal'.
                e.pop("left", None)
                e.pop("right", None)
                e['transcription'] = transcriptions[e['id']]
                e['path_to_wav'] = path_to_wav
                if e['emotion'] in params.available_emotions:
                    data.append(e)
    sort_key = get_field(data, "id")
    return np.array(data)[np.argsort(sort_key)]

In [47]:
def get_field(data, key):
    """Collect ``record[key]`` from every record in ``data`` into a numpy array.

    Args:
        data: Iterable of mappings that all contain ``key``.
        key: Key to look up in each record.

    Returns:
        numpy array of the looked-up values, in the order of ``data``.
    """
    values = []
    for record in data:
        values.append(record[key])
    return np.array(values)

In [48]:
# Build the configuration and load the utterances whose label is listed in
# params.available_emotions (reads every session's files from disk).
params = Constants()
data = read_iemocap_data(params=params)


  """


In [50]:
# Number of records returned by read_iemocap_data (1-D array of dicts).
data.shape

(4936,)

In [51]:
# Preview the records; built this way, each row is a single dict-valued column.
pd.DataFrame(data=data[0:])

Unnamed: 0,0
0,"{'start': 6.2901, 'end': 8.2357, 'id': 'Ses01F..."
1,"{'start': 10.01, 'end': 11.3925, 'id': 'Ses01F..."
2,"{'start': 14.8872, 'end': 18.0175, 'id': 'Ses0..."
3,"{'start': 27.46, 'end': 31.49, 'id': 'Ses01F_i..."
4,"{'start': 85.27, 'end': 88.02, 'id': 'Ses01F_i..."
...,...
4931,"{'start': 236.57, 'end': 244.83, 'id': 'Ses05M..."
4932,"{'start': 244.84, 'end': 246.58, 'id': 'Ses05M..."
4933,"{'start': 246.59, 'end': 248.83, 'id': 'Ses05M..."
4934,"{'start': 255.86, 'end': 260.33, 'id': 'Ses05M..."


In [52]:
# Turn the array of record dicts into a DataFrame via from_records.
dfItem = pd.DataFrame.from_records(data)

In [54]:
# Write the table to CSV in the current working directory, without the row index.
# NOTE(review): the 'signal' entries are numpy array slices (set in
# read_iemocap_data); CSV can only hold their string form -- confirm that this
# column is meant to be part of a metadata file.
dfItem.to_csv("iemocap_meta.csv", index=False)