# IEMOCAPをロードする

- IEMOCAPのロードをするプログラム
- 要求: 2個上の階層にIEMOCAP_full_releaseを配置する
- return: wavファイルとラベルのペア

In [210]:
import os
import re
from statistics import mode
import collections
import librosa
import math
import audeer
import pandas as pd

## 前準備

#### グローバルパスの定義

In [211]:
# Helper that strips the leading directories and shows only the file name.
def display_filename(filename, path_splitter='/'):
    """Return the part of *filename* that follows the last *path_splitter*.

    Args:
        filename: Path-like string to shorten.
        path_splitter: Separator string; may be longer than one character.

    Returns:
        The text after the last separator, or *filename* unchanged when the
        separator does not occur in it.
    """
    cut = filename.rfind(path_splitter)
    if cut == -1:
        return filename
    # Skip the whole separator, not just one character, so multi-character
    # separators are removed completely.
    return filename[cut + len(path_splitter):]

In [212]:
# Per-session directories that hold the wav files (Session1 .. Session5).
session_dirs = [
    f'../../IEMOCAP_full_release/Session{session_no}/sentences/wav/'
    for session_no in range(1, 6)
]
# Per-session directories that hold the categorical label files.
label_dirs = [
    f'../../IEMOCAP_full_release/Session{session_no}/dialog/EmoEvaluation/Categorical/'
    for session_no in range(1, 6)
]

#### 1つ1つのwavファイルへのパスを取る

In [213]:
# Collect the path of every entry directly under each session's wav directory.
# These are the parent directories of the individual wav files.
wav_file_paths_p = []
for ses_d in session_dirs:
    # Sorted so the resulting order is deterministic across platforms.
    for sent_d in sorted(os.listdir(ses_d)):
        # Only the parent-directory list is filled here; the per-file list
        # (wav_file_paths) is created and populated in a later cell.
        wav_file_paths_p.append(os.path.join(ses_d, sent_d))

In [214]:
# Drop the stray '.DS_Store' entries that showed up in the directory listing.
# The list is rebuilt in place rather than calling remove() while iterating,
# because removing during iteration skips the element after each removal.
wav_file_paths_p[:] = [path for path in wav_file_paths_p if 'DS_Store' not in path]

In [216]:
# Collect the path of every individual file inside each parent directory.
wav_file_paths = []
for parent_dir in wav_file_paths_p:
    # Entries are added in sorted order within each parent directory.
    wav_file_paths.extend(
        os.path.join(parent_dir, name) for name in sorted(os.listdir(parent_dir))
    )

In [218]:
# Keep only the paths whose extension is exactly '.wav'.
# The list is rebuilt in place rather than calling remove() while iterating,
# because removing during iteration skips the element after each removal and
# can leave non-wav files behind.
wav_file_paths[:] = [
    path for path in wav_file_paths if os.path.splitext(path)[1] == '.wav'
]

In [189]:
# Also keep a version of the file names with the extension stripped.
filenames = list(map(audeer.basename_wo_ext, wav_file_paths))

## 1つ1つのファイルのラベルを作る

wavファイルそれぞれにつき，3人の評価者がラベル付をしている．
1. wavファイルそれぞれについてラベルファイルを参照し，3人のラベル付を取得する．話者の情報もこのときに取っておく.
1. HappinessとExcitedのラベルをマージする(感情価ValenceとActivationの値が近いため)
1. 多数決を取ってwavファイルに対するラベルを決定する
1. Happiness, Anger, Neutral, Sadnessだけ抽出する

#### 1: wavファイルそれぞれについてラベルファイルを参照し，3人のラベル付を取得する

In [221]:
# Collect the path of every label file ('.txt' only) in each label directory.
label_file_paths = []
for label_dir in label_dirs:
    label_file_paths.extend(
        os.path.join(label_dir, name)
        for name in sorted(os.listdir(label_dir))
        if os.path.splitext(name)[1] == '.txt'
    )

In [223]:
# Preview the first six label files.
# The dialog Ses01F_impro01 was labelled by three evaluators: e2, e3 and e4.
# The dialog Ses01F_impro02 was labelled by three evaluators: e1, e3 and e4.
label_file_paths[:6]

['../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Categorical/Ses01F_impro01_e2_cat.txt',
 '../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Categorical/Ses01F_impro01_e3_cat.txt',
 '../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Categorical/Ses01F_impro01_e4_cat.txt',
 '../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Categorical/Ses01F_impro02_e1_cat.txt',
 '../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Categorical/Ses01F_impro02_e3_cat.txt',
 '../../IEMOCAP_full_release/Session1/dialog/EmoEvaluation/Categorical/Ses01F_impro02_e4_cat.txt']

In [224]:
# Goal of the following cells: consult every label file and attach labels to
# each wav file.
# The labels of the individual utterances are stored in txt files, one set per
# speech dialog.
# The dialog name matches the name of the wav files' parent directory.

# Parent-directory names only, i.e. wav_file_paths_p with everything up to and
# including the last '/' removed.
wav_file_paths_pp = [path[path.rfind('/') + 1:] for path in wav_file_paths_p]

In [225]:
# Find every element of a list that contains the given key.
def index_multi(lst, key):
    """Return the indexes of all elements of *lst* for which ``key in element``.

    Args:
        lst: Sequence of containers/strings to search.
        key: Value tested with ``in`` against each element.

    Returns:
        List of matching positions in ascending order (empty if none match).
    """
    return [pos for pos, item in enumerate(lst) if key in item]

In [226]:
# Group the evaluation files by speech dialog.
# One dialog has several evaluators, so each dialog maps to a list of indexes
# into label_file_paths (matched by the dialog name appearing in the path).
label_file_indexes = [
    index_multi(label_file_paths, dirname) for dirname in wav_file_paths_pp
]

In [235]:
# Integer id for each speaker key (5-character session prefix + sex letter),
# numbered 1..10 in the order listed.
speaker_mapping = dict(zip(
    (
        'Ses01M', 'Ses01F',
        'Ses02M', 'Ses02F',
        'Ses03M', 'Ses03F',
        'Ses04M', 'Ses04F',
        'Ses05M', 'Ses05F',
    ),
    range(1, 11),
))

In [239]:
def identify_speaker(filename):
    """Build the speaker key for an utterance name.

    The key is the first five characters of *filename* followed by its
    fourth-from-last character (e.g. 'Ses01F_impro01_M013' -> 'Ses01M').
    The name is also echoed to stdout.

    Args:
        filename: Utterance name without extension.

    Returns:
        The speaker key string.
    """
    print(filename)
    return filename[:5] + filename[-4]

In [244]:
# Build the labels for every audio file.
# All evaluator files that belong to the same dialog are read together so that
# the ratings of every evaluator are available per utterance (the majority
# vote itself is taken in a later step).
raw_labels = []
speakers = []
filenames = []
for i, indexes in enumerate(label_file_indexes):
    # --- one dialog ---
    # One entry per evaluator file; each entry is the list of parsed lines.
    global_labels = []

    # Read every evaluator file of this dialog into global_labels.
    for index in indexes:
        # --- one evaluator file ---
        # The context manager guarantees the handle is closed, even if
        # parsing raises part-way through.
        with open(label_file_paths[index], 'r') as file:
            local_labels = []
            print(f'#{display_filename(wav_file_paths_pp[i])}: opening {display_filename(label_file_paths[index])}')
            for line in file:
                # Split on ':', ';', '(' and ')' and drop the last piece.
                label = re.split('[:;()]', line)
                label = label[:-1]
                local_labels.append(label)
        global_labels.append(local_labels)

    # Tie every utterance of the dialog to its labels.
    # raw_labels keeps one label per evaluator; the majority label is derived
    # from it later.
    # NOTE(review): the first evaluator file defines the utterance list, and
    # all files are assumed to list the same utterances in the same order.
    for file_id in range(len(global_labels[0])):
        # Field 1 of each evaluator's parsed line is used as that evaluator's label.
        raw_label = [evaluator[file_id][1] for evaluator in global_labels]
        # Field 0 minus its last character is the utterance name
        # (presumably the character dropped is a trailing space - confirm
        # against the label file format).
        filename = global_labels[0][file_id][0][:-1]
        filenames.append(filename)
        speakers.append(speaker_mapping[identify_speaker(filename)])
        raw_labels.append(raw_label)

#Ses01F_impro01: opening Ses01F_impro01_e2_cat.txt
#Ses01F_impro01: opening Ses01F_impro01_e3_cat.txt
#Ses01F_impro01: opening Ses01F_impro01_e4_cat.txt
Ses01F_impro01_F000
Ses01F_impro01_F001
Ses01F_impro01_F002
Ses01F_impro01_F003
Ses01F_impro01_F004
Ses01F_impro01_F005
Ses01F_impro01_F006
Ses01F_impro01_F007
Ses01F_impro01_F008
Ses01F_impro01_F009
Ses01F_impro01_F010
Ses01F_impro01_F011
Ses01F_impro01_F012
Ses01F_impro01_F013
Ses01F_impro01_F014
Ses01F_impro01_F015
Ses01F_impro01_M000
Ses01F_impro01_M001
Ses01F_impro01_M002
Ses01F_impro01_M003
Ses01F_impro01_M004
Ses01F_impro01_M005
Ses01F_impro01_M006
Ses01F_impro01_M007
Ses01F_impro01_M008
Ses01F_impro01_M009
Ses01F_impro01_M010
Ses01F_impro01_M011
Ses01F_impro01_M012
Ses01F_impro01_M013
#Ses01F_impro02: opening Ses01F_impro02_e1_cat.txt
#Ses01F_impro02: opening Ses01F_impro02_e3_cat.txt
#Ses01F_impro02: opening Ses01F_impro02_e4_cat.txt
Ses01F_impro02_F000
Ses01F_impro02_F001
Ses01F_impro02_F002
Ses01F_impro02_F003
Ses01F_impro02

#### 2: ExcitedをHappinessにマージ

元論文に従い，ExcitedとHappinessはvalenceとactivationの値が近いクラスであるためマージする．

In [230]:
def exc_hap_marge(label):
    """Return a copy of *label* with every 'Excited' replaced by 'Happiness'.

    Args:
        label: List of label strings for one utterance (one per evaluator).

    Returns:
        A new list; the input list is left untouched.
    """
    merged = label.copy()
    for pos, vote in enumerate(merged):
        if vote == 'Excited':
            merged[pos] = 'Happiness'
    return merged

In [232]:
# Apply the Excited -> Happiness merge to the label list of every utterance.
ex_hp_marged = list(map(exc_hap_marge, raw_labels))

In [233]:
# Rename 'Neutral state' to 'Neutral' (modifies the inner lists in place).
for votes in ex_hp_marged:
    for pos, vote in enumerate(votes):
        if vote == 'Neutral state':
            votes[pos] = 'Neutral'

#### 3: 3評価者間でラベル付が割れているファイルを除く

In [291]:
def unagreed1(label):
    """Return True when every entry of *label* is different from the others.

    Args:
        label: List of hashable labels given to one utterance.

    Returns:
        True if no label value occurs more than once (an empty list also
        yields True), otherwise False.
    """
    return len(set(label)) == len(label)

def strictly_agreed(label):
    """Return True when *label* contains exactly one distinct value.

    Args:
        label: List of hashable labels given to one utterance.

    Returns:
        True if all entries are equal and the list is non-empty, else False.
    """
    return len(set(label)) == 1

In [303]:
# Two filtered views of the data, each kept index-aligned across
# labels / speakers / file paths / file names.
# NOTE(review): wav_file_paths is indexed with the same positions as
# ex_hp_marged, i.e. it is assumed to be in the same order as the utterances
# read from the label files - confirm.

# Variant 1: drop only the utterances on which every evaluator disagreed.
agreed1 = [label for label in ex_hp_marged if not unagreed1(label)]
speakers1 = [s for i, s in enumerate(speakers) if not unagreed1(ex_hp_marged[i])]
file_paths_1 = [p for i, p in enumerate(wav_file_paths) if not unagreed1(ex_hp_marged[i])]
file_names1 = [n for i, n in enumerate(filenames) if not unagreed1(ex_hp_marged[i])]

# Variant 2: keep only the utterances on which all evaluators agreed.
# Every list uses the same predicate so that row i of each list refers to the
# same utterance.
agreed2 = [label for label in ex_hp_marged if strictly_agreed(label)]
speakers2 = [s for i, s in enumerate(speakers) if strictly_agreed(ex_hp_marged[i])]
file_paths_2 = [p for i, p in enumerate(wav_file_paths) if strictly_agreed(ex_hp_marged[i])]
file_names2 = [n for i, n in enumerate(filenames) if strictly_agreed(ex_hp_marged[i])]

#### 3: 多数決を取って1つのラベルを決定する

In [304]:
# Variant 1: the most frequent label among the evaluators of each utterance.
maj_labels1 = list(map(mode, agreed1))
# Variant 2: all evaluators gave the same label, so the first entry is taken.
maj_labels2 = [votes[0] for votes in agreed2]

In [305]:
# Wrap each majority-label list in a DataFrame (the labels end up in column 0)
# so the label distribution can be inspected.
df1 = pd.DataFrame(maj_labels1)
df2 = pd.DataFrame(maj_labels2)

In [306]:
# Label distribution when only fully-split utterances are dropped.
df1[0].value_counts()

Happiness      2448
Frustration    2044
Neutral        1708
Sadness        1070
Anger          1054
Surprise         98
Fear             33
Other             3
Disgust           2
Name: 0, dtype: int64

In [307]:
# Label distribution when only unanimously-labelled utterances are kept.
df2[0].value_counts()

Happiness      1264
Frustration     557
Sadness         424
Neutral         390
Anger           330
Surprise          7
Fear              4
Name: 0, dtype: int64

#### 4: 必要な感情ラベルだけ取ってくる

今回はNeutral, Anger, Sadness, Happinessの4つだけ使うのでそのように選ぶ．

In [265]:
# The four emotion classes that are kept; every other label is discarded.
labels_necessary = ['Anger', 'Neutral', 'Sadness', 'Happiness']

In [312]:
def data_selection(file_paths, filenames, speakers, labels, necessary_labels):
    """Keep only the samples whose label is one of *necessary_labels*.

    The four input sequences are index-aligned: position ``i`` of each one
    describes the same sample.

    Args:
        file_paths: Path of each sample.
        filenames: Name of each sample.
        speakers: Speaker id of each sample.
        labels: Label of each sample; drives the selection.
        necessary_labels: Collection of label values to keep.

    Returns:
        A tuple ``(files, filenames, speakers, labels)`` of four lists that
        contain only the selected samples, in their original order.

    Raises:
        IndexError: If a selected position is missing from one of the other
            input sequences.
    """
    files_selected = []
    filenames_selected = []
    speakers_selected = []
    labels_selected = []

    for i, label in enumerate(labels):
        # Filter against the argument that was passed in, not a module-level
        # variable, so callers fully control the selection.
        if label in necessary_labels:
            files_selected.append(file_paths[i])
            filenames_selected.append(filenames[i])
            speakers_selected.append(speakers[i])
            labels_selected.append(label)
    return files_selected, filenames_selected, speakers_selected, labels_selected

In [313]:
# Restrict variant 1 (fully-split utterances dropped) to the required labels.
(
    files_selected1,
    filenames_selected1,
    speakers_selected1,
    labels_selected1,
) = data_selection(file_paths_1, file_names1, speakers1, maj_labels1, labels_necessary)

In [314]:
# Restrict variant 2 (unanimous utterances only) to the required labels.
(
    files_selected2,
    filenames_selected2,
    speakers_selected2,
    labels_selected2,
) = data_selection(file_paths_2, file_names2, speakers2, maj_labels2, labels_necessary)

In [327]:
# One row per selected utterance of variant 1: file name, speaker id, label.
df1 = pd.DataFrame(list(zip(filenames_selected1, speakers_selected1, labels_selected1)), columns=['filename', 'speaker_id', 'labels'])

In [328]:
# One row per selected utterance of variant 2: file name, speaker id, label.
df2 = pd.DataFrame(list(zip(filenames_selected2, speakers_selected2, labels_selected2)), columns=['filename', 'speaker_id', 'labels'])

In [330]:
# Class balance of variant 1 after restricting to the required labels.
df1['labels'].value_counts()

Happiness    2448
Neutral      1708
Sadness      1070
Anger        1054
Name: labels, dtype: int64

In [329]:
# Class balance of variant 2 after restricting to the required labels.
df2['labels'].value_counts()

Happiness    1264
Sadness       424
Neutral       390
Anger         330
Name: labels, dtype: int64

In [331]:
# Load the audio of every selected file of variant 1, requesting a 16 kHz
# sampling rate. Each entry is a two-element list: [samples, sampling rate].
datas = [list(librosa.load(file, sr=16000)) for file in files_selected1]

In [332]:
class dataset:
    """Container bundling the loaded audio with its metadata.

    The constructor takes no arguments; every attribute is bound to the
    module-level variable of the variant-1 selection that exists at the time
    the object is created.
    """

    def __init__(self):
        # [samples, sampling rate] pairs, one per selected file.
        self.datas = datas
        # Speaker id of each selected utterance.
        self.speakers = speakers_selected1
        # Speaker key -> integer id table.
        self.speaker_mapping = speaker_mapping
        # Label of each selected utterance.
        self.labels = labels_selected1
        # Name (without extension) of each selected utterance.
        self.filenames = filenames_selected1

In [333]:
# Create the container instance.
# NOTE(review): this rebinds the name `dataset` from the class to the
# instance, so the class can no longer be instantiated by name afterwards
# (re-running this cell alone fails).
dataset = dataset()

In [334]:
# Inspect the utterance names stored in the container.
dataset.filenames

['Ses01F_impro01_F000',
 'Ses01F_impro01_F001',
 'Ses01F_impro01_F002',
 'Ses01F_impro01_F005',
 'Ses01F_impro01_F012',
 'Ses01F_impro01_F014',
 'Ses01F_impro01_M013',
 'Ses01F_impro02_F000',
 'Ses01F_impro02_F001',
 'Ses01F_impro02_F002',
 'Ses01F_impro02_F003',
 'Ses01F_impro02_F004',
 'Ses01F_impro02_F005',
 'Ses01F_impro02_F006',
 'Ses01F_impro02_F010',
 'Ses01F_impro02_F012',
 'Ses01F_impro02_F013',
 'Ses01F_impro02_F014',
 'Ses01F_impro02_F015',
 'Ses01F_impro02_F016',
 'Ses01F_impro02_F017',
 'Ses01F_impro02_F018',
 'Ses01F_impro02_F019',
 'Ses01F_impro02_F020',
 'Ses01F_impro02_M009',
 'Ses01F_impro02_M010',
 'Ses01F_impro02_M011',
 'Ses01F_impro02_M012',
 'Ses01F_impro02_M013',
 'Ses01F_impro02_M014',
 'Ses01F_impro02_M015',
 'Ses01F_impro03_F000',
 'Ses01F_impro03_F001',
 'Ses01F_impro03_F002',
 'Ses01F_impro03_F003',
 'Ses01F_impro03_F004',
 'Ses01F_impro03_F005',
 'Ses01F_impro03_F006',
 'Ses01F_impro03_F007',
 'Ses01F_impro03_F008',
 'Ses01F_impro03_F009',
 'Ses01F_impro03