# IEMOCAPをロードする

- IEMOCAPのロードをするプログラム
- 要求: 2個上の階層にIEMOCAP_full_releaseを配置する
- return: [波形データ, ラベル, サンプリング周波数] を1要素とするリスト（wavファイルごとに1要素）
  
※まだ途中

In [38]:
import os
import re
from statistics import mode
import collections
import librosa
import math

## 前準備

#### パスの定義

In [31]:
# Strip the leading directories and show only the file name.
def display_filename(filename, path_splitter='/'):
    """Return the part of *filename* that follows the last *path_splitter*.

    Args:
        filename: Path-like string to shorten.
        path_splitter: Separator between path components. It may be longer
            than one character.

    Returns:
        The text after the last occurrence of ``path_splitter``, or
        ``filename`` unchanged when the separator does not occur.
    """
    cut = filename.rfind(path_splitter)
    if cut < 0:
        return filename
    # Skip the whole separator rather than a single character, so that
    # multi-character separators do not leave residue in the result.
    return filename[cut + len(path_splitter):]

In [4]:
# Directories that hold the wav files, one per session (Session1..Session5).
session_dirs = [
    f'../../IEMOCAP_full_release/Session{session_no}/sentences/wav/'
    for session_no in range(1, 6)
]
# Directories that hold the categorical label files, one per session.
label_dirs = [
    f'../../IEMOCAP_full_release/Session{session_no}/dialog/EmoEvaluation/Categorical/'
    for session_no in range(1, 6)
]

#### 1つ1つのwavファイルへのパスを取る

In [5]:
# Collect the per-dialog directories of every session, in sorted order.
# The same paths are also kept in wav_file_paths_p (the "parent directory"
# list), which is used later when matching dialogs to label files.
wav_file_paths = []
for ses_d in session_dirs:
    wav_file_paths.extend(
        os.path.join(ses_d, sent_d) for sent_d in sorted(os.listdir(ses_d))
    )
wav_file_paths_p = list(wav_file_paths)

In [6]:
# Drop the stray '.DS_Store' entries that showed up in the listings.
# Filtered copies are built instead of calling remove() inside the loop:
# removing from the list that is being iterated makes the loop skip the
# element that follows each removed one, so some entries could survive.
wav_file_paths = [path for path in wav_file_paths if 'DS_Store' not in path]
wav_file_paths_p = [path for path in wav_file_paths_p if 'DS_Store' not in path]

In [7]:
# Expand every dialog directory into the paths of the files it contains,
# keeping the files of each directory in sorted order.
wav_file_paths_ = [
    os.path.join(dialog_dir, name)
    for dialog_dir in wav_file_paths
    for name in sorted(os.listdir(dialog_dir))
]

In [8]:
# Display the collected per-file paths for inspection.
wav_file_paths_

['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F000.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F001.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F002.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F003.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F004.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F005.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F006.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F007.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F008.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F009.wav',
 '../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses

In [9]:
# Store a shallow copy of the per-file paths under the final variable name.
wav_file_paths = list(wav_file_paths_)

In [10]:
# Keep only the paths whose extension is exactly '.wav'.
# A filtered list is built instead of calling remove() inside the loop:
# removing from the list that is being iterated skips the element after each
# removed one, so consecutive non-wav files would not all be dropped.
wav_file_paths = [
    path for path in wav_file_paths if os.path.splitext(path)[1] == '.wav'
]

## [ファイルのパス, ラベル]のペアを作る

音声ファイルそれぞれにつき，3~4個のラベル付けがされている．各ファイルについて全てのラベル付けを参照し，多数決でラベルを決定する．

#### 前準備

In [12]:
# Collect the path of every '.txt' label file, session by session, with the
# files of each directory in sorted order.
label_file_paths = []
for label_dir in label_dirs:
    label_file_paths.extend(
        os.path.join(label_dir, name)
        for name in sorted(os.listdir(label_dir))
        if os.path.splitext(name)[1] == '.txt'
    )

In [18]:
# Every label file is consulted to label each wav file.
# The labels of the utterances are kept in one txt file per speech dialog,
# and the dialog name equals the name of the wav files' parent directory.

# Reduce each entry of wav_file_paths_p to its last path component, i.e. the
# bare dialog name without any leading directories.
wav_file_paths_pp = [parent.rsplit('/', 1)[-1] for parent in wav_file_paths_p]

In [15]:
# Display the bare dialog names for inspection.
wav_file_paths_pp

['Ses01F_impro01',
 'Ses01F_impro02',
 'Ses01F_impro03',
 'Ses01F_impro04',
 'Ses01F_impro05',
 'Ses01F_impro06',
 'Ses01F_impro07',
 'Ses01F_script01_1',
 'Ses01F_script01_2',
 'Ses01F_script01_3',
 'Ses01F_script02_1',
 'Ses01F_script02_2',
 'Ses01F_script03_1',
 'Ses01F_script03_2',
 'Ses01M_impro01',
 'Ses01M_impro02',
 'Ses01M_impro03',
 'Ses01M_impro04',
 'Ses01M_impro05',
 'Ses01M_impro06',
 'Ses01M_impro07',
 'Ses01M_script01_1',
 'Ses01M_script01_2',
 'Ses01M_script01_3',
 'Ses01M_script02_1',
 'Ses01M_script02_2',
 'Ses01M_script03_1',
 'Ses01M_script03_2',
 'Ses02F_impro01',
 'Ses02F_impro02',
 'Ses02F_impro03',
 'Ses02F_impro04',
 'Ses02F_impro05',
 'Ses02F_impro06',
 'Ses02F_impro07',
 'Ses02F_impro08',
 'Ses02F_script01_1',
 'Ses02F_script01_2',
 'Ses02F_script01_3',
 'Ses02F_script02_1',
 'Ses02F_script02_2',
 'Ses02F_script03_1',
 'Ses02F_script03_2',
 'Ses02M_impro01',
 'Ses02M_impro02',
 'Ses02M_impro03',
 'Ses02M_impro04',
 'Ses02M_impro05',
 'Ses02M_impro06',
 'Ses0

In [16]:
# Return the indexes of all elements of lst that contain key.
def index_multi(lst, key):
    """Return the positions of every item in *lst* that contains *key*.

    Args:
        lst: Iterable of containers (strings in this notebook); each item is
            tested with ``key in item``.
        key: Value searched for inside each item.

    Returns:
        List of zero-based indexes of the matching items, in ascending
        order. Empty when nothing matches.
    """
    # enumerate() pairs each item with its position and, unlike
    # range(len(lst)), also accepts iterables that have no len().
    return [i for i, item in enumerate(lst) if key in item]

In [19]:
# Group the evaluation files by speech dialog.
# One dialog has several evaluators, so each dialog name maps to the list of
# indexes (into label_file_paths) of the files whose path contains that name.
label_file_indexes = [
    index_multi(label_file_paths, dialog_name)
    for dialog_name in wav_file_paths_pp
]

#### メインの処理

発話1つ1つに対する複数の評価者のラベル付けを参照し，多数決でその発話のラベルを決める．結果は[ファイル名，ラベル]のペアにする．

In [21]:
# Build one label per utterance.
# The label files written by the different evaluators of a dialog are read
# together and the utterance label is decided by majority vote.
labels = []
for i, indexes in enumerate(label_file_indexes):
    # One dialog per iteration.
    # Parsed lines of every evaluator file of this dialog (one list per file).
    global_labels = []

    # Read the labels of each evaluator file into global_labels.
    for index in indexes:
        # One evaluator file per iteration.
        # The context manager closes the handle as soon as the file has been
        # read, even if parsing raises; a bare open() left every handle open.
        with open(label_file_paths[index], 'r') as label_file:
            local_labels = []
            print(f'#{display_filename(wav_file_paths_pp[i])}: opening {display_filename(label_file_paths[index])}')
            for line in label_file:
                # Split on ':', ';', '(' and ')' and drop the last field.
                # Presumably field 0 is the utterance name plus one trailing
                # character and field 1 the first label -- confirm against
                # the label file format.
                label = re.split('[:;()]', line)
                label = label[:-1]
                local_labels.append(label)
        global_labels.append(local_labels)

    # Decide the label of each utterance by majority vote over the evaluators.
    # NOTE: on Python < 3.8 statistics.mode raises StatisticsError on a tie;
    # from 3.8 on it returns the first of the tied values.
    for file_id in range(len(global_labels[0])):
        file_label = mode([evaluator_labels[file_id][1] for evaluator_labels in global_labels])
        # [:-1] removes the trailing character left after the utterance name.
        labels.append([global_labels[0][file_id][0][:-1], file_label])
        

#Ses01F_impro01: opening Ses01F_impro01_e2_cat.txt
#Ses01F_impro01: opening Ses01F_impro01_e3_cat.txt
#Ses01F_impro01: opening Ses01F_impro01_e4_cat.txt
#Ses01F_impro02: opening Ses01F_impro02_e1_cat.txt
#Ses01F_impro02: opening Ses01F_impro02_e3_cat.txt
#Ses01F_impro02: opening Ses01F_impro02_e4_cat.txt
#Ses01F_impro03: opening Ses01F_impro03_e1_cat.txt
#Ses01F_impro03: opening Ses01F_impro03_e2_cat.txt
#Ses01F_impro03: opening Ses01F_impro03_e4_cat.txt
#Ses01F_impro04: opening Ses01F_impro04_e1_cat.txt
#Ses01F_impro04: opening Ses01F_impro04_e2_cat.txt
#Ses01F_impro04: opening Ses01F_impro04_e4_cat.txt
#Ses01F_impro05: opening Ses01F_impro05_e1_cat.txt
#Ses01F_impro05: opening Ses01F_impro05_e3_cat.txt
#Ses01F_impro05: opening Ses01F_impro05_e4_cat.txt
#Ses01F_impro06: opening Ses01F_impro06_e1_cat.txt
#Ses01F_impro06: opening Ses01F_impro06_e2_cat.txt
#Ses01F_impro06: opening Ses01F_impro06_e4_cat.txt
#Ses01F_impro07: opening Ses01F_impro07_e1_cat.txt
#Ses01F_impro07: opening Ses01F

#Ses04M_impro07: opening Ses04M_impro07_e4_cat.txt
#Ses04M_impro08: opening Ses04M_impro08_e1_cat.txt
#Ses04M_impro08: opening Ses04M_impro08_e2_cat.txt
#Ses04M_impro08: opening Ses04M_impro08_e4_cat.txt
#Ses04M_script01_1: opening Ses04M_script01_1_e1_cat.txt
#Ses04M_script01_1: opening Ses04M_script01_1_e2_cat.txt
#Ses04M_script01_1: opening Ses04M_script01_1_e4_cat.txt
#Ses04M_script01_2: opening Ses04M_script01_2_e1_cat.txt
#Ses04M_script01_2: opening Ses04M_script01_2_e2_cat.txt
#Ses04M_script01_2: opening Ses04M_script01_2_e4_cat.txt
#Ses04M_script01_3: opening Ses04M_script01_3_e1_cat.txt
#Ses04M_script01_3: opening Ses04M_script01_3_e2_cat.txt
#Ses04M_script01_3: opening Ses04M_script01_3_e4_cat.txt
#Ses04M_script02_1: opening Ses04M_script02_1_e1_cat.txt
#Ses04M_script02_1: opening Ses04M_script02_1_e2_cat.txt
#Ses04M_script02_1: opening Ses04M_script02_1_e6_cat.txt
#Ses04M_script02_2: opening Ses04M_script02_2_e1_cat.txt
#Ses04M_script02_2: opening Ses04M_script02_2_e2_cat.tx

In [335]:
# Display the [utterance name, majority label] pairs for inspection.
labels

[['Ses01F_impro01_F000', 'Neutral state'],
 ['Ses01F_impro01_F001', 'Neutral state'],
 ['Ses01F_impro01_F002', 'Neutral state'],
 ['Ses01F_impro01_F003', 'Neutral state'],
 ['Ses01F_impro01_F004', 'Neutral state'],
 ['Ses01F_impro01_F005', 'Neutral state'],
 ['Ses01F_impro01_F006', 'Frustration'],
 ['Ses01F_impro01_F007', 'Frustration'],
 ['Ses01F_impro01_F008', 'Frustration'],
 ['Ses01F_impro01_F009', 'Frustration'],
 ['Ses01F_impro01_F010', 'Frustration'],
 ['Ses01F_impro01_F011', 'Frustration'],
 ['Ses01F_impro01_F012', 'Anger'],
 ['Ses01F_impro01_F013', 'Frustration'],
 ['Ses01F_impro01_F014', 'Neutral state'],
 ['Ses01F_impro01_F015', 'Frustration'],
 ['Ses01F_impro01_M000', 'Frustration'],
 ['Ses01F_impro01_M001', 'Frustration'],
 ['Ses01F_impro01_M002', 'Frustration'],
 ['Ses01F_impro01_M003', 'Frustration'],
 ['Ses01F_impro01_M004', 'Frustration'],
 ['Ses01F_impro01_M005', 'Frustration'],
 ['Ses01F_impro01_M006', 'Frustration'],
 ['Ses01F_impro01_M007', 'Frustration'],
 ['Ses01

#### 後処理

In [32]:
# Replace each utterance name with the (relative) path of its wav file.
# The pairing is done by utterance name instead of by list position, so a
# difference in order or length between wav_file_paths and labels can no
# longer attach a label to the wrong file; a label without a matching wav
# file raises KeyError instead of silently shifting every later pair.
wav_path_by_name = {
    os.path.splitext(os.path.basename(wav_path))[0]: wav_path
    for wav_path in wav_file_paths
}
labels_ = [[wav_path_by_name[name], emotion] for name, emotion in labels]

In [33]:
# Rename the 'Neutral state' label to 'Neutral', editing each pair in place.
for label in labels_:
    if label[1] != 'Neutral state':
        continue
    label[1] = 'Neutral'

#### 必要な感情ラベルだけ取ってくる

今回はNeutral, Anger, Sadness, Happinessの4つだけ使うのでそのように選ぶ．

In [34]:
# Keep only the pairs whose label is one of the four emotions used here.
data_selected = [
    pair for pair in labels_
    if pair[1] in ('Neutral', 'Anger', 'Sadness', 'Happiness')
]

In [29]:
# Display the selected [wav path, label] pairs for inspection.
data_selected

[['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F000.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F001.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F002.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F003.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F004.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F005.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F012.wav',
  'Anger'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_F014.wav',
  'Neutral'],
 ['../../IEMOCAP_full_release/Session1/sentences/wav/Ses01F_impro01/Ses01F_impro01_M011.wav',
  'Anger'],
 ['../../IEMOCAP_full_release/Se

In [346]:
# Number of selected samples. The filtered list is named data_selected;
# labels_selected is not defined anywhere in this notebook and raised
# NameError on a fresh top-to-bottom run.
len(data_selected)

4801

## [wav, ラベル]のペアにする

In [39]:
# Load every selected wav file at a 16 kHz sampling rate and store
# [waveform, label, sampling rate] for each one.
ret = []
for wav_path, emotion in data_selected:
    x, fs = librosa.load(wav_path, sr=16000)
    ret.append([x, emotion, fs])

In [41]:
# Print the number of samples of every loaded waveform, one per line.
for waveform, *_ in ret:
    print(len(waveform))

31129
22119
50084
23960
54628
64479
44000
81120
115839
96479
149299
46880
91576
90440
92319
40960
67440
62189
69280
68000
53364
48488
98007
118400
57406
35039
106479
48800
48950
72479
92733
79199
58760
67511
51519
74881
75680
73840
20000
45760
55039
41119
60319
41306
101440
42720
27199
51279
36559
82079
32839
47720
46760
22719
22079
87360
36039
13479
70440
62880
96159
35519
32959
33440
44039
21119
78560
62880
62640
44760
34880
28880
76640
73920
42880
59039
35519
70312
70482
38553
56904
22139
93606
67789
23840
120479
32159
50559
50959
70400
46399
93520
62651
26720
52159
28800
74279
53440
48575
19039
17919
42791
28000
60800
37542
54443
122663
110399
47760
44960
97039
20960
30720
155680
36808
73559
35039
51200
38400
42600
19839
45787
43520
26720
33919
81919
52960
54079
247920
62560
149840
38320
85999
70959
76159
54199
30720
22079
49759
23800
74000
33880
25279
19680
17759
23679
24319
64959
27794
93640
29119
93600
97759
324639
44159
23039
23199
48360
131799
102320
24479
21280
35680
116960
1