## Building Audio Vectors

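We use the labels dataframe (`df_iemocap.csv`) to split each dialogue wav file into one audio segment per labelled utterance, using the start and end times recorded for that utterance.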

Let's try one file first

In [1]:
# Imports and plotting setup
import librosa
import os
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
from tqdm import tqdm
import pickle

import IPython.display
import librosa.display
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so pick whichever one this install has.
ms.use('seaborn-muted' if 'seaborn-muted' in ms.available else 'seaborn-v0_8-muted')
%matplotlib inline

We load one dialogue recording and look at the raw audio signal values and the sampling rate.

In [2]:
# Path to a single dialogue recording, used here for a quick sanity check.
file_path = 'IEMOCAP_Extracted/IEMOCAP_full_release/Session1/dialog/wav/Ses01F_impro01.wav'

# Load the waveform, asking librosa to deliver it at 44.1 kHz.
y, sr = librosa.load(file_path, sr=44100)
# Bare last expression: display the sample array together with the sampling rate.
y, sr

(array([ 0.42572615,  0.48587543,  0.37312022, ..., -0.31514615,
        -0.16263676,  0.        ], dtype=float32),
 44100)

In [3]:
import pandas as pd
import math

# Label table read from CSV; the segmentation cell below reads its
# 'wav_file', 'start_time' and 'end_time' columns.
labels_df = pd.read_csv('df_iemocap.csv')
# Root of the extracted IEMOCAP release. The trailing slash matters because
# per-session paths are built from it by plain string formatting.
iemocap_dir = 'IEMOCAP_Extracted/IEMOCAP_full_release/'

In [4]:
# Cut every dialogue recording into the labelled clips listed in `labels_df`
# and write one pickle per session: {clip name from 'wav_file' -> 1-D sample array}.

# Sampling rate requested from librosa.load; also used to turn the label
# times into sample indices (time * sr).
sr = 44100

output_dir = 'IEMOCAP_Data/PreProcessed/AudioVectors/'
# Create the target folder up front so a missing directory cannot discard
# several minutes of loading work when the first dump is attempted.
os.makedirs(output_dir, exist_ok=True)

for sess in range(1, 6):  # all five sessions; use e.g. [5] to process a single one
    # Fresh dict per session: audio_vectors_{sess}.pkl then holds only that
    # session's clips instead of re-serialising every earlier session too,
    # and peak memory stays bounded by one session.
    # NOTE: after the loop `audio_vectors` therefore holds the last session only.
    audio_vectors = {}

    wav_file_path = '{}Session{}/dialog/wav/'.format(iemocap_dir, sess)
    # Sorted so the processing order (and the pickled dict order) is reproducible.
    orig_wav_files = sorted(os.listdir(wav_file_path))

    for orig_wav_file in tqdm(orig_wav_files):
        # Hidden entries such as '.ipynb_checkpoints' are not recordings.
        if orig_wav_file.startswith('.'):
            continue
        # Skip names whose 8th character is 's' -- presumably the scripted
        # dialogues (e.g. 'Ses01F_script...'); TODO confirm against the dataset.
        # The length guard keeps short names from raising IndexError here.
        if len(orig_wav_file) > 7 and orig_wav_file[7] == 's':
            continue

        try:
            orig_wav_vector, _sr = librosa.load(wav_file_path + orig_wav_file, sr=sr)

            # splitext copes with extra dots in the name; a separate variable
            # keeps the real file name available for the error message.
            dialog_id = os.path.splitext(orig_wav_file)[0]

            # Literal substring match (no regex), treating missing names as non-matches.
            belongs_to_dialog = labels_df['wav_file'].str.contains(dialog_id, regex=False, na=False)

            for _, row in labels_df[belongs_to_dialog].iterrows():
                start_frame = math.floor(row['start_time'] * sr)
                end_frame = math.floor(row['end_time'] * sr)
                # The slice keeps the sample at end_frame (inclusive end).
                audio_vectors[row['wav_file']] = orig_wav_vector[start_frame:end_frame + 1]
        except Exception as exc:
            # Best effort: report the failing file and its cause, then carry on.
            # KeyboardInterrupt is deliberately not caught so the run can be stopped.
            print('An exception occured for {}: {!r}'.format(orig_wav_file, exc))

    with open('{}audio_vectors_{}.pkl'.format(output_dir, sess), 'wb') as f:
        pickle.dump(audio_vectors, f)

100%|██████████| 28/28 [03:42<00:00, 10.69s/it]
100%|██████████| 31/31 [03:43<00:00,  7.22s/it]
100%|██████████| 32/32 [03:52<00:00,  7.28s/it]
100%|██████████| 30/30 [04:23<00:00, 11.66s/it]
100%|██████████| 31/31 [04:05<00:00, 10.36s/it]
