In [1]:
# Try for one file first
import librosa
import os
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
from tqdm import tqdm
import pickle
import random

import IPython.display
import librosa.display
import time
import joblib
from joblib import Parallel, delayed
ms.use('seaborn-muted')
%matplotlib inline

  ms.use('seaborn-muted')


In [2]:
import pandas as pd
import math

labels_df = pd.read_csv('/home/arpitsah/Desktop/Projects Fall-22/DA/domain_adaptation/LSTM-DENSE/speech-emotion-recognition-iemocap/preprocess_info/df_iemocap.csv')
iemocap_dir = '/home/arpitsah/Desktop/Projects Fall-22/DA/domain_adaptation/LSTM-DENSE/speech-emotion-recognition-iemocap/data/IEMOCAP_full_release_FILES/IEMOCAP_full_release/'
save_dir = '/home/arpitsah/Desktop/Projects Fall-22/DA/domain_adaptation/LSTM-DENSE/speech-emotion-recognition-iemocap/preprocess_info/'
audio_vectors_path= save_dir + 'audio_vectors_'

In [3]:
sr = 22050

In [None]:

audio_vectors = {}
for sess in range(1, 6):
# for sess in range(4, 5):
    wav_file_path = '{}Session{}/dialog/wav/'.format(iemocap_dir, sess)
    orig_wav_files = os.listdir(wav_file_path)
    for orig_wav_file in tqdm(orig_wav_files):
        try:
            orig_wav_vector, _sr = librosa.load(wav_file_path + orig_wav_file, sr=sr)
            orig_wav_file, file_format = orig_wav_file.split('.')
            for index, row in labels_df[labels_df['wav_file'].str.contains(orig_wav_file)].iterrows():
                start_time, end_time, truncated_wav_file_name, emotion, val, act, dom = row['start_time'], row['end_time'], row['wav_file'], row['emotion'], row['val'], row['act'], row['dom']
                start_frame = math.floor(start_time * sr)
                end_frame = math.floor(end_time * sr)
                truncated_wav_vector = orig_wav_vector[start_frame:end_frame + 1]
                audio_vectors[truncated_wav_file_name] = truncated_wav_vector
        except:
            print('An exception occured for {}'.format(orig_wav_file))
    with open(save_dir + 'audio_vectors_{}.pkl'.format(sess), 'wb') as f:
        pickle.dump(audio_vectors, f)

In [4]:
def features(X, sample_rate):
    
    stft = np.abs(librosa.stft(X))
    
    pitches, magnitudes = librosa.piptrack(X, sr=sample_rate, S=stft, fmin=70, fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, 1].argmax()
        pitch.append(pitches[index, i])
        
    pitch_tuning_offset = librosa.pitch_tuning(pitches)
    pitchmean = np.mean(pitch)
    pitchstd = np.std(pitch)
    pitchmax = np.max(pitch)
    pitchmin = np.min(pitch)
    
    cent = librosa.feature.spectral_centroid(y=X, sr=sample_rate)
    cent = cent / np.sum(cent)
    meancent = np.mean(cent)
    stdcent = np.std(cent)
    maxcent = np.max(cent)
    
    flatness = np.mean(librosa.feature.spectral_flatness(y=X))

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=12).T, axis=0)
    
    mfccsstd = np.std(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=12).T, axis=0)
    
    mfccmax = np.max(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=12).T, axis=0)    
    
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)    

    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)    

    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)    

    zerocr = np.mean(librosa.feature.zero_crossing_rate(X))

    rms = librosa.feature.rms(X + 0.0001)[0]
    meanrms = np.mean(rms)
    stdrms = np.std(rms)
    maxrms = np.max(rms)
    
    y_harmonic = np.mean(librosa.effects.hpss(X)[0])
    sig_mean = np.mean(abs(X))
    sig_std = np.std(X)

    ext_features = np.array([
        flatness, zerocr, meancent, stdcent,
        maxcent, pitchmean, pitchmax, pitchmin, pitchstd,
        pitch_tuning_offset, meanrms, maxrms, stdrms, y_harmonic, sig_mean, sig_std])

    
    ext_features = np.concatenate((ext_features, mfccs, mfccsstd, mfccmax, chroma, mel, contrast))
    
    return ext_features

In [5]:
columns = ['wav_file', 'label', "flatness", "zerocr", "meancent", "stdcent", "maxcent", "pitchmean", 
           "pitchmax", "pitchmin", "pitchstd", "pitch_tuning_offset", "meanrms", "maxrms", "stdrms", 
           "y_harmonic", "sig_mean", "sig_std"]
columns_new = columns + [(lambda x: "mfcc_" + str(x))(x) for x in range(1, 13)] + [(lambda x: "mfccstd_" + str(x))(x) for x in range(1, 13)] + [(lambda x: "mfccmax_" + str(x))(x) for x in range(1, 13)] + [(lambda x: "chroma" + str(x))(x) for x in range(1, 13)] + [(lambda x: "mel" + str(x))(x) for x in range(1, 129)] + [(lambda x: "contrast" + str(x))(x) for x in range(1, 8)]
        
# columns_new

In [6]:
df_features = pd.DataFrame(columns=columns_new)

In [7]:
emotion_dict = {'ang': 0,
                'hap': 1,
                'exc': 2,
                'sad': 3,
                'fru': 4,
                'fea': 5,
                'sur': 6,
                'neu': 7,
                'xxx': 8,
                'oth': 8,
                'dis': 8}

In [8]:
labels_df.emotion.value_counts()

xxx    2506
fru    1849
neu    1708
ang    1103
sad    1084
exc    1041
hap     595
sur     107
fea      40
oth       3
dis       2
Name: emotion, dtype: int64

In [9]:
for sess in range(1, 6):
    audio_vectors = pickle.load(open('{}{}.pkl'.format(audio_vectors_path, sess), 'rb'))
    for index, row in tqdm(labels_df[labels_df['wav_file'].str.contains('Ses0{}'.format(sess))].iterrows()):

        wav_file_name = row['wav_file']
    
        label = emotion_dict[row['emotion']]
        y = audio_vectors[wav_file_name]

        features_all = list(features(y, sr))
        
        feature_list = [wav_file_name, label] + features_all
        
        df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
        
    print("Session Finished {}".format(sess))

  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ig

Session Finished 1


  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ig

Session Finished 2


  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ig

Session Finished 3


  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ig

Session Finished 4


  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ignore_index=True)
  df_features = df_features.append(pd.DataFrame(feature_list, index=columns_new).transpose(), ig

ValueError: can't extend empty axis 0 using modes other than 'constant' or 'empty'

In [11]:
df_features.to_csv('/home/arpitsah/Desktop/Projects Fall-22/DA/domain_adaptation/LSTM-DENSE/speech-emotion-recognition-iemocap/preprocess_info/audio_features.csv', index=False)

In [13]:
print(wav_file_name)
print(df_features.shape)

Ses05M_script01_1b_F037
(8014, 201)
