### Preprocesamiento y preparación del dataset.

En primer lugar, los audios se convierten a formato mp3 para que estén en el mismo formato que las grabaciones del call center. Después se aplica un filtro de 300 a 3300 Hz, porque es el rango de frecuencias en el que trabajan los codecs de audio más utilizados para telefonía IP. Por último, a los audios se les calculan los features.
Los casos positivos se incrementan agregando ruido y desplazándolos en tiempo.

#### Imports

In [5]:
import os
import glob
import random
import numpy as np
import statistics
import librosa
from sklearn.model_selection import train_test_split
from database import Database
from audio_process import Augmenter, TimeStretcher, PitchShifter, Reverberator, Distorter, feature_extraction
import re


#### Armado del dataset completo

- Se eliminan los audios de menos de 0,5 segundos

In [2]:

def process_dataset(collection_name, directory, name, n_mfcc, n_start, augment=True):
    """Extract features from every mp3 matched by *directory* and post them to the database.

    For each file that is not already in the collection, the audio is loaded,
    clips shorter than 0.5 s are dropped, features are computed with
    ``feature_extraction`` (300 and 3300 are passed as ``f0min``/``f0max``)
    and the result is posted with a binary label: 1 for anger, 0 otherwise.
    When *augment* is true, the variants returned by ``Augmenter`` are
    processed the same way and posted tagged, in order, as ``clipping``,
    ``time_stretch``, ``pitch_shift`` and ``reverb``.

    Args:
        collection_name: Name handed to ``Database``.
        directory: Path prefix; ``'*.mp3'`` is appended to build the glob pattern.
        name: Dataset tag prepended to every stored file name.
        n_mfcc: Forwarded to ``feature_extraction``.
        n_start: Number of leading files (in sorted order) to skip.
        augment: When false, only the original clips are posted.

    Raises:
        Exception: Any error raised while handling a file is re-raised after
            the offending path has been printed.
    """
    db = Database(collection_name)
    f0min = 300
    f0max = 3300
    directory = directory + '*.mp3'
    print(directory)
    # glob returns files in arbitrary order; sorting keeps `n_start` meaningful
    # from one run to the next.
    files = sorted(glob.glob(directory))[n_start:]
    anger_tokens = ('05', 'anger.mp3', 'ANG')
    aug_names = ['clipping', 'time_stretch', 'pitch_shift', 'reverb']

    for n, path in enumerate(files):
        try:
            if n % 1000 == 0:
                print(n)

            file_name = os.path.basename(path).replace("-", "_")
            # The lookup uses the un-prefixed name with like=True.
            if db.select_by_id(file_name, like=True):
                continue

            # The third '_'-separated field is treated as the emotion token.
            # NOTE(review): names that carry the emotion elsewhere (e.g.
            # '..._F031_ang.mp3') are labelled 0 here - confirm this is intended.
            emotion = 1 if file_name.split('_')[2] in anger_tokens else 0
            file_name = f'{name}_{file_name}'

            audio, sr = librosa.load(path)
            if len(audio) / sr < 0.5:
                continue

            features = feature_extraction(audio, sr, f0min, f0max, n_mfcc, unit="Hertz")
            if features != 'skip':
                db.post(file_name, emotion, features, augmentation='')

            if not augment:
                continue

            processors = [Distorter(), TimeStretcher(), PitchShifter(), Reverberator()]
            augmented_audios = Augmenter(processors).augment(audio, sr)
            for augmented_audio, aug in zip(augmented_audios, aug_names):
                features = feature_extraction(augmented_audio, sr, f0min, f0max, n_mfcc, unit="Hertz")
                if features != 'skip':
                    db.post(file_name, emotion, features, augmentation=aug)
                else:
                    print(features)
        except Exception:
            # Surface which file failed, then propagate the original error.
            print(path)
            raise


In [3]:
# Glob prefixes of the raw datasets and their display names.
Meld, Enterface = "Data/Meld/*/", "Data/Enterface/*/"
Crema, IEMOCAP = "Data/CREMA-D/", "Data/IEMOCAP/"
names = ['Meld', 'Enterface', 'Crema', 'IEMOCAP']
datasets = [Meld, Enterface, Crema, IEMOCAP]

# NOTE(review): the directory passed here is the literal 'IEMOCAP/', not the
# IEMOCAP variable defined above - confirm which path is intended.
process_dataset(
    collection_name='IEMOCAP',
    directory='IEMOCAP/',
    name='iemocap',
    n_mfcc=16,
    n_start=0,
)


IEMOCAP/*.mp3


In [14]:
# Bind the notebook-global `db` to the 'Meld' collection and print its
# balance report (the positive/negative counts shown in the cell output).
db = Database('Meld')
db.print_balance()

positives_no_aug: 1074
negatives_no_aug: 8608
postives_aug: 4296
negatives_aug: 34428
Total_positives: 5370
Total_negatives: 43036


In [6]:
# Set label = 1 on every document whose _id matches the Meld anger pattern,
# then print how many Meld documents carry label 1.
# A single update_many replaces the per-document loop that re-wrote every
# field of each document; only `label` is touched now.
# Relies on the notebook-global `db` being bound already
# (original hint: db = Database("Meld")).
#
# Commented-out variants of this cell used the same steps with these patterns
# (kept for reference, not executed):
#   Enterface: update "Enterface_.._.._05.*", count "Enterface.*"
#   Crema:     update "Crema_.*_ANG.*",       count "Crema.*"
db.collection.update_many(
    {"_id": re.compile("Meld_.*_anger.*")},
    {"$set": {"label": 1}},
)
print(db.collection.count_documents({"_id": re.compile("Meld.*"), "label": 1}))




0


In [None]:
# Scratch note - presumably an example document _id from the IEMOCAP collection (confirm):
# iemocap_Ses01M_script03_2_F031_ang.mp3_

In [9]:
# Set label = 1 on every IEMOCAP document whose _id matches the anger pattern,
# then print how many documents of the dataset carry label 1.
db = Database('IEMOCAP')
db.collection.update_many(
    {"_id": re.compile("iemocap_.*_ang.*")},
    {"$set": {"label": 1}},
)
# The count pattern uses the same lowercase prefix as the update pattern:
# the regex is case-sensitive, so "IEMOCAP.*" could not match "iemocap_..." ids.
print(db.collection.count_documents({"_id": re.compile("iemocap.*"), "label": 1}))

0


In [16]:
# Print the balance report of whichever collection `db` is currently bound to.
db.print_balance()

positives_no_aug: 1074
negatives_no_aug: 8608
postives_aug: 4296
negatives_aug: 34428
Total_positives: 5370
Total_negatives: 43036


In [15]:
# Delete every document in which at least one feature field equals NaN:
# one {field: NaN} clause per feature name, combined with $or.
nan_clauses = []
for feature in db.feature_names:
    nan_clauses.append({feature: np.nan})
db.collection.delete_many({"$or": nan_clauses})

<pymongo.results.DeleteResult at 0x7fefc3a53d40>

In [12]:
# Delete every document whose _id contains "xxx".
db.collection.delete_many({"_id": re.compile(".*xxx.*")})

<pymongo.results.DeleteResult at 0x7fefa3d725c0>

In [19]:


def _split_holdout(source_db, holdout_fraction=0.3):
    """Move a random sample of non-augmented documents into validation/test collections.

    An even number of documents (about *holdout_fraction* of those matching
    ``{"augmented": False}``) is drawn without replacement. The first half is
    inserted into ``<COLLECTION>_test``, the second half into
    ``<COLLECTION>_validation``, and every document of *source_db* whose _id
    contains the _id of a drawn document is deleted.

    Args:
        source_db: ``Database`` holding the full collection.
        holdout_fraction: Share of the non-augmented documents to hold out.

    Returns:
        Tuple ``(test, val)`` with the lists of held-out documents.
    """
    originals = list(source_db.collection.find({"augmented": False}))
    n = int(len(originals) * holdout_fraction)
    n_sample = n if n % 2 == 0 else n - 1
    if n_sample <= 0:
        # insert_many rejects an empty list; nothing to hold out.
        return [], []

    # Draw indices rather than the documents themselves, so NumPy never has
    # to coerce a list of dicts into an array.
    picked = np.random.choice(len(originals), n_sample, replace=False)
    held_out = [originals[i] for i in picked]

    half = n_sample // 2
    test, val = held_out[:half], held_out[half:]
    Database(f"{source_db.COLLECTION}_validation").collection.insert_many(val)
    Database(f"{source_db.COLLECTION}_test").collection.insert_many(test)

    for doc in held_out:
        # Match on the document's _id, not on the repr of the whole document:
        # the latter never matched, so held-out samples stayed in the training set.
        # Assumes augmented variants embed the original's _id - TODO confirm.
        rgx = re.compile(f".*{re.escape(str(doc['_id']))}.*")
        source_db.collection.delete_many({"_id": rgx})
    return test, val


for collection_name in ('IEMOCAP', 'Meld'):
    db = Database(collection_name)
    test, val = _split_holdout(db)