# Data preparation

Features
- Modality (01 = full-AV, 02 = video-only, 03 = audio-only).
- Vocal channel (01 = speech, 02 = song).
- Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).
- Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
- Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
- Repetition (01 = 1st repetition, 02 = 2nd repetition).
- Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

NB We considered only the speech videos (vocal channel=01) with both audio and video (modality=01)

In [1]:
emotions = {1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}
emotional_intensity = {1:'normal', 2:'strong'}

In [2]:
import re
import os
import pandas as pd
import cv2
import random
import numpy as np
from tensorflow import keras
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from tqdm import tqdm

2023-06-13 23:48:46.698475: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
path = "Datasets/AUDIO/"

In [4]:
filenames = []
feats = []
labels = []
paths = []

for (dirpath, dirnames, fn) in os.walk(path):
    for name in fn:
        filename = name.split('.')[0]
        feat = filename.split('-')[2:]
        label = feat[0]
        filenames.append(filename)
        feats.append(feat)
        labels.append(label)
        paths.append(dirpath + '/' + filename)
        
filenames[:5]

['01-01-04-01-02-02-20',
 '01-01-04-02-02-02-20',
 '01-01-05-02-01-02-20',
 '02-01-05-02-01-02-20',
 '02-01-05-02-01-02-20']

## Data Exploration

In [5]:
df = pd.DataFrame(feats, columns = ['emotion', 'emotional intensity', 'statement', 'repetition', 'actor']).astype(int)

df['emotion'] = df['emotion'].map(emotions)
df['emotional intensity'] = df['emotional intensity'].map(emotional_intensity)

df['index'] = filenames
df.set_index('index', inplace=True)

df

Unnamed: 0_level_0,emotion,emotional intensity,statement,repetition,actor
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
01-01-04-01-02-02-20,sad,normal,2,2,20
01-01-04-02-02-02-20,sad,strong,2,2,20
01-01-05-02-01-02-20,angry,strong,1,2,20
02-01-05-02-01-02-20,angry,strong,1,2,20
02-01-05-02-01-02-20,angry,strong,1,2,20
...,...,...,...,...,...
02-01-07-01-01-02-23,disgust,normal,1,2,23
01-01-02-02-02-01-23,calm,strong,2,1,23
01-01-03-02-01-02-23,happy,strong,1,2,23
02-01-08-02-02-02-23,surprise,strong,2,2,23


## Export frames

- one frame every skip=3 starting from the 21th frame
- proportional resize to obtain height=224
- saved as png with and name videoname_iframe

### 398x224 normal

In [8]:
import time
def prepare_all_videos(filenames, paths, skip=1):
    nframes_tot = 0
    
    for count, video in tqdm(enumerate(zip(filenames, paths)), desc='framing progress', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'):
        # Gather all its frames
        # save_frames(video[0], video[1], video[1].replace('RAVDESS', 'RAVDESS_frames'), skip)
        # print(f"Processed videos {count+1}/{len(paths)}")
        time.sleep(5)
    return


def save_frames(filename, input_path, output_path, skip):
    # Initialize video reader
    cap = cv2.VideoCapture(input_path + '.mp4')
    frames = []
    count = 0
    
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    try:
        # Loop through all frames
        while True:
            # Capture frame
            ret, frame = cap.read()
            if (count % skip == 0 and count > 20):
                #print(frame.shape)
                if not ret:
                    break
                frame = cv2.resize(frame, (398, 224))
                cv2.imwrite(output_path + '/' + f'{filename}_{count}' + '.png', frame)
            count += 1
    finally:
        cap.release()
    return

In [None]:
def prepare_all_videos(filenames, paths, skip=1):
    
    for count, video in enumerate(zip(filenames, paths)):
        # Gather all its frames
        filename, input_path, output_path = video[0], video[1], video[1].replace('RAVDESS', 'RAVDESS_frames')
        
        cap = cv2.VideoCapture(input_path + '.mp4')
        count = 0
        
        if not os.path.exists(output_path):
            os.makedirs(output_path)
            
        try:
        # Loop through all frames
            while True:
                # Capture frame
                ret, frame = cap.read()
                if (count % skip == 0 and count > 20):
                    #print(frame.shape)
                    if not ret:
                        break
                    frame = cv2.resize(frame, (398, 224))
                    cv2.imwrite(os.path.join(output_path, f'{filename}_{count}.png'), frame)
                count += 1
        finally:
            cap.release()
        print(f"Processed videos {count+1}/{len(paths)}")


In [9]:
prepare_all_videos(filenames, paths, skip=3)

framing progress: |          | 5/? [00:25<00:00,  5.01s/it]


KeyboardInterrupt: 

### 224x224 black background

In [8]:
def prepare_all_videos(filenames, paths, skip=1):
    nframes_tot = 0
    
    for count, video in enumerate(zip(filenames, paths)):
        # Gather all its frames
        filename, input_path, output_path = video[0], video[1], video[1].replace('RAVDESS', 'RAVDESS_frames_black')
        
        cap = cv2.VideoCapture(input_path + '.mp4')
        count = 0
        
        if not os.path.exists(output_path):
            os.makedirs(output_path)
            
        try:
        # Loop through all frames
            while True:
                # Capture frame
                ret, frame = cap.read()
                if (count % skip == 0 and count > 20):
                    #print(frame.shape)
                    if not ret:
                        break
                    #####
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)                  # background from white to black
                    ret, thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
                    frame[thresh == 255] = 0
                    #####
                    frame = cv2.resize(frame, (398, 224))
                    frame = frame[0:224, 87:311]
                    cv2.imwrite(os.path.join(output_path, f'{filename}_{count}.png'), frame)
                count += 1
        finally:
            cap.release()
        
        print(f"Processed videos {count+1}/{len(paths)}")



def save_frames(filename, input_path, output_path, skip):
    # Initialize video reader
    cap = cv2.VideoCapture(input_path + '.mp4')
    frames = []
    count = 0
    
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    try:
        # Loop through all frames
        while True:
            # Capture frame
            ret, frame = cap.read()
            if (count % skip == 0 and count > 20):
                #print(frame.shape)
                if not ret:
                    break
                #####
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)                  # background from white to black
                ret, thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
                frame[thresh == 255] = 0
                #####
                frame = cv2.resize(frame, (398, 224))
                frame = frame[0:224, 87:311]
                cv2.imwrite(output_path + '/' + f'{filename}_{count}' + '.png', frame)
            count += 1
    finally:
        cap.release()
    return

In [None]:
prepare_all_videos(filenames, paths, skip=3)

### 224x224 only faces BW

In [6]:
def prepare_all_videos(filenames, paths, skip=1):
    nframes_tot = 0
    
    for count, video in enumerate(zip(filenames, paths)):
        # Gather all its frames
        save_frames(video[0], video[1], video[1], skip)
        print(f"Processed videos {count+1}/{len(paths)}")
    return


def save_frames(filename, input_path, output_path, skip):
    # Initialize video reader
    cap = cv2.VideoCapture(input_path + '.mp4')
    haar_cascade = cv2.CascadeClassifier('./Other/haarcascade_frontalface_default.xml')
    frames = []
    count = 0
    
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    try:
        # Loop through all frames
        while True:
            # Capture frame
            ret, frame = cap.read()
            if os.path.exists(output_path + '/' + f'{filename}_{count}' + '.png'):
                count += 1
                continue
            if (count % skip == 0 and count > 20):
                #print(frame.shape)
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = haar_cascade.detectMultiScale(frame, scaleFactor=1.12, minNeighbors=9)
                # if len(faces) != 1:
                    
                if len(faces) == 0:
                    faces = haar_cascade.detectMultiScale(frame, scaleFactor=1.02, minNeighbors=9)
                    if len(faces) == 0:
                        print(f"error {filename} {count} 발생!")
                        break
                if len(faces) > 1:
                    ex = []
                    print(type(faces))
                    for elem in faces:
                        for (x, y, w, h) in [elem]:
                            ex.append(frame[y:y + h, x:x + w])

                    print(filename)
                    # inp = int(input())
                    # faces = [faces[inp]]
                #     raise Exception(f"More than 1 faces detected in {filename}")

                for (x, y, w, h) in faces:
                    face = frame[y:y + h, x:x + w]

                face = cv2.resize(face, (234, 234))
                face = face[5:-5, 5:-5]
                cv2.imwrite(output_path + '/' + f'{filename}_{count}' + '.png', face)
                before_face = face
            count += 1
    finally:
        cap.release()
    return

In [None]:
def prepare_all_videos(filenames, paths, skip=1):
    
    for count, video in tqdm(enumerate(zip(filenames, paths)), desc='blacking progress', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]'):
        # Gather all its frames
        filename, input_path, output_path = video[0], video[1], video[1].replace('RAVDESS', 'RAVDESS_frames_face_BW')
        
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        
        haar_cascade = cv2.CascadeClassifier('./Other/haarcascade_frontalface_default.xml')
        count = 0
        
        while os.path.exist(os.path.join(output_path, f'{filename}_{count}.png')):
            count += skip
        
        cap = cv2.VideoCapture(input_path + '.mp4')
        
        try:
        # Loop through all frames
            while True:
                # Capture frame
                ret, frame = cap.read()
                if os.path.exists(output_path + '/' + f'{filename}_{count}' + '.png'):
                    count += 1
                    continue
                if (count % skip == 0 and count > 20):
                    #print(frame.shape)
                    if not ret:
                        break
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    faces = haar_cascade.detectMultiScale(frame, scaleFactor=1.12, minNeighbors=9)
                    # if len(faces) != 1:
                        
                    if len(faces) == 0:
                        faces = haar_cascade.detectMultiScale(frame, scaleFactor=1.02, minNeighbors=9)
                        if len(faces) == 0:
                            print(f"error {filename} {count} 발생!, 얼굴 인식 불가!")
                            break
                    if len(faces) > 1:
                        ex = []
                        print(type(faces))
                        for elem in faces:
                            for (x, y, w, h) in [elem]:
                                ex.append(frame[y:y + h, x:x + w])


                    for (x, y, w, h) in faces:
                        face = frame[y:y + h, x:x + w]

                    face = cv2.resize(face, (234, 234))
                    face = face[5:-5, 5:-5]
                    cv2.imwrite(os.path.join(output_path, f'{filename}_{count}.png'), face)
                    
                count += 1
        finally:
            cap.release()


In [7]:
prepare_all_videos(filenames, paths, skip=3)

Processed videos 1/1440
Processed videos 2/1440
<class 'numpy.ndarray'>
01-01-05-02-01-02-20
Processed videos 3/1440
<class 'numpy.ndarray'>
02-01-05-02-01-02-20
Processed videos 4/1440
Processed videos 5/1440
Processed videos 6/1440
<class 'numpy.ndarray'>
01-01-03-02-01-02-20
Processed videos 7/1440
Processed videos 8/1440
Processed videos 9/1440
Processed videos 10/1440
Processed videos 11/1440
Processed videos 12/1440
Processed videos 13/1440
Processed videos 14/1440
Processed videos 15/1440
Processed videos 16/1440
Processed videos 17/1440
Processed videos 18/1440
Processed videos 19/1440
Processed videos 20/1440
Processed videos 21/1440
Processed videos 22/1440
Processed videos 23/1440
Processed videos 24/1440
Processed videos 25/1440
Processed videos 26/1440
Processed videos 27/1440
Processed videos 28/1440
Processed videos 29/1440
Processed videos 30/1440
Processed videos 31/1440
Processed videos 32/1440
Processed videos 33/1440
Processed videos 34/1440
Processed videos 35/1440

### Mean face

In [49]:
emotions_tras = {1:1, 2:4, 3:5, 4:0, 5:3, 6:2, 7:6}
emotions = {0:'angry', 1:'calm', 2:'disgust', 3:'fear', 4:'happy', 5:'sad', 6:'surprise'}

dataset_path = "Datasets/RAVDESS_frames_face_BW/"

height_orig = 224
width_orig = 224
height_targ = 112
width_targ = 112

val_actors = ['19', '20']
test_actors = ['21', '22', '23', '24']

In [50]:
filenames_train = [] # train

for (dirpath, dirnames, fn) in os.walk(dataset_path):
    if fn != []:
        class_temp = int(fn[0].split('-')[2]) - 1
        if class_temp != 0:                                                     # exclude 'neutral' label
            if any(act in dirpath for act in (test_actors+val_actors))==False:  # select only train actors
                path = [os.path.join(dirpath, elem) for elem in fn]
                label = [emotions_tras[class_temp]] * len(fn)                   # emotion transposition
                filenames_train.append(list(zip(path, label)))

In [57]:
def sampling(list, num_frames_desired):
    tot = []
    for elem in list:
        sampled_list = random.sample(elem, num_frames_desired)
        tot += sampled_list
    return(tot)


def compute_mean_face(filenames):
    # frames_per_vid = min([len(elem) for elem in filenames])     # number of frames per clip in order to have balanced classes
    frames_per_vid = 20
    print("frames per video:", frames_per_vid) 

    filenames_sampled = sampling(filenames, frames_per_vid)
    random.shuffle(filenames_sampled)

    faces = []

    for path, label in tqdm(filenames_sampled):
        face = cv2.imread(path)
        face = cv2.resize(face, (112, 112))
        face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        faces.append(face)

    faces = np.array(faces)
    mean_face = np.mean(faces, axis=0)
    mean_face = mean_face/255
    mean_face = np.expand_dims(mean_face, axis=2)
    np.save('Other/mean_face.npy', mean_face)

In [58]:
compute_mean_face(filenames_train)

frames per video: 20


100%|██████████| 35200/35200 [00:16<00:00, 2137.49it/s]
