In [2]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import h5py
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [3]:
def read_images():
    """
    Load every 350x350 grayscale image found in the ``images/`` folder.

    Each directory entry is decoded with OpenCV in grayscale mode. Entries
    that OpenCV cannot decode (non-image files, sub-directories, corrupt
    files) and images whose shape is not exactly (350, 350) are skipped.

    return: (images, filenames) as numpy arrays; ``images[i]`` was read
            from ``filenames[i]``.
    """
    imageList = []
    fileNames = []
    for file in os.listdir('images/'):
        image = cv2.imread(os.path.join('images', file), cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising,
        # so guard before touching ``.shape``.
        if image is None:
            continue
        if image.shape == (350, 350):
            imageList.append(image)
            fileNames.append(file)
    return np.array(imageList), np.array(fileNames)

In [4]:
def read_legend():
    """
    Read the image legend from data/legend.csv and encode its labels.

    The ``emotion`` column is lower-cased so that differently-cased
    spellings of the same label collapse into one class, then encoded to
    integers in a new ``emotion_encoded`` column.

    return: (legend DataFrame, array of class names) where the class name
            at position ``i`` corresponds to encoded value ``i``.
    """
    df = pd.read_csv('data/legend.csv')
    df['emotion'] = df['emotion'].str.lower()
    label_encoder = LabelEncoder()
    df['emotion_encoded'] = label_encoder.fit_transform(df['emotion'])

    # ``classes_`` holds the fitted class names ordered by encoded value, so
    # it works for any number of classes instead of assuming exactly eight.
    return df, label_encoder.classes_

In [5]:
def delete_unmatched_image(imgs, fileNames, df):
    """
    Keep only the images that have exactly one row in the legend.

    imgs:      array of images, aligned with ``fileNames`` along axis 0.
    fileNames: array of file names, one per image.
    df:        legend DataFrame with ``image`` and ``emotion_encoded`` columns.

    An image is dropped when its file name is missing from ``df['image']``
    or appears there more than once (ambiguous label).

    return: (images, y_target, file_names) filtered consistently, where
            ``y_target[i]`` is the encoded emotion of ``images[i]``.
    """
    # Build the name -> label table once instead of scanning the whole
    # DataFrame for every file. keep=False discards every copy of a
    # duplicated name, matching the "exactly one row" rule.
    unique_rows = df.drop_duplicates(subset='image', keep=False)
    label_by_file = dict(zip(unique_rows['image'].values,
                             unique_rows['emotion_encoded'].values))

    # -1 marks "no usable label" for the file at that position.
    y_tmp = np.array([label_by_file.get(file, -1) for file in fileNames])
    keep = y_tmp != -1

    images = np.asarray(imgs)[keep]
    y_target = y_tmp[keep]
    file_names = np.asarray(fileNames)[keep]

    return images, y_target, file_names

In [6]:
def pre_process():
    """
    Run the full loading pipeline and return model-ready arrays.

    Reads the images and the legend, drops images without a usable legend
    entry, and one-hot encodes the remaining labels.

    return: (images, one-hot targets, target names, file names)
    """
    raw_images, raw_names = read_images()
    legend, target_names = read_legend()
    images, labels, file_names = delete_unmatched_image(raw_images, raw_names, legend)
    one_hot_targets = pd.get_dummies(labels)

    return images, one_hot_targets, target_names, file_names
    

In [7]:
def store_many_hdf5(images, target):
    """
    Write the images and their targets to h5/images6.h5.

    The file is opened in 'w' mode, so an existing file at that path is
    overwritten. Two datasets are created: ``images`` and ``target``, each
    with the shape of the corresponding argument.

    images: array-like of image data.
    target: array-like of target values aligned with ``images``.
    """
    # The context manager closes the file even if create_dataset raises,
    # so a failed write cannot leave the HDF5 handle open.
    with h5py.File('h5/images6.h5', 'w') as file:
        file.create_dataset('images', np.shape(images), data=images)
        file.create_dataset('target', np.shape(target), data=target)

In [58]:
# Module-level Haar cascade detector shared by has_face(). The XML file is
# given as a relative path, so it is looked up in the current working directory.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
def has_face(img_path, scale_factor=1.01, min_neighbors=0, remove=True):
    """
    Run the Haar face detector over every file in a folder.

    img_path:      folder whose entries are checked.
    scale_factor:  ``scaleFactor`` passed to ``detectMultiScale``.
    min_neighbors: ``minNeighbors`` passed to ``detectMultiScale``.
                   NOTE(review): with the default of 0 every raw candidate
                   window is kept, so almost any image is likely to pass;
                   consider a higher value (e.g. 3-5) for real filtering.
    remove:        when True (default), images with no detection are
                   DELETED from disk. Pass False for a dry run.

    Entries that OpenCV cannot decode are listed in ``not_face`` but are
    never deleted, because they may not be images at all.

    return: (count, total, not_face) - number of files with at least one
            detection, number of directory entries, and the names of the
            entries without a detection.
    """
    files = os.listdir(img_path)
    total = len(files)
    count = 0
    not_face = []
    for file in files:
        file_path = os.path.join(img_path, file)
        img = cv2.imread(file_path)
        # cv2.imread returns None on failure; cvtColor would raise on it.
        if img is None:
            not_face.append(file)
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scale_factor, min_neighbors)
        if len(faces) > 0:
            count += 1
        else:
            not_face.append(file)
            if remove:
                os.remove(file_path)
    return count, total, not_face

In [56]:
# Filter the augmented 'angry' training folder. has_face() removes files with
# no detection from disk, so this cell is destructive.
# NOTE(review): this cell's execution count (56) is lower than that of the cell
# defining has_face (58), so the output shown may come from an earlier version
# of the function - re-run in order to confirm.
has_face('fer13_aug/train_augumentation/angry/')

(5154, 5154, [])

In [59]:
# Emotion class names; each one is used as a sub-folder name when filtering
# the validation images.
emotions = ['angry', 'disgust','fear','happy','neutral','sad','surprise']

In [61]:
# Filter every validation class folder. has_face() deletes files with no
# detection, so keep its (count, total, not_face) result per class instead of
# discarding it - otherwise there is no record of what was removed.
validation_face_stats = {}
for emo in emotions:
    validation_face_stats[emo] = has_face(f'fer13_aug/validation_augumentation/{emo}/')
validation_face_stats