In [1]:
import os
import config
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm

In [2]:
# Read the dataset CSV (its location comes from config.CSV_PATH) into a DataFrame
df_data = pd.read_csv(filepath_or_buffer=config.CSV_PATH)

In [3]:
def stringToImage(s):
    """Convert a whitespace-separated pixel string into a 200x200 3-channel image.

    Args:
        s: String holding 48*48 = 2304 integer pixel values separated by
            whitespace, laid out row by row.

    Returns:
        np.ndarray of shape (200, 200, 3) and dtype uint8. The three channels
        are identical copies of the upscaled single-channel image.

    Raises:
        ValueError: If a token is not an integer or the number of values
            is not exactly 48*48.
    """
    # split() without an argument tolerates leading/trailing/repeated
    # whitespace; split(' ') would yield empty tokens that int() rejects.
    image = np.array([int(x) for x in s.split()]).reshape(48, 48)
    # Resize image to higher resolution
    image = cv2.resize(np.float32(image), dsize=(200, 200), interpolation=cv2.INTER_CUBIC)
    # Bicubic interpolation can overshoot the input range near sharp edges
    # (values below 0 or above 255). A bare uint8 cast would wrap those
    # around instead of saturating, so clamp to the valid range first.
    image = np.uint8(np.clip(image, 0, 255))
    # Convert from 1 channel to 3 channels
    return np.stack((image,)*3, axis=-1)
# Decode every pixel string in the ' pixels' column into an image array.
# Not re-runnable as is: once the column holds arrays, stringToImage's
# s.split call no longer applies to them.
df_data[' pixels'] = df_data[' pixels'].apply(stringToImage)

In [4]:
# Partition the rows by their ' Usage' value; keys are the split names
# used later for output folders and file prefixes.
data_split = {
    split_name: df_data[df_data[' Usage'] == usage_value]
    for split_name, usage_value in (
        ('train', 'Training'),
        ('val', 'PublicTest'),
        ('test', 'PrivateTest'),
    )
}

In [5]:
def parse_data(data_split):
    """Write each split's images as JPEG files and its labels as a .npy file.

    For every ``(name, frame)`` pair in ``data_split`` the images are saved to
    ``<config.DATA_PATH>/<name>/<name>_<i>.jpg`` (``i`` counts from 0 in row
    order) and the labels to
    ``<config.DATA_PATH>/annotations/<name>_labels.npy``. Missing output
    directories are created.

    Args:
        data_split: Mapping from split name to a DataFrame with a ' pixels'
            column holding image arrays accepted by ``cv2.imwrite`` and an
            'emotion' column holding the labels.

    Raises:
        OSError: If an image could not be written to disk.
    """
    annotations_dir = os.path.join(config.DATA_PATH, 'annotations')
    # exist_ok=True keeps this a no-op when the layout was prepared by hand.
    os.makedirs(annotations_dir, exist_ok=True)

    for ttv, data in data_split.items():
        images = data[' pixels'].tolist()
        labels = data['emotion'].tolist()

        image_dir = os.path.join(config.DATA_PATH, ttv)
        os.makedirs(image_dir, exist_ok=True)

        # Write images to disk
        for i, image in enumerate(tqdm(images)):
            image_path = os.path.join(image_dir, '{}_{}.jpg'.format(ttv, i))
            # cv2.imwrite signals failure through its return value rather
            # than an exception, so an unchecked call would silently drop
            # images (e.g. when the target path is not writable).
            if not cv2.imwrite(image_path, image):
                raise OSError('Failed to write image: ' + image_path)

        # Write labels to disk
        np.save(os.path.join(annotations_dir, '{}_labels.npy'.format(ttv)), labels)

In [6]:
""" 
Requires file structure
    /data
        /FER-2013
            /train
            /val
            /test
            /annotations
            /icml_face_data.csv
"""

# Export the images and labels of every split in data_split to disk
parse_data(data_split=data_split)

100%|██████████| 28709/28709 [00:15<00:00, 1848.97it/s]
100%|██████████| 3589/3589 [00:01<00:00, 1916.79it/s]
100%|██████████| 3589/3589 [00:01<00:00, 1859.75it/s]
