# Load and Process the Tufts Dataset

In [1]:
import cv2
from tqdm import tqdm
from pathlib import Path

In [9]:
def crop(img, x_ratio, y_ratio):
    """Return a centered crop of ``img``.

    The crop keeps roughly ``x_ratio`` of the width and ``y_ratio`` of the
    height, centered on the image midpoint. Only the first two axes
    (rows, columns) are cropped, so both 2-D grayscale arrays and arrays with
    trailing axes (e.g. a channel axis) are supported.

    Args:
        img: Array-like image with at least two dimensions, indexed as
            ``img[row, column, ...]``.
        x_ratio: Fraction of the width to keep.
        y_ratio: Fraction of the height to keep.

    Returns:
        A slice (view) of ``img`` covering the centered region.

    Note:
        The ratios are not validated; values outside ``(0, 1]`` produce
        whatever the resulting slice bounds select.
    """
    # Only the two leading axes are spatial; ignore any trailing axes.
    y, x = img.shape[:2]

    # Half-extents are truncated toward zero, so the crop may be one pixel
    # smaller than ``ratio * size`` when that product is odd.
    half_y = int(y_ratio * y / 2)
    half_x = int(x_ratio * x / 2)

    y_start = y // 2 - half_y
    y_end = y // 2 + half_y
    x_start = x // 2 - half_x
    x_end = x // 2 + half_x

    return img[y_start:y_end, x_start:x_end]

In [10]:
# Root directory of the face dataset; it is scanned for one sub-directory per class.
dataset_path = '../MTHE-493-Dataset/face_dataset'

# Integer divisor applied to the cropped image's height and width when resizing.
resize_scale = 50

# Fractions of the image width (x) and height (y) kept by the centered crop.
crop_ratio_x = 0.5
crop_ratio_y = 0.75

In [11]:
# Accumulators filled in lockstep: one entry per image.
X = []     # cropped and downscaled grayscale images
Y = []     # integer class label, taken from the class directory name
exps = []  # integer parsed from the digit after the last '_' in the file name

# Every sub-directory of the dataset root is treated as one class.
image_class_dirs = [entry for entry in Path(dataset_path).iterdir() if entry.is_dir()]

for image_class_dir in tqdm(image_class_dirs):
    image_files = [image_file for image_file in image_class_dir.iterdir() if image_file.is_file()]
    for image_file in image_files:
        img = cv2.imread(str(image_file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable/unsupported file by returning
            # None instead of raising; fail with a message naming the file.
            raise ValueError(f'Could not read image file: {image_file}')
        img = crop(img, crop_ratio_x, crop_ratio_y)

        h, w = img.shape
        # cv2.resize takes dsize as (width, height), i.e. the reverse of the
        # NumPy (rows, cols) shape order.
        resize_shape = (w // resize_scale, h // resize_scale)
        img = cv2.resize(img, resize_shape, interpolation=cv2.INTER_AREA)

        # The class directory name is the integer label.
        y = int(image_class_dir.name)

        # Parse from the file name only, so underscores in parent directory
        # names (e.g. 'face_dataset') can never influence the result.
        exp = int(image_file.name.split('_')[-1][0])

        X.append(img)
        Y.append(y)
        exps.append(exp)

100%|██████████| 112/112 [01:37<00:00,  1.15it/s]


In [15]:
import numpy as np
# Sanity check: shape of the image list once stacked into a single array.
np.shape(X)

(560, 46, 46)

In [39]:
# Convert the accumulated Python lists to NumPy arrays.
X = np.array(X)
Y = np.array(Y)
exps = np.array(exps)

data_dict = {'X': X, 'Y': Y, 'expressions': exps}

output_file = 'data/tufts_face_dataset_small.npy'
# np.save does not create missing directories; make sure the target exists so
# the processed data is not lost to a FileNotFoundError at the last step.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# A dict is stored by np.save as a pickled 0-d object array; read it back with
# np.load(output_file, allow_pickle=True).item().
np.save(output_file, data_dict)