# Create Data
This notebook generates the training data for the EmojiMe model. The training data consists of feature vectors, one per image. Each vector contains the distances from each facial landmark detected in that image to the others.

In [1]:
%matplotlib inline
import sys
sys.path.append("..")

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import glob
import dlib
import math
from emojime.utils import shape_to_np, distances

In [2]:
# Root directory (relative to this notebook) that holds the input images and
# receives the generated .npy files.
data_path = '../data'
# Emotion classes to process. Each name is used as an image sub-directory, and
# its position in this list becomes the numeric class label in the label cell.
emotions = ['neutral', 'happy', 'sad', 'fear', 'angry']

# Set up face detector and landmarks extractor
detector = dlib.get_frontal_face_detector()
# Landmark predictor loaded from a pre-trained model file under ../models
# (presumably the 68-point model, going by the file name).
predictor = dlib.shape_predictor('../models/shape_predictor_68_face_landmarks.dat')

## Extract landmark distances

In [4]:
# Width of one feature vector. Presumably one distance entry per ordered pair
# of the 68 facial landmarks -- confirm against emojime.utils.distances.
n_features = 68 * 68

landmarks = {}  # Contains landmark data per emotion: array of shape (n_samples, n_features)
for emotion in emotions:
    # glob returns paths in arbitrary order; sort so the row order of the
    # generated arrays is reproducible from run to run.
    img_paths = sorted(glob.glob(data_path + '/emotion-images/' + emotion + '/*.png'))

    # Collect one feature vector per usable image and stack them once at the
    # end, instead of re-copying the whole matrix with vstack for every sample.
    rows = []
    for image in img_paths:
        sample = cv2.imread(image)
        if sample is None:
            # cv2.imread returns None (rather than raising) for unreadable files
            continue

        # Detect faces; images without a detected face contribute no row
        rects = detector(sample, 0)
        if len(rects) == 0:
            continue

        # Calculate landmarks for the first face detected
        shape = shape_to_np(predictor(sample, rects[0]))

        # Assign into a pre-sized float row so the distances are validated
        # against the expected width exactly as the original assignment did.
        features = np.zeros(n_features)
        features[:] = distances(shape)
        rows.append(features)

    # Always create the entry, even when no sample was usable, so later cells
    # see an empty (0, n_features) array instead of failing with a KeyError.
    landmarks[emotion] = np.vstack(rows) if rows else np.zeros((0, n_features))

## Create labels for feature vectors

In [5]:
# Number of feature columns produced by the extraction cell (68*68 values per
# sample). Anything beyond this width is a label column from an earlier run.
n_features = 68 * 68

for i, emotion in enumerate(emotions):
    # Keep only the feature columns. This makes the cell idempotent: running it
    # again replaces the label column instead of appending a second one.
    features = landmarks[emotion][:, :n_features]

    # Create label data: one column holding the emotion's index in `emotions`
    y = np.full((features.shape[0], 1), i, dtype=np.float64)

    # Later cells read the labelled array back from `landmarks`, so store it
    # under the same key and also persist it per emotion.
    landmarks[emotion] = np.hstack((features, y))
    np.save(data_path + '/emotion-landmarks/{}_landmarks'.format(emotion), landmarks[emotion])

## Concatenate data and save

In [6]:
# Gather the per-emotion arrays in the order given by `emotions` and stack
# them row-wise into a single training array, then write it to disk.
per_emotion = [landmarks[name] for name in emotions]
data = np.vstack(per_emotion)
np.save(data_path + '/data_set', data)