# Preprocess data

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import skimage.io

import config
from utils import normalize

%matplotlib inline

## Load dataset

In [None]:
# Columns to load from the augmented dlib feature CSV, mapped to the dtype
# each one is expected to hold.
# The NumPy aliases np.str / np.bool / np.float were removed in NumPy 1.24
# (referencing them raises AttributeError), so the equivalent builtins are
# used here.
COLUMN_DTYPES = {
    # Eye Right
    'eye_right_x': np.int32,
    'eye_right_y': np.int32,
    'eye_right_width': np.int32,
    'eye_right_height': np.int32,
    'eye_right_image': str,

    # Eye Left
    'eye_left_x': np.int32,
    'eye_left_y': np.int32,
    'eye_left_width': np.int32,
    'eye_left_height': np.int32,
    'eye_left_image': str,

    # Face
    'face_width': np.int32,
    'face_height': np.int32,
    'face_x': np.int32,
    'face_y': np.int32,

    # Shot
    'y': np.int32,
    'x': np.int32,
    'score': np.int32,
    'timestamp': np.int32,
    'img_path': str,
    'img': str,  # Relative to the raw dataset

    # Game
    'game_id': str,
    'glasses': bool,
    'screen_diagonal': float,
    'camera_position': str,
    'screen_height': np.int32,
    'screen_width': np.int32,
}

# Load only the columns listed above. `usecols` selects columns by name; it
# does not set their types.
# NOTE(review): the original call passed the dict itself as `usecols`, so only
# its keys were used and the dtypes were never enforced. Adding
# `dtype=COLUMN_DTYPES` would enforce them, but int32 columns fail on missing
# or out-of-range values (e.g. `timestamp`) -- confirm against the data before
# enabling it.
data = pd.read_csv(
    config.PATH_DATA_FEATURES01_DLIB_AUGMENTED_CSV,
    usecols=list(COLUMN_DTYPES),
)

## Scale

Because the webcam is positioned at the top center of the screen, no y-axis value should be negative.

![](img/reference.png)

In [None]:
# Rescale the feature columns by the webcam resolution and the label columns
# by the screen resolution. The return values are discarded, so the helpers
# presumably modify `data` in place -- confirm in utils/normalize.
webcam_size = (config.WEBCAM_WIDTH, config.WEBCAM_HEIGHT)
normalize.normalize_data(data, *webcam_size)
screen_size = (config.SCREEN_WIDTH, config.SCREEN_HEIGHT)
normalize.normalize_labels(data, *screen_size)

## Drop unneeded columns

In [None]:
# Shot and game metadata columns that are removed from the frame here.
unneeded_columns = [
    'score',
    'timestamp',
    'img_path',
    'img',
    'game_id',
    'screen_diagonal',
    'camera_position',
    'screen_height',
    'screen_width',
]
data.drop(unneeded_columns, axis='columns', inplace=True)

## Clean data

In [None]:
# Show only the min and max rows of the summary to spot out-of-range values.
data.describe().loc[['min', 'max'], :]

ERRORS:
* `face_y` contains negative values, but there shouldn't be any negative y positions. The rows concerned are dropped in the next cell.

In [None]:
# Remove every row whose face_y is negative (in place; the index is not reset).
has_negative_face_y = data['face_y'] < 0
data.drop(data.index[has_negative_face_y], inplace=True)

In [None]:
# Re-check the min and max of every numeric column after the clean-up.
data.describe().loc[['min', 'max'], :]

Everything makes sense.

## Target stats

In [None]:
# Joint scatter plot of the `x` and `y` target columns with their marginals.
# `size` was renamed to `height` in seaborn 0.9, and `stat_func` was deprecated
# there and later removed; on current seaborn both are forwarded to the scatter
# plotter instead of configuring the figure. With no `stat_func`, no statistic
# is annotated, which is what `stat_func=None` asked for.
sns.jointplot(x="x", y="y", data=data, kind="scatter", height=12, ratio=10)

In [None]:
# Count histogram of the `y` target column (no KDE curve, no normalisation).
# NOTE(review): `distplot` is deprecated in recent seaborn releases.
sns.distplot(
    a=data['y'],
    hist=True,
    kde=False,
    norm_hist=False,
)

In [None]:
# Count histogram of the `x` target column (no KDE curve, no normalisation).
# NOTE(review): `distplot` is deprecated in recent seaborn releases.
sns.distplot(
    a=data['x'],
    hist=True,
    kde=False,
    norm_hist=False,
)

## Eye images structure

In [None]:
def load_eye_image(relative_path):
    """Read one eye image and scale its pixel values to the range [-1, 1].

    `relative_path` is appended to config.PATH_DATA_FEATURES01_DLIB_AUGMENTED
    by plain string concatenation. The scaling divides by 255, so it assumes
    8-bit pixel values -- TODO confirm for the stored crops.
    """
    pixels = skimage.io.imread(config.PATH_DATA_FEATURES01_DLIB_AUGMENTED + relative_path)
    return (pixels / 255 - 0.5) * 2


imgs_left = []
imgs_right = []
errors = []       # relative paths of the images that could not be loaded
failed_rows = []  # index labels of rows with at least one unusable image

for row_label, row in data.iterrows():
    relative_paths = (row['eye_left_image'], row['eye_right_image'])
    loaded = []
    for relative_path in relative_paths:
        try:
            loaded.append(load_eye_image(relative_path))
        except Exception:
            # The original handler appended an undefined name (`path`), so a
            # failed load raised NameError instead of being recorded.
            errors.append(relative_path)

    if len(loaded) == len(relative_paths):
        # Append as a pair so both lists stay aligned with each other.
        imgs_left.append(loaded[0])
        imgs_right.append(loaded[1])
    else:
        failed_rows.append(row_label)

if failed_rows:
    # Drop rows without a complete image pair so that row i of `data` still
    # corresponds to imgs_left[i] and imgs_right[i].
    data.drop(failed_rows, inplace=True)
    print('Dropped {} row(s) with unreadable eye images; see `errors`.'.format(len(failed_rows)))

## Save things

In [None]:
# Write the cleaned, normalized table without its index column.
data.to_csv(config.PATH_DATA_FEATURES01_DLIB_AUGMENTED_NORM_CSV, index=False)

# Write each list of scaled eye images to its own .npy file.
for target_file, images in (
    (config.PATH_DATA_FEATURES01_DLIB_AUGMENTED_NORM_IMGS_LEFT, imgs_left),
    (config.PATH_DATA_FEATURES01_DLIB_AUGMENTED_NORM_IMGS_RIGHT, imgs_right),
):
    np.save(target_file, images)