In [78]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf

In [79]:
def __load_dataframe(is_train=True, base_path="./data/facial-keypoints-detection/"):
    """Read the training or test CSV of the dataset into a DataFrame.

    Args:
        is_train: When true, ``training.csv`` is read; otherwise ``test.csv``.
        base_path: Directory that contains the two CSV files. Defaults to the
            location the notebook has always used, so existing calls behave
            exactly as before. A trailing separator is optional.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` for the chosen file.

    Raises:
        Whatever ``pd.read_csv`` raises for an unreadable path (for example
        ``FileNotFoundError`` when the file does not exist).
    """
    file_name = "training.csv" if is_train else "test.csv"
    # os.path.join copes with a base_path given with or without a trailing
    # separator, unlike plain string concatenation.
    return pd.read_csv(os.path.join(base_path, file_name))

In [80]:
def __convert_image_dataset(raw_image_infos):
    """Turn whitespace-separated pixel strings into a stacked image array.

    Args:
        raw_image_infos: Iterable of strings; each string holds the pixel
            values of one image as whitespace-separated integers.

    Returns:
        A NumPy array reshaped to ``(-1, 96, 96, 1)``: one 96x96
        single-channel image per input string.
    """
    pixel_rows = [
        [int(token) for token in raw_pixels.split()]
        for raw_pixels in raw_image_infos
    ]
    return np.array(pixel_rows).reshape(-1, 96, 96, 1)

In [81]:
def get_dataset_xy(is_train=True):
    """Load one of the dataset CSVs and separate images from the other columns.

    The last column of the loaded frame is treated as the image column (one
    whitespace-separated pixel string per row) and converted to an image
    array; all preceding columns are returned untouched as a plain array,
    so any missing values they contain are passed through as-is.

    Args:
        is_train: Forwarded to ``__load_dataframe`` to pick the CSV to read.

    Returns:
        Tuple ``(x, y)``: ``x`` is the array built by
        ``__convert_image_dataset`` and ``y`` holds the ``.values`` of every
        column except the last.
    """
    frame = __load_dataframe(is_train)
    pixel_column = frame.iloc[:, -1]
    remaining_columns = frame.iloc[:, :-1]
    images = __convert_image_dataset(pixel_column)
    return images, remaining_columns.values

In [82]:
def normalize_image(x):
    """Return a float32 copy of ``x`` with every value divided by 255.

    Args:
        x: NumPy array (anything exposing ``astype``) of pixel values.

    Returns:
        A new ``float32`` array of the same shape; the input is not modified.
    """
    as_float = x.astype(np.float32)
    return as_float / 255.0

In [99]:
def split_data(x, y, train_ratio=0.8, seed=None):
    """Shuffle the samples and split them into a training and a validation part.

    A single random permutation of the row indices is applied to both ``x``
    and ``y``, so inputs and targets stay aligned, every row is used exactly
    once, and the two parts never share a sample.

    Args:
        x: Array-like whose first axis indexes samples; must expose ``.shape``.
        y: Array-like with the same number of rows as ``x``.
        train_ratio: Fraction of rows, between 0 and 1, assigned to the
            training part. The training part receives
            ``int(rows * train_ratio)`` rows and the validation part the rest.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global NumPy random state is used, so ``np.random.seed`` still
            controls the result.

    Returns:
        Tuple ``(xs, ys)`` with ``xs == [x_train, x_valid]`` and
        ``ys == [y_train, y_valid]``; each element is a tensor produced by
        ``tf.split``.

    Raises:
        ValueError: If ``train_ratio`` lies outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            "train_ratio must be between 0 and 1, got {!r}".format(train_ratio)
        )

    row = x.shape[0]

    # A permutation visits each row exactly once. Drawing `row` indices with
    # np.random.choice would sample WITH replacement (its default), which
    # duplicates some rows, drops others and lets the same sample appear in
    # both the training and the validation part.
    rng = np.random if seed is None else np.random.default_rng(seed)
    indices = rng.permutation(row)
    x = tf.gather(x, indices=indices)
    y = tf.gather(y, indices=indices)

    train_count = int(row * train_ratio)
    valid_count = row - train_count
    x0, x1 = tf.split(x, [train_count, valid_count])
    y0, y1 = tf.split(y, [train_count, valid_count])
    print(x0.shape, x1.shape, y0.shape, y1.shape)
    return [x0, x1], [y0, y1]

In [84]:
# Load the training CSV: train_x is the image array built from the last
# column, train_y holds the values of all remaining columns.
train_x, train_y = get_dataset_xy(is_train=True)

In [100]:
# Shuffle and split with the default train_ratio of 0.8. Each result is a
# two-element list [train_part, valid_part]; split_data also prints the four
# resulting shapes.
train_x2, train_y2 = split_data(train_x, train_y)

(5639, 96, 96, 1) (1410, 96, 96, 1) (5639, 30) (1410, 30)


In [91]:
# Sanity check: count matching pixels between a split sample and source row
# 3841 (presumably an index taken from one particular random draw, so it is
# not reproducible).
# NOTE(review): this cell's execution count (91) precedes the current
# split_data definition (99). With split_data returning [train, valid] lists,
# train_x2[0] is the whole training part rather than a single image, so this
# cell needs revisiting after Restart & Run All.
(train_x2[0] == train_x[3841]).sum()

9216

In [92]:
# Sanity check on the targets. NaN never compares equal to NaN, so this
# counts only the non-missing coordinates that match; a result smaller than
# the number of target columns does not by itself indicate a mismatch.
# NOTE(review): executed (92) before the current split_data definition (99);
# train_y2[0] now refers to the whole training part - re-run to confirm.
(train_y[3841] == train_y2[0]).sum()

8

In [93]:
# Display the source target row used in the comparison cells.
train_y[3841]

array([70.32257143, 38.63701714, 29.67905143, 38.63701714,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
       50.56011429, 62.12823429,         nan,         nan,         nan,
               nan,         nan,         nan, 50.56011429, 73.68740571])

In [94]:
# Display train_y2[0] for a side-by-side comparison with train_y[3841].
train_y2[0]

array([70.32257143, 38.63701714, 29.67905143, 38.63701714,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
               nan,         nan,         nan,         nan,         nan,
       50.56011429, 62.12823429,         nan,         nan,         nan,
               nan,         nan,         nan, 50.56011429, 73.68740571])