In [1]:
import os
import zipfile

import numpy as np
import pandas as pd

%matplotlib notebook
import matplotlib.pyplot as plt

# Set matplotlib's 'toolbar' rcParam to 'None' for every figure created below;
# presumably this hides the interactive figure toolbar under the notebook
# backend -- confirm against the matplotlib rcParams documentation.
plt.rcParams['toolbar'] = 'None' 

In [2]:
def load_all_data(data_dir='data'):
    """Load the lookup table and the training/test sets, and decode the images.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding ``IdLookupTable.csv``, ``training.zip`` (with a
        ``training.csv`` member) and ``test.zip`` (with a ``test.csv`` member).
        Defaults to ``'data'``.

    Returns
    -------
    list
        ``[idLookUp, keypoints_columns, train, test, train_X, train_y, test_X]``

        * ``idLookUp`` -- DataFrame read from ``IdLookupTable.csv``.
        * ``keypoints_columns`` -- every training column except the last one
          (the last column is assumed to be ``Image``).
        * ``train`` / ``test`` -- DataFrames with two extra columns:
          ``Image1`` (flat array of 96*96 pixels) and ``Image2`` (96x96 array).
          Training rows with a non-finite value in any keypoint column are
          dropped and the index is renumbered from 0.
        * ``train_X`` / ``test_X`` -- flat images stacked row-wise.
        * ``train_y`` -- keypoint values of the remaining training rows.

    Raises
    ------
    ValueError
        If an image string does not contain exactly 96*96 values.
    """
    image_side = 96

    def parse_pixels(im):
        # Text-mode parse of the space-separated pixel string; the reshape
        # validates that the image has exactly image_side**2 pixels.
        return np.fromstring(im, sep=' ').reshape(image_side * image_side)

    # load idLookUp table
    idLookUp = pd.read_csv(os.path.join(data_dir, 'IdLookupTable.csv'))

    # read csv's from zip; context managers make sure both the archive and
    # the member stream are closed even if parsing fails
    with zipfile.ZipFile(os.path.join(data_dir, 'training.zip')) as archive:
        with archive.open('training.csv') as member:
            train = pd.read_csv(member)
    with zipfile.ZipFile(os.path.join(data_dir, 'test.zip')) as archive:
        with archive.open('test.csv') as member:
            test = pd.read_csv(member)
    keypoints_columns = train.columns[0:-1]

    # drop rows with a NaN/inf in any keypoint column, in a single pass;
    # reset_index returns a fresh frame, so the column assignments below do
    # not operate on a slice of the original
    finite_rows = np.isfinite(
        train[keypoints_columns].values.astype(float)).all(axis=1)
    train = train[finite_rows].reset_index(drop=True)

    # convert images (same parsing for train and test)
    train['Image1'] = train.Image.apply(parse_pixels)
    train['Image2'] = train.Image1.apply(
        lambda img: img.reshape(image_side, image_side))
    test['Image1'] = test.Image.apply(parse_pixels)
    test['Image2'] = test.Image1.apply(
        lambda img: img.reshape(image_side, image_side))

    # Create useful arrays with training and test data
    train_X = np.vstack(list(train.Image1))
    train_y = train[keypoints_columns].values
    test_X = np.vstack(list(test.Image1))

    return [idLookUp, keypoints_columns, train, test, train_X, train_y, test_X]

In [3]:
def plot_sample(x, y, axis):
    """Draw one grayscale image with its keypoints marked on the given axes.

    Parameters
    ----------
    x : array-like
        Image passed to ``imshow`` with the ``'gray'`` colormap.
    y : sequence
        Interleaved keypoint coordinates; even positions are used as the
        horizontal values and odd positions as the vertical values of the
        scatter markers.
    axis : matplotlib axes or None
        Axes to draw on. When ``None``, pyplot's current axes are used.
    """
    # Draw on the axes the caller handed in instead of relying on whichever
    # axes pyplot currently considers active.
    if axis is None:
        axis = plt.gca()
    axis.imshow(x, cmap='gray')
    axis.scatter(y[0::2], y[1::2], marker='x', s=10)

In [4]:
def plot_images(data, y_pred, page):
    """Show one page of up to 16 images with their keypoints in a 4x4 grid.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``Image2`` attribute is indexed by sample number and
        yields the image to display.
    y_pred : sequence
        Per-sample keypoint coordinates; ``y_pred[i]`` is drawn over
        ``data.Image2[i]``.
    page : int
        1-based page number; page ``p`` covers samples ``16*(p-1)`` up to
        ``16*p - 1``. A final, partially filled page only draws the samples
        that exist.
    """
    per_page = 16  # 4 x 4 grid

    fig = plt.figure(figsize=(6, 6))
    fig.subplots_adjust(
        left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

    first = per_page * (page - 1)
    # Clamp the upper bound so the last page does not index past the end of
    # either the images or the predictions.
    last = min(per_page * page, len(data), len(y_pred))

    for i in range(first, last):
        ax = fig.add_subplot(4, 4, (i % per_page) + 1, xticks=[], yticks=[])
        plot_sample(data.Image2[i], y_pred[i], ax)