# EyePredict CNN

In [1]:
import numpy as np
import pandas as pd 
import pickle
import math
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf

from random import shuffle
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
def load_data_to_one_df():
    """Load the pickled labels and samples and combine them in one DataFrame.

    Reads 'labels_data.pickle' and then 'samples_data.pickle' from the current
    working directory.

    NOTE(review): pickle.load can execute arbitrary code; only use this with
    pickle files from a trusted source.

    Returns:
        pandas.DataFrame with the columns 'samples_data' and 'labels_data'.
    """
    print("log....Loading the data......")

    with open('labels_data.pickle', 'rb') as labels_file:
        labels = pickle.load(labels_file)
    with open('samples_data.pickle', 'rb') as samples_file:
        samples = pickle.load(samples_file)

    # One row per sample; the column order matches the original notebook.
    columns = {'samples_data': samples, 'labels_data': labels}
    return pd.DataFrame(columns)

In [3]:
def split_data_to_train_test(allData, test_size=1/8.0, random_state=0,
                             shuffle_seed=None):
    """Shuffle the data, flatten every sample and split into train/test sets.

    Args:
        allData: DataFrame with a 'samples_data' column (each entry must
            provide ``.flatten()``) and a 'labels_data' column.
        test_size: proportion of the data used for the test set
            (default 1/8, the value the notebook always used).
        random_state: seed passed to train_test_split (default 0, as before).
        shuffle_seed: seed for the initial row shuffle. The default ``None``
            keeps the original behaviour (a different shuffle on every call);
            pass an int to make the whole split reproducible.

    Returns:
        (train_sample, test_sample, train_lbl, test_lbl): the samples as
        float32 arrays and the labels as int32 arrays.
    """
    # Shuffle the rows; without a seed this step is not reproducible even
    # though the split below uses a fixed random_state.
    shuffled = allData.sample(frac=1, random_state=shuffle_seed).reset_index(drop=True)

    # Separate the data into samples and labels again, flattening each sample.
    flattened_samples = [sample.flatten() for sample in shuffled.samples_data]
    labels = shuffled.labels_data

    train_sample, test_sample, train_lbl, test_lbl = train_test_split(
        flattened_samples, labels, test_size=test_size, random_state=random_state)

    train_sample = np.float32(np.asarray(train_sample))
    test_sample = np.float32(np.asarray(test_sample))
    train_lbl = np.int32(np.asarray(train_lbl))
    test_lbl = np.int32(np.asarray(test_lbl))
    return train_sample, test_sample, train_lbl, test_lbl

In [4]:
def standardize_data(train_data, test_data):
    """Standardize both data sets with a StandardScaler fitted on the training set.

    Args:
        train_data: training samples; the scaler is fitted on these only.
        test_data: test samples; transformed with the same fitted scaler.

    Returns:
        (train_data, test_data) after the scaler's transform.
    """
    # Fit on the training set only so no test-set statistics leak into it.
    fitted_scaler = StandardScaler().fit(train_data)

    # Apply the same transform to both sets.
    return fitted_scaler.transform(train_data), fitted_scaler.transform(test_data)

In [5]:
def pca_transform(train_data, test_data):
    """Project both data sets with a PCA model fitted on the training set.

    The model is created as ``PCA(.95)``; per scikit-learn's documentation a
    float in (0, 1) selects the number of components by explained-variance
    fraction.

    Args:
        train_data: training samples; the PCA model is fitted on these only.
        test_data: test samples; projected with the same fitted model.

    Returns:
        (train_data, test_data) after the PCA transform.
    """
    reducer = PCA(.95)
    reducer.fit(train_data)

    # Same mapping for both sets, training set first.
    train_projected, test_projected = (
        reducer.transform(part) for part in (train_data, test_data))
    return train_projected, test_projected

In [6]:
def data_reshape(data, target_size=3600):
    """Zero-pad every flat sample to ``target_size`` values and make it square.

    The padding is split between the front and the back of each sample; when
    the amount is odd, the extra zero goes to the back.

    Args:
        data: 2-D array of shape (n_samples, n_features).
        target_size: number of values per sample after padding. Must be a
            perfect square and at least ``n_features`` (default 3600, i.e.
            60 x 60, the size the notebook always used).

    Returns:
        (reshaped, side): ``reshaped`` has shape (n_samples, side, side) and
        the dtype of ``data``; ``side`` is the integer square root of
        ``target_size``. An input with zero samples yields an empty
        (0, side, side) array instead of failing.

    Raises:
        ValueError: if ``data`` is not 2-D, ``target_size`` is not a perfect
            square, or the samples are longer than ``target_size``.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            "data must be 2-D (n_samples, n_features), got %d dimension(s)" % data.ndim)

    side = int(math.sqrt(target_size))
    if side * side != target_size:
        raise ValueError("target_size must be a perfect square, got %r" % (target_size,))

    total_pad_to_add = target_size - data.shape[1]
    if total_pad_to_add < 0:
        raise ValueError(
            "samples have %d values, which exceeds target_size=%d"
            % (data.shape[1], target_size))

    # floor(total / 2) zeros in front, the remainder (ceil) at the back.
    pad_before = total_pad_to_add // 2
    pad_after = total_pad_to_add - pad_before

    # Pad only the feature axis, for all samples at once.
    padded = np.pad(data, ((0, 0), (pad_before, pad_after)),
                    'constant', constant_values=0)
    return padded.reshape(data.shape[0], side, side), side

## Load Data

In [7]:
# Load the pickled samples and labels into one DataFrame
# (columns `samples_data` and `labels_data`).
allData = load_data_to_one_df()

log....Loading the data......


## Choose stim to classify
### Use faces and fractals only = '1'
### Use faces and snacks only = '3'
### Use snacks and fractals only = '2'

In [8]:
# Label value to exclude: rows whose `labels_data` equals this value are
# filtered out of `allData` before the train/test split.
# NOTE(review): presumably the labels are stored as strings, hence the
# quoted '3' — confirm against the pickled labels.
stim_type_to_ignore = '3'

In [9]:
# Drop every sample whose label equals `stim_type_to_ignore`
# (the snacks samples, per the original note — verify against the label mapping).
# NOTE(review): this overwrites `allData`, so choosing a different value
# afterwards requires reloading the data first.
allData = allData[allData.labels_data != stim_type_to_ignore]

## Split data to train test sets

In [10]:
# Prepare the training and test sets (shuffle, flatten, split, cast).
train_sample, test_sample, train_lbl, test_lbl = split_data_to_train_test(allData)

## Standardize The Data

In [11]:
# Shift both label arrays down by one.
# NOTE(review): presumably this maps the two remaining classes onto 0/1 for
# the 2-unit logits layer — confirm the label values. The cell is not
# idempotent: running it again shifts the labels a second time.
train_lbl, test_lbl = train_lbl - 1, test_lbl - 1

In [14]:
# Standardize: the scaler is fitted on the training samples only and then
# applied to both the training and the test samples.
train_sample, test_sample = standardize_data(train_sample, test_sample)

## Run PCA Transform

In [15]:
# PCA: the model is fitted on the training samples only and then applied to
# both the training and the test samples.
train_sample, test_sample = pca_transform(train_sample, test_sample)

## Reshape The Data To fit Tensor

In [16]:
# Zero-pad each sample to 3600 values and reshape it into a square 2-D array.
# `shape` is the side length returned by data_reshape; the second call
# overwrites the value assigned by the first.
train_sample, shape = data_reshape(train_sample)
test_sample, shape = data_reshape(test_sample)

## Prepare training and validation sets

In [17]:
# Prepare the training and validation sets: the first 2563 training samples
# (and their labels) are used for training, the remainder for validation.
# NOTE(review): 2563 is hard-coded; if `train_sample` has fewer rows, the
# validation set is silently empty.
train_data, eval_data = train_sample[:2563], train_sample[2563:]
train_labels, eval_labels = train_lbl[:2563], train_lbl[2563:]

## Run CNN Model

In [29]:
def cnn_model_fn(features, labels, mode):
    """Model function for CNN.

    Builds two convolution + max-pooling stages followed by a dense layer,
    dropout and a 2-unit logits layer, and returns the EstimatorSpec that
    matches ``mode``.

    Args:
        features: dict holding the input batch under the key "x"; it is
            reshaped to [-1, 60, 60, 1], so every sample must contain
            3600 values.
        labels: integer class ids consumed by sparse softmax cross-entropy
            (not used in PREDICT mode).
        mode: one of the tf.estimator.ModeKeys values.

    Returns:
        tf.estimator.EstimatorSpec carrying the predictions (PREDICT), the
        loss and train op (TRAIN), or the loss and accuracy metric (EVAL).
    """
    with tf.device('/gpu:0'):
        # Input Layer
        input_layer = tf.reshape(features["x"], [-1, 60, 60, 1])
        print("Tensor size: ",input_layer.shape.dims)

        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(
            inputs=input_layer,
            filters=32,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        print("Tensor size: ",conv1.shape.dims)

        # Pooling Layer #1 (60x60 -> 30x30)
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(
            inputs=pool1,
            filters=64,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu)
        print("Tensor size: ",conv2.shape.dims)

        # Pooling Layer #2 (30x30 -> 15x15)
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
        print("Tensor size: ",pool2.shape.dims)

        # Dense Layer: flatten the 15 x 15 x 64 feature maps of each sample
        pool2_flat = tf.reshape(pool2, [-1, 15 * 15 * 64])
        print("Tensor size: ",pool2_flat.shape.dims)
        dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
        print("Tensor size: ",dense.shape.dims)
        # Dropout is only active while training.
        dropout = tf.layers.dropout(
            inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
        print("Tensor size: ",dropout.shape.dims)

        # Logits Layer: two output classes. It must read from `dropout`;
        # feeding `dense` here (as before) bypassed the dropout layer entirely.
        logits = tf.layers.dense(inputs=dropout, units=2)
        print("Tensor size: ",logits.shape.dims)
        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
            train_op = optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(
                labels=labels, predictions=predictions["classes"])}
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


In [30]:
def run(train_data, eval_data, train_labels, eval_labels,
        model_dir="/tmp/convnet_model_v333", batch_size=25, steps=20000):
    """Train the CNN estimator, evaluate it and report the metrics.

    Args:
        train_data: training samples, fed to the model under the key "x".
        eval_data: validation samples, fed to the model under the key "x".
        train_labels: labels matching ``train_data``.
        eval_labels: labels matching ``eval_data``.
        model_dir: directory handed to the Estimator as ``model_dir``.
            NOTE(review): per the TensorFlow docs an Estimator resumes from
            checkpoints found there, so use a fresh directory after changing
            the model.
        batch_size: training batch size (default 25, as before).
        steps: number of training steps (default 20000, as before).

    Returns:
        The evaluation result returned by ``Estimator.evaluate`` (it is also
        printed, as before).
    """
    # Create the Estimator
    eyePredict_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir=model_dir)

    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model; num_epochs=None repeats the data until `steps` is reached.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eyePredict_classifier.train(
        input_fn=train_input_fn,
        steps=steps,
        hooks=[logging_hook])
    print("*****DONE TRAINING*****")

    # Evaluate the model once over the validation data and print the results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = eyePredict_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)
    return eval_results

In [28]:
# Set TensorFlow's logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)
   
# RUN: train on the training split and evaluate on the validation split.
run(train_data, eval_data, train_labels, eval_labels)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/convnet_model_v333', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
Tensor size:  [Dimension(25), Dimension(60), Dimension(60), Dimension(1)]
Tensor size:  [Dimension(25), Dimension(60), Dimension(60), Dimension(32)]
Tensor size:  [Dimension(25), Dimension(30), Dimension(30), Dimension(64)]
Tensor size:  [Dimension(25), Dimension(15), Dimension(15), Dimension(64)]


ValueError: Dimension size must be evenly divisible by 48000 but is 360000 for 'Reshape_1' (op: 'Reshape') with input shapes: [25,15,15,64], [2] and with input tensors computed as partial shapes: input[1] = [?,48000].