In [109]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
import pandas as pd
import math
import json
import os
import time
from datetime import datetime
# For EC2
# import boto3

In [123]:
################################
# NON-HYPERPARAMETER CONSTANTS #
################################
processed_dataset_paths_xlsx = '/Volumes/GoogleDrive/My Drive/Crime Data/Composite Data/Sean Workspace/Processed/%s.xlsx' 
dataset_location = '/Volumes/GoogleDrive/My Drive/Crime Data/Composite Data/Sean Workspace/CNN Final/'
trial_file_location = '/Users/sean/Documents/Education/Stanford/230/Project/Sean/Trials/'
pickled_model_location = '/Users/sean/Documents/Education/Stanford/230/Project/Sean/Trials/Pickled Models/CNN Trial %d.ckpt'
trial_file_format = 'CNN Trial %d.xlsx'
epochs_between_prints = 100
hyperparameter_file_columns = ['Epoch Cost',
                               'Train Accuracy',
                               'Dev Accuracy',
                               'Duration',
                               'Dev Set Proportion',
                               'Test Set Proportion',
                               'Train Set Proportion',
                               'Learning Rate',
                               'Goal Total Epochs',
                               'Minibatch Size',
                               'Hidden Units per Layer',
                               'Hidden Layers',
                               'Dataset',
                               'Optimizer Name',
                               'L2 Regularization Lambda']
FIRST_DATE = datetime(2001, 1, 1)
LAST_DATE = datetime(2018, 1, 1)
NUM_DAYS = (LAST_DATE-FIRST_DATE).days
# 25 channels + date channels (17+12+31+6) = 91
X_MAX_PIXELS = 2048
Y_MAX_PIXELS = X_MAX_PIXELS
NUM_STATIC_CHANNELS = 21
STREET_CHANNEL, WATERWAY_CHANNEL, PARK_CHANNEL, FOREST_CHANNEL, SCHOOL_CHANNEL, LIBRARY_CHANNEL, BUILDING_CHANNELS,_,_,_,_,_,_,_,_,_, BUSINESS_CHANNELS,_,_,_,_ = range(NUM_STATIC_CHANNELS)
NUM_DYNAMIC_CHANNELS = 12
MIN_TEMP_CHANNEL, MAX_TEMP_CHANNEL, PRECIPITATION_CHANNEL, LIFE_EXPECTANCY_CHANNEL, GREEN_LINE_CHANNEL, RED_LINE_CHANNEL, BROWN_LINE_CHANNEL, PURPLE_LINE_CHANNEL, YELLOW_LINE_CHANNEL, BLUE_LINE_CHANNEL, PINK_LINE_CHANNEL, ORANGE_LINE_CHANNEL = range(NUM_STATIC_CHANNELS,NUM_STATIC_CHANNELS+NUM_DYNAMIC_CHANNELS)
YEAR_CHANNEL = NUM_STATIC_CHANNELS + NUM_DYNAMIC_CHANNELS
MONTH_CHANNEL = YEAR_CHANNEL + 17
DAY_CHANNEL = MONTH_CHANNEL + 12
NUM_TIME_SLOTS = 12
TIME_CHANNEL = DAY_CHANNEL + 31
NUM_INPUT_CHANNELS = TIME_CHANNEL + NUM_TIME_SLOTS
L_LINES = ['Green','Red','Brown','Purple','Yellow','Blue','Pink','Orange']

In [111]:
###################
# HYPERPARAMETERS #
###################
np.random.seed(0)
dev_set_proportion = 0.01
test_set_proportion = 0.01
train_set_proportion = 1 - (dev_set_proportion + test_set_proportion)
learning_rate = 0.0001
goal_total_epochs = 10000
minibatch_size = np.inf
hidden_units_per_layer = 100
num_hidden_layers = 14
trial_number = 45
optimizer_name = 'Adam'
regular_lambda = 0.1

In [114]:
#########################
# IMPORT PROCESSED DATA #
#########################
weather = pd.read_excel(processed_dataset_paths_xlsx % 'Weather')
weather['Date'] = pd.to_datetime(weather['Date'])
# Create fast-access weather arrays
min_temp_lookup = np.full((NUM_DAYS), np.nan)
max_temp_lookup = np.full((NUM_DAYS), np.nan)
precipitation_lookup = np.full((NUM_DAYS), np.nan)
# Insert the weather data
weather.apply(lambda record: extract_data_for_date(record, max_temp_lookup, 'Max Temp'), axis=1)
weather.apply(lambda record: extract_data_for_date(record, min_temp_lookup, 'Min Temp'), axis=1)
weather.apply(lambda record: extract_data_for_date(record, precipitation_lookup, 'Precipitation'), axis=1)
# Interpolate over any NaN values
nans, x= nan_helper(min_temp_lookup)
min_temp_lookup[nans]= np.interp(x(nans), x(~nans), min_temp_lookup[~nans])
nans, x= nan_helper(max_temp_lookup)
max_temp_lookup[nans]= np.interp(x(nans), x(~nans), max_temp_lookup[~nans])
nans, x= nan_helper(precipitation_lookup)
precipitation_lookup[nans]= np.interp(x(nans), x(~nans), precipitation_lookup[~nans])
    
# For Local Machine
street_frame = np.load(dataset_location + 'Streets Frame.npz')['street_frame']
waterway_frame = np.load(dataset_location + 'Waterway Frame.npz')['waterway_frame']
park_frame = np.load(dataset_location + 'Park Frame.npz')['park_frame']
forest_frame = np.load(dataset_location + 'Forest Frame.npz')['forest_frame']
school_frame = np.load(dataset_location + 'School Frame.npz')['school_frame']
library_frame = np.load(dataset_location + 'Library Frame.npz')['library_frame']
uninhabitable_building_frame = np.load(dataset_location + 'Building Frames.npz')['uninhabitable_building_frame']
building_frames = {'Sound':{},
                   'Minor Repair':{},
                   'Major Repair':{}}
with np.load(dataset_location + 'Building Frames.npz') as data:
    building_frames['Sound']['Stories'] = data['stories_of_sound_buildings_frame']
    building_frames['Sound']['Area'] = data['area_of_sound_buildings_frame']
    building_frames['Sound']['Units'] = data['units_of_sound_buildings_frame']

    building_frames['Minor Repair']['Stories'] = data['stories_of_minor_repair_buildings_frame']
    building_frames['Minor Repair']['Area'] = data['area_of_minor_repair_buildings_frame']
    building_frames['Minor Repair']['Units'] = data['units_of_minor_repair_buildings_frame']

    building_frames['Major Repair']['Stories'] = data['stories_of_major_repair_buildings_frame']
    building_frames['Major Repair']['Area'] = data['area_of_major_repair_buildings_frame']
    building_frames['Major Repair']['Units'] = data['units_of_major_repair_buildings_frame']
life_expectancy_frame = np.load(dataset_location + 'Life Expectancy Frames.npz')['life_expectancy_frame']
business_frames = {}
with np.load(dataset_location + 'Business Frames.npz') as data:
    business_frames['Food Service'] = data['Food Service']
    business_frames['Tobacco Sale'] = data['Tobacco Sale']
    business_frames['Alcohol Consumption'] = data['Alcohol Consumption']
    business_frames['Package Store'] = data['Package Store']
    business_frames['Gas Station'] = data['Gas Station']
L_entries_compressed = pd.read_csv(dataset_location + 'L Entries.csv')
# Unpack the json strings to numpy
for line in L_LINES:
    L_entries_compressed[line] = L_entries_compressed['Line'].apply(lambda array_string: np.array(json.loads(array_string)))
# L Entries is a pandas dataframe:
#  column is L line
#  row is day number
#  Cell is numpy array:
#    row 1 is x coordinate of rail station
#    row 2 is y coordinate of rail station
#    row 3 is number of entries for rail station

# Combine static channels
static_channels = np.zeros((NUM_STATIC_CHANNELS, X_MAX_PIXELS, Y_MAX_PIXELS))
static_channels[STREET_CHANNEL] = street_frame
static_channels[WATERWAY_CHANNEL] = waterway_frame
static_channels[PARK_CHANNEL] = park_frame
static_channels[FOREST_CHANNEL] = forest_frame
static_channels[SCHOOL_CHANNEL] = school_frame
static_channels[LIBRARY_CHANNEL] = library_frame
static_channels[BUILDING_CHANNELS + 0] = uninhabitable_building_frame
static_channels[BUILDING_CHANNELS + 1] = building_frames['Sound']['Stories']
static_channels[BUILDING_CHANNELS + 2] = building_frames['Sound']['Area']
static_channels[BUILDING_CHANNELS + 3] = building_frames['Sound']['Units']
static_channels[BUILDING_CHANNELS + 4] = building_frames['Minor Repair']['Stories']
static_channels[BUILDING_CHANNELS + 5] = building_frames['Minor Repair']['Area']
static_channels[BUILDING_CHANNELS + 6] = building_frames['Minor Repair']['Units']
static_channels[BUILDING_CHANNELS + 7] = building_frames['Major Repair']['Stories']
static_channels[BUILDING_CHANNELS + 8] = building_frames['Major Repair']['Area']
static_channels[BUILDING_CHANNELS + 9] = building_frames['Major Repair']['Units']
static_channels[BUSINESS_CHANNELS + 0] = business_frames['Food Service']
static_channels[BUSINESS_CHANNELS + 1] = business_frames['Tobacco Sale']
static_channels[BUSINESS_CHANNELS + 2] = business_frames['Alcohol Consumption']
static_channels[BUSINESS_CHANNELS + 3] = business_frames['Package Store']
static_channels[BUSINESS_CHANNELS + 4] = business_frames['Gas Station']

In [125]:
L_entries_compressed['Green'][0]

'[[ 674 1057 1583 1344 1537  858 1360 1568 1522  962 1584  770 1546  911\n  1645 1457 1151 1581 1547 1597 1468  817 1580  720 1200]\n [1311 1303  852 1302 1305 1314  727  959 1305 1309  815 1312 1271 1312\n   734  727 1297  890 1012  733 1304 1313  930 1311 1296]\n [ 633  483  364  246 2059  700  540  199 1080  405  248  141  700  346\n   391  230  357  427  448  144  217  399  211  170  213]]'

In [107]:
# Building CNN Data
# Data that varies with Time and Location:
# - Crime (OUTPUT - YOLO with crime and location)
# - L entries (8 layers - one per line)
# - Life Expectancy (1 layer)
#
# Data that varies with Time Only:
# - Weather (3 layers - MIN TEMP, MAX TEMP, and PRECIPITATION)
# - Date
# - Time
#
# Data that varies with Location Only:
# - Businesses (5 layers - types of businesses)
# - Buildings (10 layers - stories|units|sqfeet for sound|minor repair|major repair.  Also uninhabitable or not.)
# - Waterways (1 layer)
# - Major Streets (1 layer)
# - Libraries (1 layer)
# - Public Parks (1 layer)
# - Forests (1 layer)
# - Schools (1 layer)

def get_example_for_datetime(day_index, day, month, year, time_slot):
    # Need to filter for any days during which we don't have L data
    
    # Add static channels first
    input_data = np.zeros((NUM_INPUT_CHANNELS, X_MAX_PIXELS, Y_MAX_PIXELS))
    input_data[:NUM_STATIC_CHANNELS] = static_channels
    # Weather channels
    input_data[MIN_TEMP_CHANNEL] = np.full((X_MAX_PIXELS, Y_MAX_PIXELS), min_temp_lookup[day_index])
    input_data[MAX_TEMP_CHANNEL] = np.full((X_MAX_PIXELS, Y_MAX_PIXELS), max_temp_lookup[day_index])
    input_data[PRECIPITATION_CHANNEL] = np.full((X_MAX_PIXELS, Y_MAX_PIXELS), precipitation_lookup[day_index])
    # Life Expectancy Channel
    input_data[LIFE_EXPECTANCY_CHANNEL] = life_expectancy_frame[year_index-FIRST_DATE.year]
    # L Entry Channels
    input_data[GREEN_LINE_CHANNEL, L_entries_compressed['Green'][0], L_entries_compressed['Green'][1]] = L_entries_compressed['Green'][2]
    input_data[RED_LINE_CHANNEL, L_entries_compressed['Red'][0], L_entries_compressed['Red'][1]] = L_entries_compressed['Red'][2]
    input_data[BROWN_LINE_CHANNEL, L_entries_compressed['Brown'][0], L_entries_compressed['Brown'][1]] = L_entries_compressed['Brown'][2]
    input_data[PURPLE_LINE_CHANNEL, L_entries_compressed['Purple'][0], L_entries_compressed['Purple'][1]] = L_entries_compressed['Purple'][2]
    input_data[YELLOW_LINE_CHANNEL, L_entries_compressed['Yellow'][0], L_entries_compressed['Yellow'][1]] = L_entries_compressed['Yellow'][2]
    input_data[BLUE_LINE_CHANNEL, L_entries_compressed['Blue'][0], L_entries_compressed['Blue'][1]] = L_entries_compressed['Blue'][2]
    input_data[PINK_LINE_CHANNEL, L_entries_compressed['Pink'][0], L_entries_compressed['Pink'][1]] = L_entries_compressed['Pink'][2]
    input_data[ORANGE_LINE_CHANNEL, L_entries_compressed['Orange'][0], L_entries_compressed['Orange'][1]] = L_entries_compressed['Orange'][2]
    # Date and Time channels
    input_data[YEAR_CHANNEL + year_index] = np.ones((X_MAX_PIXELS, Y_MAX_PIXELS))
    input_data[MONTH_CHANNEL + month_index] = np.ones((X_MAX_PIXELS, Y_MAX_PIXELS))
    input_data[DAY_CHANNEL + requested_datetime.day] = np.ones((X_MAX_PIXELS, Y_MAX_PIXELS))
    input_data[TIME_CHANNEL + time_slot] = np.ones((X_MAX_PIXELS, Y_MAX_PIXELS))

In [108]:
start = time.time()
requested_datetime = datetime(2005, 10, 30, 5, 32)
for _ in range(10):
    get_example_for_datetime(requested_datetime)
end = time.time()
print(end-start)

3.9746479988098145


In [57]:
# Utility Functions #

####################
# EPOCH MANAGEMENT #
####################

def extract_data_for_date(record, fast_lookup, column):
    record_date = datetime(record.Date.year, record.Date.month, record.Date.day)
    index = (record_date - FIRST_DATE).days
    if index < NUM_DAYS and index >= 0:
        fast_lookup[index] = record[column]

def nan_helper(y):
    """Helper to handle indices and logical indices of NaNs.
    Reference: https://stackoverflow.com/questions/6518811/interpolate-nan-values-in-a-numpy-array

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices= index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """

    return np.isnan(y), lambda z: z.nonzero()[0]

def restore_model(saver, session):
    # Before epoch, check for trial # in trial files
    if os.path.isfile(trial_file_location+trial_file_format % trial_number):
        print('Model found.  Restoring parameters.')
        # If trial exists:
        # 1. roll back (cost, train & dev accuracy) to epoch with highest dev accuracy.
        trial_hyperparameters = pd.read_excel(trial_file_location+trial_file_format % trial_number)
        # Find highest dev accuracy
        best_dev_index = np.argmax(trial_hyperparameters.loc[:,'Dev Accuracy'].values)
        # Delete all rows after this epoch
        trial_hyperparameters = trial_hyperparameters[:best_dev_index+1]
        # 2. restore model for the best dev accuracy
        saver.restore(session, pickled_model_location % trial_number)
        # Save the edited/new hyperparameter trial file
        writer = pd.ExcelWriter(trial_file_location+trial_file_format % trial_number)
        trial_hyperparameters.to_excel(writer)
        writer.save()
        # Return the number of epochs already trained
        return len(trial_hyperparameters)
    else:
        print('No saved model.  Using default parameter initialization.')
        return 0

def epoch_teardown(saver, session, cost, training_accuracy, dev_accuracy, duration):
    trial_hyperparameters = pd.DataFrame(columns=hyperparameter_file_columns)
    # After epoch, check for hyperparameter file
    if os.path.isfile(trial_file_location+trial_file_format % trial_number):
        trial_hyperparameters = pd.read_excel(trial_file_location+trial_file_format % trial_number)
        # Compare dev accuracy with all other epochs
        max_dev_accuracy = np.max(trial_hyperparameters['Dev Accuracy'].values)
        if dev_accuracy > max_dev_accuracy:
            # If greatest, save model
            saver.save(session, pickled_model_location % trial_number)
    # Save hyperparameters, epoch cost, and training & dev accuracies
    trial_hyperparameters = trial_hyperparameters.append({
        'Epoch Cost' : cost,
        'Train Accuracy' : training_accuracy,
        'Dev Accuracy' : dev_accuracy,
        'Duration' : duration,
        'Dev Set Proportion' : dev_set_proportion,
        'Test Set Proportion' : test_set_proportion,
        'Train Set Proportion' : train_set_proportion,
        'Learning Rate' : learning_rate,
        'Goal Total Epochs' : goal_total_epochs,
        'Minibatch Size' : minibatch_size,
        'Hidden Units per Layer' : hidden_units_per_layer,
        'Hidden Layers' : num_hidden_layers,
        'Dataset' : dataset,
        'Optimizer Name' : optimizer_name,
        'L2 Regularization Lambda' : regular_lambda
    }, ignore_index=True)
    # Save the edited/new hyperparameter trial file
    writer = pd.ExcelWriter(trial_file_location+trial_file_format % trial_number)
    trial_hyperparameters.to_excel(writer)
    writer.save()

def random_mini_batches(X, Y, mini_batch_size = 64):
    # Creates a list of random minibatches from (X, Y)
    m = X.shape[1]
    mini_batches = []
    
    if mini_batch_size > m:
        mini_batches.append((X,Y))
    else:
        # Step 1: Shuffle (X, Y)
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation].reshape((1,m))

        # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
        num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
        for k in range(0, num_complete_minibatches):
            mini_batch_X = shuffled_X[:, k*mini_batch_size: (k+1)*(mini_batch_size)]
            mini_batch_Y = shuffled_Y[:, k*mini_batch_size: (k+1)*(mini_batch_size)]
            mini_batch = (mini_batch_X, mini_batch_Y)
            mini_batches.append(mini_batch)

        # Handling the end case (last mini-batch < mini_batch_size)
        if m % mini_batch_size != 0:
            mini_batch_X = shuffled_X[:, int(mini_batch_size*np.floor(m/mini_batch_size)): m]
            mini_batch_Y = shuffled_Y[:, int(mini_batch_size*np.floor(m/mini_batch_size)): m]
            mini_batch = (mini_batch_X, mini_batch_Y)
            mini_batches.append(mini_batch)
    
    return mini_batches

###################################
# CREATE NEURAL NETWORK STRUCTURE #
###################################

def create_NN_structure(n_x, n_y):
    ops.reset_default_graph()

    # Create placeholders for the featuers and labels
    X = tf.placeholder(tf.float32, shape=(n_x, None), name='X')
    Y = tf.placeholder(tf.int32, shape=(n_y, None), name='Y')

    # Create the network parameters
    parameters = {}
    for layer in range(num_hidden_layers+1):
        previous_layer_size = (n_x if layer == 0 else hidden_units_per_layer)
        this_layer_size = (n_y if layer == num_hidden_layers else hidden_units_per_layer)
        W_name = 'W'+str(layer+1)
        b_name = 'b'+str(layer+1)
        parameters[W_name] = tf.get_variable(W_name,
                                             (this_layer_size,previous_layer_size),
                                             initializer=tf.contrib.layers.xavier_initializer(seed=1, uniform=False))
        parameters[b_name] = tf.get_variable(b_name,
                                             (this_layer_size,1),
                                             initializer=tf.zeros_initializer())

    # Hook up the network layers
    A = X
    Z = X
    for layer in range(num_hidden_layers+1):
        W = parameters['W'+str(layer+1)]
        b = parameters['b'+str(layer+1)]
        Z = W@A+b
        A = tf.nn.relu(Z)
    Z_hat = Z
    Y_hat = tf.argmax(tf.transpose(tf.nn.softmax(tf.transpose(Z_hat))), axis=0)
    
    return Z_hat, Y_hat, X, Y, parameters

#############################
# CREATE AND CONDITION DATA #
#############################

def expand_one_hot_columns(crime_data):
    conditioned_data = crime_data.copy()
    print('Expanding one-hot columns')
    for column_name in discrete_columns:
        one_hot_expanded_columns = pd.get_dummies(conditioned_data[column_name])
        conditioned_data = pd.concat([conditioned_data, one_hot_expanded_columns], axis=1).dropna()
        conditioned_data = conditioned_data.drop(columns=[column_name])
    return conditioned_data

def create_and_condition_data(crime_data):
    # Drop unnecessary columns
    conditioned_data = crime_data.drop(columns=remove_columns)
    # Expand one-hot columns
    conditioned_data = expand_one_hot_columns(conditioned_data)
    # Convert the dataframe to numpy arrays for features and labels
    features = conditioned_data.drop(columns=[target_column]).values.T
    labels = pd.get_dummies(conditioned_data[target_column]).values.T

    # Drop all NAs that were caught in the transfer
    feature_cols_with_nans = np.isnan(features).any(axis=0)
    features = features[:,~feature_cols_with_nans]
    labels = labels[:,~feature_cols_with_nans]
    label_cols_with_nans = np.isnan(labels).any(axis=0)
    features = features[:,~label_cols_with_nans]
    labels = labels[:,~label_cols_with_nans]

    _, m = features.shape
    _, _ = labels.shape

    # Shuffle the data
    print('Shuffling data')
    order = np.argsort(np.random.random(m))
    features = features[:,order]
    labels = labels[:,order]

    # Split between train, dev, and test
    # Data structure: [     TRAIN     ][ DEV ][ TEST ]
    dev_start_index = int(train_set_proportion*m)
    test_start_index = dev_start_index + int(dev_set_proportion*m)

    X_train = features[:, 0:dev_start_index]
    Y_train = labels[:, 0:dev_start_index]

    X_dev = features[:, dev_start_index:test_start_index]
    Y_dev = labels[:, dev_start_index:test_start_index]

    X_test = features[:, test_start_index:]
    Y_test = labels[:, test_start_index:]

    x_variance = X_train.var(axis=1).reshape(-1,1)
    # Check if variance is zero.  State which features will be removed.
    should_keep = (x_variance!=0).reshape(-1)
    removed_features = conditioned_data.drop(columns=[target_column]).columns[should_keep==False].tolist()
    if len(removed_features) != 0:
        print('Removed the following columns (variance = 0): ' + str(removed_features))
    X_train = X_train[(x_variance!=0).reshape(-1),:]
    X_dev = X_dev[(x_variance!=0).reshape(-1),:]
    X_test = X_test[(x_variance!=0).reshape(-1),:]
    
    # Normalize the inputs and outputs based on the training set mean and variance
    print('Normalizing input data')
    x_mean = X_train.mean(axis=1).reshape(-1,1)
    x_variance = X_train.var(axis=1).reshape(-1,1)
    X_train = (X_train-x_mean)/x_variance
    X_dev = (X_dev-x_mean)/x_variance
    X_test = (X_test-x_mean)/x_variance
    
    return X_train, Y_train, X_dev, Y_dev, X_test, Y_test

#################
# EXECUTE MODEL #
#################

def execute_model():
    global optimizer_name, trial_file_location

    print('Conditioning Data')
    X_train, Y_train, X_dev, Y_dev, X_test, Y_test = create_and_condition_data(crime_data)
    n_x, m = X_train.shape
    n_y, _ = Y_train.shape
    print('Creating Network Structure')
    Z_hat, Y_hat, X, Y, parameters = create_NN_structure(n_x, n_y)

    # Calculate the cost from the network prediction
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.transpose(Z_hat),
                                                                     labels=tf.transpose(Y)))
    # Regularize the cost
    for name, weights in parameters.items():
        cost += regular_lambda * tf.nn.l2_loss(weights)
    
    optimizer = None
    # Create the optimizer
    if optimizer_name == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    else:
        optimizer_name = 'GD'
        optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

    # Formula for calculating set accuracy
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(Z_hat), tf.argmax(Y)), "float"))

    # Run the tf session to train and test
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as session:
        session.run(init)
        # If the trial already exists, pick up where we left off
        starting_epoch = restore_model(saver, session)
        print('Beginning Training')
        for epoch in range(starting_epoch, goal_total_epochs):
            start_time = time.time()
            epoch_cost = 0.
            num_minibatches = int(m / minibatch_size)
            if num_minibatches < 1: num_minibatches=1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size)
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _ , minibatch_cost = session.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches
            elapsed_time = time.time() - start_time

            # Display epoch results every so often
            if epoch % epochs_between_prints == 0:
                print('%i Epochs' % epoch)
                print('\tCost: %f' % epoch_cost)
                print('\tTrain Accuracy: %f' % accuracy.eval({X: X_train, Y: Y_train}))
                print('\tDev Accuracy: %f' % accuracy.eval({X: X_dev, Y: Y_dev}))

            # Epoch over, tear down
            epoch_teardown(saver,
                           session,
                           epoch_cost,
                           float(accuracy.eval({X: X_train, Y: Y_train})),
                           float(accuracy.eval({X: X_dev, Y: Y_dev})),
                           elapsed_time)

        # Calculate the accuracy on the train and dev sets
        print('Reached Goal Number of Epochs.')
        print('Final Train Accuracy: %f' % accuracy.eval({X: X_train, Y: Y_train}))
        print('Final Dev Accuracy: %f' % accuracy.eval({X: X_dev, Y: Y_dev}))

In [101]:
execute_model()

Conditioning Data
Expanding one-hot columns
Shuffling data
Normalizing input data
Creating Network Structure
No saved model.  Using default parameter initialization.
Beginning Training
0 Epochs
	Cost: 80.423340
	Train Accuracy: 0.043532
	Dev Accuracy: 0.044921
100 Epochs
	Cost: 67.558609
	Train Accuracy: 0.225869
	Dev Accuracy: 0.228994
200 Epochs
	Cost: 57.122234
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
300 Epochs
	Cost: 48.506077
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
400 Epochs
	Cost: 41.278923
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
500 Epochs
	Cost: 35.191925
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
600 Epochs
	Cost: 30.058361
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
700 Epochs
	Cost: 25.724384
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
800 Epochs
	Cost: 22.063263
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
900 Epochs
	Cost: 18.968952
	Train Accuracy: 0.225815
	Dev Accuracy: 0.228898
1000 Epochs
	Cost: 16.354263
	Train A

KeyboardInterrupt: 