In [3]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
import pandas as pd
import math
# For EC2
import boto3

In [4]:
# For EC2
bucket = "cs230"
file_name = "25_October.xlsx"

s3 = boto3.client('s3') 
# 's3' is a key word. create connection to S3 using default config and all buckets within S3

obj = s3.get_object(Bucket= bucket, Key= file_name) 
# get object and file (key) from bucket

crime_data = pd.read_excel(obj['Body']) # 'Body' is a key word

# For Local Machine
# crime_data = pd.read_excel('/Volumes/GoogleDrive/My Drive/Crime Data/Final Folder/25_October.xlsx')

In [5]:
###################
# HYPERPARAMETERS #
###################

np.random.seed(0)
dev_set_proportion = 0.01
test_set_proportion = 0.01
train_set_proportion = 1 - (dev_set_proportion + test_set_proportion)
learning_rate = 0.001
number_epochs = 10000
epochs_between_prints = 100
epochs_between_saving_cost = 5
minibatch_size = np.inf
hidden_units_per_layer = 100
num_hidden_layers = 14

In [6]:
########################
# SAVE HYPERPARAMETERS #
########################
# hyperparameter_data = pd.read.excel('Trials/Hyperparameters.xlsx')


In [7]:
#############################
# CREATE AND CONDITION DATA #
#############################

# Convert the dataframe to numpy arrays for features and labels
features = crime_data.drop(columns=['categoryCode']).values.T
labels = crime_data.loc[:,'categoryCode'].values.reshape((-1,1)).T
# Drop all NAs that were caught in the transfer
feature_cols_with_nans = np.isnan(features).any(axis=0)
features = features[:,~feature_cols_with_nans]
labels = labels[:,~feature_cols_with_nans]
label_cols_with_nans = np.isnan(labels).any(axis=0)
features = features[:,~label_cols_with_nans]
labels = labels[:,~label_cols_with_nans]

n_x, m = features.shape
n_y = len(crime_data.loc[:,'categoryCode'].unique())

# Shuffle the data
order = np.argsort(np.random.random(m))
features = features[:,order]
labels = labels[:,order]

# One Hot Encode the Labels
one_hot = np.zeros((n_y,m))
one_hot[labels,np.arange(m)] = 1
labels = one_hot

# Split between train, dev, and test
# Data structure: [     TRAIN     ][ DEV ][ TEST ]
dev_start_index = int(train_set_proportion*m)
test_start_index = dev_start_index + int(dev_set_proportion*m)

X_train = features[:, 0:dev_start_index]
Y_train = labels[:, 0:dev_start_index]

X_dev = features[:, dev_start_index:test_start_index]
Y_dev = labels[:, dev_start_index:test_start_index]

X_test = features[:, test_start_index:]
Y_test = labels[:, test_start_index:]

# Normalize the inputs and outputs based on the training set mean and variance
x_mean = X_train.mean(axis=1).reshape(n_x,1)
x_variance = X_train.var(axis=1).reshape(n_x,1)

X_train = (X_train-x_mean)/x_variance
X_dev = (X_dev-x_mean)/x_variance
X_test = (X_test-x_mean)/x_variance

In [8]:
def random_mini_batches(X, Y, mini_batch_size = 64):
    # Creates a list of random minibatches from (X, Y)
    m = X.shape[1]
    mini_batches = []
    
    if mini_batch_size > m:
        mini_batches.append((X,Y))
    else:
        # Step 1: Shuffle (X, Y)
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation].reshape((1,m))

        # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
        num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
        for k in range(0, num_complete_minibatches):
            mini_batch_X = shuffled_X[:, k*mini_batch_size: (k+1)*(mini_batch_size)]
            mini_batch_Y = shuffled_Y[:, k*mini_batch_size: (k+1)*(mini_batch_size)]
            mini_batch = (mini_batch_X, mini_batch_Y)
            mini_batches.append(mini_batch)

        # Handling the end case (last mini-batch < mini_batch_size)
        if m % mini_batch_size != 0:
            mini_batch_X = shuffled_X[:, int(mini_batch_size*np.floor(m/mini_batch_size)): m]
            mini_batch_Y = shuffled_Y[:, int(mini_batch_size*np.floor(m/mini_batch_size)): m]
            mini_batch = (mini_batch_X, mini_batch_Y)
            mini_batches.append(mini_batch)
    
    return mini_batches

In [9]:
###################################
# CREATE NEURAL NETWORK STRUCTURE #
###################################

ops.reset_default_graph()

# Create placeholders for the featuers and labels
X = tf.placeholder(tf.float32, shape=(n_x, None), name='X')
Y = tf.placeholder(tf.int32, shape=(n_y, None), name='Y')

# Create the network parameters
parameters = {}
for layer in range(num_hidden_layers+1):
    previous_layer_size = (n_x if layer == 0 else hidden_units_per_layer)
    this_layer_size = (n_y if layer == num_hidden_layers else hidden_units_per_layer)
    W_name = 'W'+str(layer+1)
    b_name = 'b'+str(layer+1)
    parameters[W_name] = tf.get_variable(W_name,
                                         (this_layer_size,previous_layer_size),
                                         initializer=tf.contrib.layers.xavier_initializer(seed=1, uniform=False))
    parameters[b_name] = tf.get_variable(b_name,
                                         (this_layer_size,1),
                                         initializer=tf.zeros_initializer())

# Hook up the network layers
A = X
Z = X
for layer in range(num_hidden_layers+1):
    W = parameters['W'+str(layer+1)]
    b = parameters['b'+str(layer+1)]
    Z = W@A+b
    A = tf.nn.relu(Z)
Z_hat = Z
Y_hat = tf.argmax(tf.transpose(tf.nn.softmax(tf.transpose(Z_hat))), axis=0)

In [None]:
#################
# EXECUTE MODEL #
#################

# Calculate the cost from the network prediction
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.transpose(Z_hat),
                                                                 labels=tf.transpose(Y)))

# Create the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

# Formula for calculating set accuracy
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(Z_hat), tf.argmax(Y)), "float"))

costs = []
train_accuracies = []
dev_accuracies = []

# Run the tf session to train and test
init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)
    for epoch in range(number_epochs):
        epoch_cost = 0.
        num_minibatches = int(m / minibatch_size)
        if num_minibatches < 1: num_minibatches=1
        minibatches = random_mini_batches(X_train, Y_train, minibatch_size)
        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            _ , minibatch_cost = session.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
            epoch_cost += minibatch_cost / num_minibatches
        
        # Data Analysis
        if epoch % epochs_between_prints == 0:
            print('%i Epochs' % epoch)
            print('\tCost: ', epoch_cost)
            print('\tTrain Accuracy: ', accuracy.eval({X: X_train, Y: Y_train}))
            print('\tDev Accuracy: ', accuracy.eval({X: X_dev, Y: Y_dev}))

        if epoch % epochs_between_saving_cost == 0:
            costs.append(epoch_cost)
            train_accuracies.append(accuracy.eval({X: X_train, Y: Y_train}))
            dev_accuracies.append(accuracy.eval({X: X_dev, Y: Y_dev}))

    # Calculate the accuracy on the train and dev sets

    print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
    print ("Dev Accuracy:", accuracy.eval({X: X_dev, Y: Y_dev}))

# Plot cost
plt.plot(np.squeeze(costs))
plt.ylabel('Cost')
plt.xlabel('Iterations (by 5)')
plt.title('Learning rate = ' + str(learning_rate))
plt.show()

# Plot train and dev accuracy
plt.plot(np.squeeze(train_accuracies))
plt.plot(np.squeeze(dev_accuracies))
plt.ylabel('Accuracy')
plt.xlabel('Iterations (by 5)')
plt.title('Learning rate = ' + str(learning_rate))
plt.show()

0 Epochs
	Cost:  3.511787176132202
	Train Accuracy:  0.124658056
	Dev Accuracy:  0.12255603
