In [1]:
import tensorflow as tf
import math
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def one_hot_matrix(labels, C):
    """
    Creates a matrix where the i-th row corresponds to the ith class number and the jth column
                     corresponds to the jth training example. So if example j had a label i. Then entry (i,j) 
                     will be 1. 
                     
    Arguments:
    labels -- vector containing the labels 
    C -- number of classes, the depth of the one hot dimension
    
    Returns: 
    one_hot -- one hot matrix
    """
    
    ### START CODE HERE ###
    
    # Create a tf.constant equal to C (depth), name it 'C'. (approx. 1 line)
    C = tf.constant(C, name="C")
    
    # Use tf.one_hot, be careful with the axis (approx. 1 line)
    one_hot_matrix = tf.one_hot(labels, depth=C, axis=0)
    
    # Create the session (approx. 1 line)
    sess = tf.Session()
    
    # Run the session (approx. 1 line)
    one_hot = sess.run(one_hot_matrix)
    
    # Close the session (approx. 1 line). See method 1 above.
    sess.close()
    
    ### END CODE HERE ###
    
    return one_hot

In [3]:
def create_placeholders(n_x, n_y):
    """
    Creates the placeholders for the tensorflow session.
    
    Arguments:
    n_x -- scalar, size of an image vector (num_px * num_px = 64 * 64 * 3 = 12288)
    n_y -- scalar, number of classes (from 0 to 5, so -> 6)
    
    Returns:
    X -- placeholder for the data input, of shape [n_x, None] and dtype "float"
    Y -- placeholder for the input labels, of shape [n_y, None] and dtype "float"
    
    Tips:
    - You will use None because it let's us be flexible on the number of examples you will for the placeholders.
      In fact, the number of examples during test/train is different.
    """

    ### START CODE HERE ### (approx. 2 lines)
    X = tf.placeholder(tf.float32, [n_x, None], name="X")
    Y = tf.placeholder(tf.float32, [n_y, None], name="Y")
    ### END CODE HERE ###
    
    return X, Y

In [4]:
def initialize_parameters():
    parameters = {}
    parameters['W1'] = tf.get_variable("W1", [25,370], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    parameters['b1'] = tf.get_variable("b1", [25,1], initializer = tf.zeros_initializer())
    parameters['W2'] = tf.get_variable("W2", [12,25], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    parameters['b2'] = tf.get_variable("b2", [12,1], initializer = tf.zeros_initializer())
    parameters['W3'] = tf.get_variable("W3", [1,12], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    parameters['b3'] = tf.get_variable("b3", [1,1], initializer = tf.zeros_initializer())    
    return parameters

In [5]:
def forward_propagation(X, parameters):
    Z1 = tf.add(tf.matmul(parameters['W1'], X), parameters['b1'])       # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(parameters['W2'], A1), parameters['b2'])  # Z2 = np.dot(W2, a1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(parameters['W3'], A2), parameters['b3'])  # Z3 = np.dot(W3,Z2) + b3
    return Z3

In [6]:
# Before reduce mean need to multiply labels by whatever weighting factor we want
# same effect as having more 1 examples

def compute_cost(Z3, Y, wt):
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(targets = labels, logits = logits, pos_weight = wt))
    return cost

In [7]:
def random_minibatches(X_train, Y_train, minibatch_size, num_minibatches, seed):
    np.random.seed(seed)
    indices = np.arange(X_train.shape[1])
    np.random.shuffle(indices)
    
    result = []
    for idx in range(num_minibatches):
        batch_X = np.zeros((X_train.shape[0], minibatch_size))
        batch_Y = np.zeros((Y_train.shape[0], minibatch_size))
        for batch_idx in range(minibatch_size):
            shuffled_idx = indices[idx * minibatch_size + batch_idx]
            batch_X[:, batch_idx] = X_train[:, shuffled_idx]
            batch_Y[:, batch_idx] = Y_train[:, shuffled_idx]
        result.append((batch_X, batch_Y))
    return result

In [8]:
def model(X_train, Y_train, X_dev, Y_dev, learning_rate = 0.0001,
          num_epochs = 10, minibatch_size = 32, print_cost = True, pos_weight = 1):

    X, Y = create_placeholders(X_train.shape[0], Y_train.shape[0])
    m = X_train.shape[1]
    seed = 12345
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y, pos_weight)
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(num_epochs):
            print "At epoch", epoch
            epoch_cost = 0.                       # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_minibatches(X_train, Y_train, minibatch_size, num_minibatches, seed)
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches
        parameters = sess.run(parameters)
        
        # Can change this to be AUC, precision, recall or whatever else we want
        correct_prediction = tf.equal(tf.round(tf.sigmoid(forward_propagation(X, parameters))), Y)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print ("Test Accuracy:", accuracy.eval({X: X_dev, Y: Y_dev}))
        
        return parameters

In [9]:
# Load Training Data
training_data = np.load("Train_Set.npy")
dev_data = np.load("Dev_Set.npy")

# Separate data into features and labels
num_feats = training_data.shape[1]
training_feats = training_data[:, 0:num_feats-1]
training_labels = training_data[:, num_feats-1]
training_labels = np.reshape(training_labels, (training_labels.shape[0],1))
dev_feats = dev_data[:, 0:num_feats-1]
dev_labels = dev_data[:, num_feats-1]
dev_labels = np.reshape(dev_labels, (dev_labels.shape[0],1))

#print (num_feats)
#print (training_feats.shape)

X_train = np.transpose(training_feats)
X_dev = np.transpose(dev_feats)

# Apply pos_ex_const to training examples
Y_train = np.transpose(training_labels)
Y_dev = np.transpose(dev_labels)

#print (training_feats.shape)
#print (training_labels.shape)
#print (Y_train.shape)
#print (Y_train)

# 5 appears to be the optimal weighting
parameters = model(X_train, Y_train, X_dev, Y_dev, pos_weight = 5)

  from ._conv import register_converters as _register_converters


At epoch 0
At epoch 1
At epoch 2
At epoch 3
At epoch 4
At epoch 5
At epoch 6
At epoch 7
At epoch 8
At epoch 9
('Train Accuracy:', 0.94874704)
('Test Accuracy:', 0.94580376)
