In [22]:
#Load the usual suspects

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [23]:
# Load the train data

with np.load('/Users/David/Desktop/EPFL Applied ML/cifar4-train.npz', allow_pickle=False) as npz_file:
    pixels = npz_file['pixels'].astype('float32')
    overfeat = npz_file['overfeat']
    labels = npz_file['labels']
    names = npz_file['names']
    allow = npz_file['allow_pickle']
    
print('Pixels : {:}'.format(pixels.shape),
      'Overfeat: {:}'.format(overfeat.shape),
      'Labels: {:}'.format(labels.shape),
      'Names: {:}'.format(names.shape), 
      'Allow pickle: {:}'.format(allow.shape))

Pixels : (5000, 3072) Overfeat: (5000, 4096) Labels: (5000,) Names: (4,) Allow pickle: ()


In [24]:
# Load the test data

with np.load('/Users/David/Desktop/EPFL Applied ML/cifar4-test.npz', allow_pickle=False) as npz_file:
    pixel_te = npz_file['pixels'].astype('float32')
    overfeat_te = npz_file['overfeat']
    allow_te = npz_file['allow_pickle']
    
print('Pixels : {:}'.format(pixels.shape),
      'Overfeat: {:}'.format(overfeat.shape),
      'Allow pickle: {:}'.format(allow.shape))

Pixels : (5000, 3072) Overfeat: (5000, 4096) Allow pickle: ()


In [25]:
# Transform the train data by scaling to improve performance of the gradient descent optimizer

scaler = StandardScaler().fit(overfeat)
X_train_standard = scaler.transform(overfeat)
X_test_standard = scaler.transform(overfeat_te)

In [26]:
# Define a function that will be used to select batches of data, using a Python generator

def get_batches(X, y, batch_size):
    # Shuffle X,y
    shuffled_idx = np.arange(len(y)) # 1,2,...,n
    np.random.shuffle(shuffled_idx)

    # Enumerate indexes by steps of batch_size
    # i: 0, b, 2b, 3b, 4b, .. where b is the batch size
    for i in range(0, len(y), batch_size):
        # Batch indexes
        batch_idx = shuffled_idx[i:i+batch_size]
        yield X[batch_idx], y[batch_idx]

In [36]:
# Define the graph
graph = tf.Graph()

with graph.as_default():
    # Create placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 4096]) # dimensions set to 4096, as they correspond to the overfeat shape
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    dropout = tf.placeholder(dtype=tf.bool) # placeholder to pass to layers.dropout in order to deactivate some neurons
    alpha = tf.placeholder(dtype=tf.float32) # placeholder to pass as regularization term
    
    # Hidden layer with 64 units
    hidden = tf.layers.dense(
        X, 64, activation=tf.nn.relu, # ReLU activation
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0), # initializer for layers with ReLU activation
        bias_initializer=tf.zeros_initializer(), # all zeros initializer
        name='hidden'
    )

    # Apply dropout to hidden layer
    hidden = tf.layers.dropout(
        hidden, rate=0.5, seed=0, training=dropout)
    
    # Get weights/biases of the hidden layer
    with tf.variable_scope('hidden', reuse=True):
        W1 = tf.get_variable('kernel')
        b1 = tf.get_variable('bias')    
    
    # Output layer with 4 logits
    logits = tf.layers.dense(
        hidden, 4, activation=None, # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0), # initializer for layers without ReLU activation
        bias_initializer=tf.zeros_initializer(), # all zeros initializer
        name='output'
    )

    # Get weights/biases of the output layer
    with tf.variable_scope('output', reuse=True):
        W2 = tf.get_variable('kernel')
        b2 = tf.get_variable('bias')
    
    # Loss fuction: mean cross-entropy with regularization term
    mean_ce = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits)) # mean cross-entropy
    l2_term = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W1) # L2 term which includes the hidden and output weight matrices
    loss = mean_ce + alpha * l2_term # total loss with penalization
    
    # Gradient descent parameters
    lr = tf.placeholder(dtype=tf.float32) # learning rate
    gd = tf.train.GradientDescentOptimizer(learning_rate=lr) # gradient descent algorithm

    # Minimize the loss function (cross-entropy with L2 regularization)
    train_op = gd.minimize(loss)

    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32) # Class with maximum logit
    is_correct = tf.equal(y, predictions) # Compare predictions to target values
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32)) # mean value of correctly predicted logits

In [50]:
# Rerun the neural network with the tuned values found in for this fully connected model. 

train_acc_values = [] # empty list to fill with mean training accuracy across batches

with tf.Session(graph=graph) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())

    # Set seed
    np.random.seed(0)
            
    for epoch in range(40):
        # Accuracy values (train) after each batch
        batch_acc = []

        # Get batches of data of size 64
        for X_batch, y_batch in get_batches(X_train_standard, labels, 64):
            # Run training and evaluate accuracy
            _, acc_value = sess.run([train_op, accuracy], feed_dict={
                X: X_batch,
                y: y_batch,
                lr: 0.01, # Learning rate
                alpha: 0.0001, # Regularization term
                dropout: True # Apply dropout for the training set
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)
        train_acc_values.append(np.mean(batch_acc))

        # Print progress for the epochs
        print('Epoch {} - train: {:.3f} (mean)'.format(
            epoch+1, np.mean(batch_acc)
        ))        
 
    # Predictions
    class_prediction = sess.run(predictions, feed_dict={
        X: X_test_standard,
        dropout: False # do not apply dropout because it is the test set, and we are not fitting anything
    })

Epoch 1 - train: 0.662 (mean)
Epoch 2 - train: 0.775 (mean)
Epoch 3 - train: 0.801 (mean)
Epoch 4 - train: 0.819 (mean)
Epoch 5 - train: 0.827 (mean)
Epoch 6 - train: 0.829 (mean)
Epoch 7 - train: 0.838 (mean)
Epoch 8 - train: 0.849 (mean)
Epoch 9 - train: 0.854 (mean)
Epoch 10 - train: 0.864 (mean)
Epoch 11 - train: 0.861 (mean)
Epoch 12 - train: 0.867 (mean)
Epoch 13 - train: 0.874 (mean)
Epoch 14 - train: 0.877 (mean)
Epoch 15 - train: 0.881 (mean)
Epoch 16 - train: 0.880 (mean)
Epoch 17 - train: 0.893 (mean)
Epoch 18 - train: 0.887 (mean)
Epoch 19 - train: 0.892 (mean)
Epoch 20 - train: 0.889 (mean)
Epoch 21 - train: 0.894 (mean)
Epoch 22 - train: 0.902 (mean)
Epoch 23 - train: 0.906 (mean)
Epoch 24 - train: 0.912 (mean)
Epoch 25 - train: 0.907 (mean)
Epoch 26 - train: 0.909 (mean)
Epoch 27 - train: 0.911 (mean)
Epoch 28 - train: 0.918 (mean)
Epoch 29 - train: 0.914 (mean)
Epoch 30 - train: 0.919 (mean)
Epoch 31 - train: 0.923 (mean)
Epoch 32 - train: 0.929 (mean)
Epoch 33 - train:

In [53]:
np.save('predictions', class_prediction, allow_pickle=False) # save the predicted classes to a npy file