# Milestone Model NSFW Classification

This notebook uses a classification model to label an image as either not safe for work (NSFW) or safe for work (not NSFW). For this milestone we implement an AlexNet model and analyze the results.

## Load Datasets

In [None]:
import math
import pickle
import random
import timeit

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
%matplotlib inline

In [None]:
# Load data and split into training, validation, and testing sets

# Function for permuting and splitting data into training, development, and test sets
def import_dataset(address, file_names, train_percent = 80, dev_percent = 10):
    """Load pickled images and labels, shuffle them, and split into three sets.

    Args:
        address: Path prefix that is concatenated directly with each file name.
        file_names: Mapping whose 'images' and 'nsfw' entries name the pickle
            files holding the image array and the per-image labels. Any other
            entry (e.g. 'dict') is ignored because its contents were never used.
        train_percent: Percentage of the examples placed in the training set.
        dev_percent: Percentage of the examples placed in the validation set;
            whatever remains becomes the test set.

    Returns:
        Tuple (X_train, y_train, X_val, y_val, X_test, y_test). The image
        arrays are floating point and have the training-set mean image
        subtracted.

    Raises:
        ValueError: If the number of labels differs from the number of images.
    """
    SEED = 455
    random.seed(SEED)

    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # Pickle data is binary, so the files must be opened in 'rb' mode.
    with open(address + file_names['images'], 'rb') as images_file:
        images = np.asarray(pickle.load(images_file))
    with open(address + file_names['nsfw'], 'rb') as labels_file:
        subs = np.asarray(pickle.load(labels_file))

    N = images.shape[0]
    if subs.shape[0] != N:
        raise ValueError(
            'Got {} images but {} labels'.format(N, subs.shape[0]))

    # Shuffle images and labels with the same seeded permutation
    indices = list(range(N))
    random.shuffle(indices)
    images = images[indices]
    subs = subs[indices]

    # Integer pixel data cannot absorb a floating-point mean in place
    if not np.issubdtype(images.dtype, np.floating):
        images = images.astype(np.float32)

    # Split into train, dev, and test sets
    train_end = int(train_percent*N/100)
    dev_end = train_end + int(dev_percent*N/100)
    X_train, y_train = images[:train_end], subs[:train_end]
    X_val, y_val = images[train_end:dev_end], subs[train_end:dev_end]
    X_test, y_test = images[dev_end:], subs[dev_end:]

    # Normalize the data: subtract the mean image of the training set only,
    # so no statistics leak from the validation or test sets
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    return X_train, y_train, X_val, y_val, X_test, y_test

# Build the training, development, and testing splits
address = ''
file_names = {'images': '', 'nsfw': '', 'dict': ''}
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = import_dataset(address, file_names)

# Sanity check: report the shape of every split
for _label, _split in (('Train data shape: ', X_train),
                       ('Train labels shape: ', y_train),
                       ('Validation data shape: ', X_val),
                       ('Validation labels shape: ', y_val),
                       ('Test data shape: ', X_test),
                       ('Test labels shape: ', y_test)):
    print(_label, _split.shape)

## Define AlexNet model 

* 11x11 convolutional layer with 96 filters and a stride of 4
* ReLU activation
* 3x3 max pooling with a stride of 2
* batch normalization


* 5x5 convolutional layer with 256 filters and a stride of 1
* ReLU activation
* 3x3 max pooling with a stride of 2
* batch normalization


* 3x3 convolutional layer with 384 filters and a stride of 1
* ReLU activation
* 3x3 convolutional layer with 384 filters and a stride of 1
* ReLU activation 
* 3x3 convolutional layer with 256 filters and a stride of 1
* ReLU activation
* 3x3 max pooling with a stride of 2


* affine layer from 4096 to 1792
* ReLU activation
* affine layer from 1792 to 1792
* ReLU activation
* affine layer from 1792 to 2

In [None]:
# Start from an empty default graph, discarding previously defined variables
tf.reset_default_graph()

# Number of output classes
Ncategories = 2

# Graph inputs that are fed anew for every batch.
# The leading dimension is None so it follows whatever batch size is fed in.
X = tf.placeholder(dtype=tf.float32, shape=[None, 128, 128, 3])
y = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)

def AlexNet(X, y, is_training):
    """Build the AlexNet-style graph and return the unnormalized class scores.

    The spatial sizes noted below assume the 128x128 inputs declared by the
    module-level ``X`` placeholder. With 'SAME' padding every strided op
    divides the spatial size by its stride: 128 -> 32 (conv1, stride 4)
    -> 16 -> 8 -> 4 (three stride-2 poolings), so the final feature map holds
    4 * 4 * 256 = 4096 values per image, matching the first affine layer.

    Args:
        X: Batch of images with 3 channels in the last dimension.
        y: Label tensor. Unused here; kept so the signature stays unchanged.
        is_training: Boolean tensor forwarded to the batch normalization
            layers to select training or inference behavior.

    Returns:
        Tensor of class scores with ``Ncategories`` columns (``Ncategories``
        is read from module scope).
    """
    # Convolutional variables: [filter_h, filter_w, in_channels, out_channels]
    Wconv1 = tf.get_variable("Wconv1", shape=[11, 11, 3, 96])
    bconv1 = tf.get_variable("bconv1", shape=[96])
    Wconv2 = tf.get_variable("Wconv2", shape=[5, 5, 96, 256])
    bconv2 = tf.get_variable("bconv2", shape=[256])
    Wconv3 = tf.get_variable("Wconv3", shape=[3, 3, 256, 384])
    bconv3 = tf.get_variable("bconv3", shape=[384])
    Wconv4 = tf.get_variable("Wconv4", shape=[3, 3, 384, 384])
    bconv4 = tf.get_variable("bconv4", shape=[384])
    Wconv5 = tf.get_variable("Wconv5", shape=[3, 3, 384, 256])
    bconv5 = tf.get_variable("bconv5", shape=[256])

    # Fully connected variables
    W1 = tf.get_variable("W1", shape=[4096, 1792])
    b1 = tf.get_variable("b1", shape=[1792])
    W2 = tf.get_variable("W2", shape=[1792, 1792])
    b2 = tf.get_variable("b2", shape=[1792])
    W3 = tf.get_variable("W3", shape=[1792, Ncategories])
    b3 = tf.get_variable("b3", shape=[Ncategories])

    # Stage 1: conv (stride 4) -> ReLU -> max pool (stride 2) -> batch norm
    a1 = tf.nn.conv2d(X, Wconv1, strides=[1,4,4,1], padding='SAME') + bconv1       # 128 -> 32
    h1 = tf.nn.relu(a1)
    mp1 = tf.nn.max_pool(h1, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME')   # 32 -> 16
    bn1 = tf.layers.batch_normalization(mp1, training=is_training)

    # Stage 2: conv (stride 1) -> ReLU -> max pool (stride 2) -> batch norm
    a2 = tf.nn.conv2d(bn1, Wconv2, strides=[1,1,1,1], padding='SAME') + bconv2     # 16 -> 16
    h2 = tf.nn.relu(a2)
    mp2 = tf.nn.max_pool(h2, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME')   # 16 -> 8
    bn2 = tf.layers.batch_normalization(mp2, training=is_training)

    # Stage 3: three stride-1 convolutions, each followed by ReLU, then pooling
    a3 = tf.nn.conv2d(bn2, Wconv3, strides=[1,1,1,1], padding='SAME') + bconv3     # 8 -> 8
    h3 = tf.nn.relu(a3)
    a4 = tf.nn.conv2d(h3, Wconv4, strides=[1,1,1,1], padding='SAME') + bconv4      # 8 -> 8
    # Activate a4 (not a3); otherwise the fourth conv layer is never used
    h4 = tf.nn.relu(a4)
    a5 = tf.nn.conv2d(h4, Wconv5, strides=[1,1,1,1], padding='SAME') + bconv5      # 8 -> 8
    h5 = tf.nn.relu(a5)
    mp3 = tf.nn.max_pool(h5, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME')   # 8 -> 4

    # Classifier: flatten, two hidden affine+ReLU layers, then linear scores.
    # The hard-coded 4096 is only correct for 128x128 inputs (4 * 4 * 256).
    mp_flat = tf.reshape(mp3,[-1,4096])
    aff1 = tf.matmul(mp_flat, W1) + b1
    h6 = tf.nn.relu(aff1)
    aff2 = tf.matmul(h6, W2) + b2
    h7 = tf.nn.relu(aff2)
    y_out = tf.matmul(h7, W3) + b3

    return y_out

# Define y_out in graph
y_out = AlexNet(X, y, is_training)

# define our loss: hinge loss on one-hot encoded labels
total_loss = tf.losses.hinge_loss(tf.one_hot(y,Ncategories),logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# define our optimizer
optimizer = tf.train.AdamOptimizer(5e-4) # select optimizer and set learning rate

# batch normalization in tensorflow requires this extra dependency:
# the ops collected under UPDATE_OPS must run together with each training step.
# The training op is created exactly once, inside the dependency scope, so no
# redundant training op without these updates is left in the graph.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss)

## Test the Size of Your Output as Sanity Check

Feed random numbers into X through the feed dictionary and check that the size of the output makes sense.

In [None]:
# Now we're going to feed a random batch into the model
# and make sure the output is the right size:
# one row per example and one score per class
x = np.random.randn(64, 128, 128,3)
with tf.Session() as sess:
    with tf.device("/cpu:0"): #"/cpu:0" or "/gpu:0"
        tf.global_variables_initializer().run()

        ans = sess.run(y_out,feed_dict={X:x,is_training:True})
        print(ans.shape)
        # The model emits Ncategories scores per example, so compare against
        # that instead of a hard-coded class count
        print(np.array_equal(ans.shape, np.array([64, Ncategories])))

## Train the Model

In [None]:
# Run the model returning total loss and total correct
def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Run the model over a dataset, optionally performing training steps.

    The module-level placeholders ``X``, ``y`` and ``is_training`` are fed
    with each minibatch.

    Args:
        session: Session used to evaluate the graph.
        predict: Tensor of class scores; the argmax over axis 1 is the
            predicted label.
        loss_val: Loss tensor evaluated for every minibatch.
        Xd: Input examples, indexed along the first axis.
        yd: Labels aligned with ``Xd``.
        epochs: Number of passes over the dataset.
        batch_size: Maximum number of examples per minibatch.
        print_every: While training, print progress every this many iterations.
        training: Optional training op; when given it is run on every
            minibatch and ``is_training`` is fed as True.
        plot_losses: If True, plot the minibatch losses after each epoch.

    Returns:
        Tuple (total_loss, total_correct) for the last epoch: the loss
        averaged over all examples and the fraction of correct predictions.
        Both are None when ``epochs`` is 0.
    """
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict,1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indicies
    num_examples = Xd.shape[0]
    train_indicies = np.arange(num_examples)
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # setting up variables we want to compute (and optimizing)
    # if we have a training function, add that to things we compute.
    # Use the loss tensor passed by the caller rather than a global one.
    variables = [loss_val,correct_prediction,accuracy]
    if training_now:
        variables[-1] = training

    # ceiling division so a final partial batch is still processed
    num_batches = (num_examples + batch_size - 1) // batch_size

    # defined up front so the return value exists even when epochs == 0
    total_loss, total_correct = None, None

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(num_batches):
            # generate indicies for the batch
            start_idx = i*batch_size
            idx = train_indicies[start_idx:start_idx+batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now }
            # get batch size (the last batch may be smaller)
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)

            # aggregate performance stats, weighting the loss by batch size
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
            iter_cnt += 1
        total_correct = correct/num_examples
        total_loss = np.sum(losses)/num_examples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss,total_correct

# Open a session and initialize every variable in the graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Train for 10 epochs, plotting the minibatch losses after each one
print('Training')
run_model(sess, y_out, mean_loss, X_train, y_train,
          epochs=10, batch_size=64, print_every=100,
          training=train_step, plot_losses=True)

# Single evaluation pass over the validation split (no training op)
print('Validation')
run_model(sess, y_out, mean_loss, X_val, y_val,
          epochs=1, batch_size=64)

## Run the Model on the Training and Validation Data

In [None]:
# Evaluation-only pass over the training split (no training op supplied)
print('Training')
run_model(sess, y_out, mean_loss, X_train, y_train,
          epochs=1, batch_size=64)

# Evaluation-only pass over the validation split
print('Validation')
run_model(sess, y_out, mean_loss, X_val, y_val,
          epochs=1, batch_size=64)

## Run the Model on the Test Data

This is done only once, after determining the hyperparameters on the development set.

In [None]:
# Evaluation-only pass over the held-out test split
print('Test')
run_model(sess, y_out, mean_loss, X_test, y_test,
          epochs=1, batch_size=64)