## Setup

In [1]:
import tensorflow as tf
import numpy as np
import math

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from util import import_dataset

# Directory holding the dataset files, relative to this notebook.
address = '../../data/'

# File name for each dataset component, keyed by the name import_dataset is
# given; the key semantics live in util.import_dataset (not visible here).
file_names = {
    'images': 'full_data.npy',
    'subs': 'full_subredditlabels',
    'dict': 'full_subredditIndex',
    'nsfw': 'full_nsfwlabels',
}

# `data` exposes X_/y_ train/val/test arrays (used by the cells below).
data, dictionary = import_dataset(address, file_names)

In [3]:
# Sanity check: show the shape of every feature/label split, in the order
# X_train, X_val, X_test, y_train, y_val, y_test.
for split_array in (data.X_train, data.X_val, data.X_test,
                    data.y_train, data.y_val, data.y_test):
    print(split_array.shape)

(25450, 128, 128, 3)
(3181, 128, 128, 3)
(3182, 128, 128, 3)
(25450,)
(3181,)
(3182,)


## Run the Model

In [7]:
from googlenet import GoogleNet
from config import ModelConfig, TrainConfig

# Clear the default graph so re-running this cell builds the model from scratch.
tf.reset_default_graph()

model_config = ModelConfig(
    eval_batch_size=3000,
    learning_rate=5e-5,
    output="nsfw",
)

# NOTE(review): the save file name is tagged "1e-3" while learning_rate above
# is 5e-5 -- confirm the tag is intentional before relying on it.
train_config = TrainConfig(
    print_every=100,
    num_epochs=8,
    saver_address=r'../../saved_params/',
    save_file_name='GoogleNet_nsfw_classification_1e-3_96',
    lr_decay=0.96,
)

model = GoogleNet(model_config)

# The session is deliberately left open: the evaluation cell below reuses `sess`.
sess = tf.Session()
model.train(data, sess, train_config)

---------------------------------------------------------
Batch 100/398 of epoch 1 finished in 33.316869 seconds
Batch 200/398 of epoch 1 finished in 29.653357 seconds
Batch 300/398 of epoch 1 finished in 29.701008 seconds
Epoch 1 training finished in 121.799803 seconds
train accuracy:94.8%
val accuracy:94.8%
Epoch 1 evaluation finished in 22.952360 seconds
---------------------------------------------------------
Batch 100/398 of epoch 2 finished in 29.581750 seconds
Batch 200/398 of epoch 2 finished in 29.602077 seconds
Batch 300/398 of epoch 2 finished in 29.638256 seconds
Epoch 2 training finished in 117.849646 seconds
train accuracy:95.1%
val accuracy:94.8%
Epoch 2 evaluation finished in 21.322258 seconds
---------------------------------------------------------
Batch 100/398 of epoch 3 finished in 29.522658 seconds
Batch 200/398 of epoch 3 finished in 29.414610 seconds
Batch 300/398 of epoch 3 finished in 29.271164 seconds
Epoch 3 training finished in 116.902187 seconds
train acc

KeyboardInterrupt: 

In [8]:
# Evaluate the trained model on the test split using the session from the
# training cell. The value is left as the cell's last expression so the
# returned tuple is displayed (presumably (loss, accuracy) -- confirm in GoogleNet.eval).
model.eval(data, sess, split="test")

test accuracy:94.3%


(800.43861389160156, 0.94311762073368188)

## Load the Model

In [None]:
from googlenet import GoogleNet
from config import ModelConfig, TrainConfig
import pickle

# Reset Graph
tf.reset_default_graph()

# Create model instance
# NOTE(review): the training cell builds the model with output="nsfw" while
# this uses the default ModelConfig -- confirm the graphs match the checkpoint.
model_config = ModelConfig()
model = GoogleNet(model_config)

# Load Saved Model
sess = tf.Session()
saver = tf.train.Saver()
# NOTE(review): the training cell saves under
# 'GoogleNet_nsfw_classification_1e-3_96'; confirm this checkpoint name exists.
save_file = "../../saved_params/GoogleNet_nsfw_classification"
saver.restore(sess, save_file)

# Restore the recorded training history stored next to the checkpoint.
# WARNING: pickle.load can execute arbitrary code -- only load history files
# produced by a trusted run of this project.
with open(save_file + "_modelhist", 'rb') as history_file:
    saved_history = pickle.load(history_file)
model.model_history = saved_history

# Test Model Accuracy
loss_train, acc_train = model.eval(data, sess, split='train')
loss_val, acc_val = model.eval(data, sess, split='val')

## Plot Graphs

In [None]:
# Plot the model's loss/accuracy curves (per the method name; presumably drawn
# from model.model_history -- confirm in GoogleNet.plot_loss_acc).
model.plot_loss_acc(data)

In [None]:
# Run the prediction op on the whole validation split with training mode off.
val_feed = {
    model.X_placeholder: data.X_val,
    model.y_placeholder: data.y_val,
    model.is_training_placeholder: False,
}
val_scores = sess.run(model.prediction, feed_dict=val_feed)

# Collapse the per-class outputs to a predicted class index per example.
y_val_pred = np.argmax(val_scores, axis=1)

In [None]:
from util import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# Display names for the two classes, in label-index order
# (assumes label 0 is sfw and label 1 is nsfw -- TODO confirm).
classes = ["sfw", "nsfw"]

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_true=data.y_val, y_pred=y_val_pred)
plot_confusion_matrix(cm, classes, normalize=True)

## Hyperparameter Optimization

In [None]:
import hyperopt as hpropt

def objective(args):
    """Train and evaluate one GoogleNet configuration for a hyperopt trial.

    Args:
        args: dict of sampled hyperparameters. Must contain the keys
            'learning_rate', 'sbrd_weight', 'keep_prob', 'num_epochs'
            and 'lr_decay'.

    Returns:
        The validation cost reported by ``model.eval``; this is the value
        hyperopt's ``fmin`` minimizes.

    Side effects:
        Pickles a summary of the trial (metrics plus hyperparameters) into
        ``../../hprOpt/`` and plots the model's loss/accuracy curves.
    """
    model_config = ModelConfig(
        learning_rate=args['learning_rate'],
        sbrd_weight=args['sbrd_weight'],
        keep_prob=args['keep_prob'],
    )
    train_config = TrainConfig(num_epochs=args['num_epochs'], lr_decay=args['lr_decay'])

    # Each trial builds its model in a fresh default graph.
    tf.reset_default_graph()
    model = GoogleNet(model_config)

    # Close the session when the trial ends; otherwise every trial leaks a
    # session (and the resources it holds) for the lifetime of the kernel.
    sess = tf.Session()
    try:
        model.train(data, sess, train_config)
        # NOTE(review): other cells in this notebook unpack two values from
        # model.eval; this assumes the configuration used here yields three
        # (cost, subreddit accuracy, nsfw accuracy) -- confirm.
        cost, accuracy_sbrd, accuracy_nsfw = model.eval(data, sess, "val")

        saveList = {
            'accuracy_sbrd' : accuracy_sbrd,
            'accuracy_nsfw' : accuracy_nsfw,
            'cost' : cost,
            'num_epochs' : args['num_epochs'],
            'learning_rate' : args['learning_rate'],
            'lr_decay' : args['lr_decay'],
            'sbrd_weight' : args['sbrd_weight'],
            'keep_prob' : args['keep_prob']
        }
        # The file name encodes the trial's metrics so results can be compared
        # from a directory listing.
        out_path = "../../hprOpt/" + str(accuracy_sbrd) + "_"  + str(accuracy_nsfw) + "_" + str(cost) + ".dat"
        with open(out_path, "wb") as out_file:
            pickle.dump(saveList, out_file)

        # Plot while the session is still open in case plotting needs it.
        model.plot_loss_acc(data)
    finally:
        sess.close()
    return cost

def optimize(space=None, max_evals=50):
    """Run a TPE hyperparameter search that minimizes ``objective``.

    Args:
        space: optional dict mapping hyperparameter names to hyperopt
            expressions or constants. Entries given here override the
            defaults below; any key not supplied falls back to its default,
            so ``objective`` always receives every key it reads.
        max_evals: number of trials to run.

    Returns:
        The best hyperparameter assignment found, as returned by
        ``hyperopt.fmin``.
    """
    # Default search space, used for every key the caller does not override.
    # NOTE(review): hp.randint('num_epochs', 20) draws from [0, 20), so a
    # trial with 0 epochs is possible -- confirm that is acceptable.
    default_space = {
        'num_epochs' : hpropt.hp.randint('num_epochs', 20),
        'learning_rate' : hpropt.hp.loguniform('learning_rate', -4, -1),
        'lr_decay' : hpropt.hp.uniform('lr_decay', 0.9, 1.0),
        'sbrd_weight' : hpropt.hp.uniform('sbrd_weight', 0.5, 1.0),
        'keep_prob' : hpropt.hp.uniform('keep_prob', 0.3, 1.0)
    }

    # Previously the caller's space was discarded; honour it, with defaults
    # filling in whatever the caller left out.
    search_space = dict(default_space)
    if space:
        search_space.update(space)

    best = hpropt.fmin(objective, search_space, algo=hpropt.tpe.suggest, max_evals=max_evals)
    print(best)

    return best

In [None]:
# Quick smoke-test search: a single epoch per trial and only two trials.
# Every key that `objective` reads must be present, including 'keep_prob'.
space = {
    'num_epochs' : 1,
    'learning_rate' : hpropt.hp.loguniform('learning_rate', -4, -1),
    'lr_decay' : hpropt.hp.uniform('lr_decay', 0.9, 1),
    'sbrd_weight' : hpropt.hp.uniform('sbrd_weight', 0.5, 1),
    'keep_prob' : hpropt.hp.uniform('keep_prob', 0.3, 1)
}
optimize(space, 2)