## Setup

In [1]:
import tensorflow as tf
import numpy as np
import math

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from util import import_dataset

# Directory holding the dataset files, and the file name of each dataset component.
address = '../../data/'
file_names = {
    'images': 'full_data.npy',
    'subs': 'full_subredditlabels',
    'dict': 'full_subredditIndex',
    'nsfw': 'full_nsfwlabels',
}

# `data` exposes the X_/y_ train/val/test splits used by the cells below.
# NOTE(review): `dictionary` is presumably the subreddit index loaded from
# file_names['dict'] -- confirm against util.import_dataset.
data, dictionary = import_dataset(address, file_names)

In [3]:
# Print the shape of every split: the three feature arrays first, then the
# three label arrays, in train / val / test order.
for split_name in ('X_train', 'X_val', 'X_test', 'y_train', 'y_val', 'y_test'):
    print(getattr(data, split_name).shape)

(25450, 128, 128, 3)
(3181, 128, 128, 3)
(3182, 128, 128, 3)
(25450,)
(3181,)
(3182,)


## Run the Model

In [4]:
from googlenet import GoogleNet
from config import ModelConfig, TrainConfig

# Clear the default graph so that re-running this cell builds the model in a
# fresh graph instead of adding to the previous one.
tf.reset_default_graph()

model_config = ModelConfig(
    eval_batch_size=3000,
    learning_rate=5e-5,
    output="nsfw",
)

# NOTE(review): the save file name encodes "1e-3" while learning_rate above is
# 5e-5, and the "Load the Model" cell restores "GoogleNet_nsfw_classification"
# (no suffix) -- confirm which checkpoint name is intended.
train_config = TrainConfig(
    print_every=100,
    num_epochs=2,
    saver_address=r'../../saved_params/',
    save_file_name='GoogleNet_nsfw_classification_1e-3_96',
    lr_decay=0.96,
)

# Build the network, open a session and train on the loaded dataset.
# `model` and `sess` are reused by the evaluation cells further down.
model = GoogleNet(model_config)
sess = tf.Session()
model.train(data, sess, train_config)

---------------------------------------------------------
Batch 100/398 of epoch 1 finished in 19.121160 seconds
Batch 200/398 of epoch 1 finished in 17.089528 seconds
Batch 300/398 of epoch 1 finished in 17.118897 seconds
Epoch 1 training finished in 70.726463 seconds
train accuracy:89.8%
val accuracy:89.5%
Epoch 1 evaluation finished in 21.582336 seconds
---------------------------------------------------------
Batch 100/398 of epoch 2 finished in 17.122850 seconds
Batch 200/398 of epoch 2 finished in 17.132186 seconds
Batch 300/398 of epoch 2 finished in 17.114587 seconds
Epoch 2 training finished in 68.096556 seconds
train accuracy:94.1%
val accuracy:93.8%
Epoch 2 evaluation finished in 10.484437 seconds


## Load the Model

In [None]:
from googlenet import GoogleNet
from config import ModelConfig, TrainConfig
import pickle

# Reset Graph
tf.reset_default_graph()

# Create model instance
# NOTE(review): the training cell builds ModelConfig(output="nsfw", ...); this
# cell uses the defaults. If the default output differs, the restored variables
# may not match the graph -- confirm.
model_config = ModelConfig()
model = GoogleNet(model_config)

# Load Saved Model
sess = tf.Session()
saver = tf.train.Saver()
save_file = "../../saved_params/GoogleNet_nsfw_classification"
saver.restore(sess, save_file)

# The training history is stored next to the checkpoint as a pickle.
# Use a context manager so the file handle is closed deterministically.
# SECURITY: pickle.load executes arbitrary code from the file; only load
# history files produced by this project.
with open(save_file + "_modelhist", 'rb') as history_file:
    saved_history = pickle.load(history_file)
model.model_history = saved_history

# Test Model Accuracy
loss_train, acc_train = model.eval(data, sess, split='train')
loss_val, acc_val = model.eval(data, sess, split='val')

## Plot Graphs

In [None]:
# Plot the loss/accuracy curves for the current `model` (the one trained or
# restored in an earlier cell).
# NOTE(review): presumably drawn from model.model_history -- confirm in googlenet.
model.plot_loss_acc(data)

## Hyperparameter Optimization

In [10]:
from googlenet import GoogleNet
from config import ModelConfig, TrainConfig
import pickle
import hyperopt as hpropt

def objective(args):
    """Train and evaluate one hyperparameter configuration (hyperopt objective).

    Builds a fresh GoogleNet in a reset default graph, trains it on the
    notebook-level `data`, evaluates it on the validation split, writes the
    trial summary to ``../../hprOpt/nsfw_<accuracy>.dat`` and plots the
    loss/accuracy curves.

    Args:
        args: mapping with the keys 'learning_rate', 'keep_prob',
            'num_epochs' and 'lr_decay'.

    Returns:
        The validation cost, which is the value passed back to the optimizer.
    """
    model_config = ModelConfig(
        learning_rate=args['learning_rate'],
        keep_prob=args['keep_prob'],
        eval_batch_size=3000,
        output="nsfw",
    )
    train_config = TrainConfig(num_epochs=args['num_epochs'], lr_decay=args['lr_decay'])

    tf.reset_default_graph()
    model = GoogleNet(model_config)

    # A new session is opened for every trial; close it when the trial ends so
    # that repeated trials do not accumulate open sessions and their resources.
    sess = tf.Session()
    try:
        model.train(data, sess, train_config)
        cost, accuracy = model.eval(data, sess, "val")

        saveList = {
            'accuracy' : accuracy,
            'cost' : cost,
            'num_epochs' : args['num_epochs'],
            'learning_rate' : args['learning_rate'],
            'lr_decay' : args['lr_decay'],
            'keep_prob' : args['keep_prob']
        }
        print(saveList)

        # Context manager guarantees the result file is flushed and closed.
        # Note: the file name is derived from the accuracy, so two trials with
        # the same accuracy write to the same path.
        with open("../../hprOpt/nsfw_" + str(accuracy) + ".dat", "wb") as result_file:
            pickle.dump(saveList, result_file)

        # Plot while the session is still open, in case plotting relies on it.
        model.plot_loss_acc(data)
    finally:
        sess.close()

    return cost

def optimize(space, max_evals=50):
    """Minimise `objective` over `space` with hyperopt's TPE algorithm.

    Args:
        space: hyperopt search space handed to `hpropt.fmin`.
        max_evals: number of evaluations allowed for the search.

    Returns:
        The result of `hpropt.fmin`, which is also printed.
    """
    best_params = hpropt.fmin(
        objective,
        space,
        algo=hpropt.tpe.suggest,
        max_evals=max_evals,
    )
    print(best_params)
    return best_params

In [None]:
import math

def get_learning_rate_range(base_10_min, base_10_max):
    """Build the log-uniform 'learning_rate' hyperopt parameter.

    The bounds are given as base-10 exponents and rescaled before being
    passed to `hp.loguniform`.

    Args:
        base_10_min: base-10 exponent of the lower bound.
        base_10_max: base-10 exponent of the upper bound.

    Returns:
        The hyperopt expression labelled 'learning_rate'.
    """
    # 1 / log10(e) equals ln(10): multiplying a base-10 exponent by it gives
    # the equivalent natural-log exponent.
    ln_per_decade = 1 / math.log10(math.e)
    lower_bound = ln_per_decade * base_10_min
    upper_bound = ln_per_decade * base_10_max
    return hpropt.hp.loguniform('learning_rate', lower_bound, upper_bound)

# Search space: the epoch count is fixed at 6; learning rate, learning-rate
# decay and keep probability are sampled by hyperopt.
space = dict(
    num_epochs=6,
    learning_rate=get_learning_rate_range(-6, -1.5),
    lr_decay=hpropt.hp.uniform('lr_decay', 0.9, 1.0),
    keep_prob=hpropt.hp.uniform('keep_prob', 0.6, 1.0),
)

# Run 30 evaluations of the objective and keep the result.
best_opt = optimize(space, max_evals=30)

---------------------------------------------------------
Batch 100/398 of epoch 1 finished in 18.164331 seconds
Batch 200/398 of epoch 1 finished in 17.632418 seconds
Batch 300/398 of epoch 1 finished in 17.634186 seconds
Epoch 1 training finished in 70.583467 seconds
train accuracy:92.5%
val accuracy:93.0%
Epoch 1 evaluation finished in 11.311978 seconds
---------------------------------------------------------
Batch 100/398 of epoch 2 finished in 17.559952 seconds
Batch 200/398 of epoch 2 finished in 17.696228 seconds
Batch 300/398 of epoch 2 finished in 17.729908 seconds
Epoch 2 training finished in 70.193845 seconds
train accuracy:93.4%
val accuracy:93.7%
Epoch 2 evaluation finished in 10.895666 seconds
---------------------------------------------------------
Batch 100/398 of epoch 3 finished in 17.591631 seconds
Batch 200/398 of epoch 3 finished in 17.563115 seconds
Batch 300/398 of epoch 3 finished in 17.568737 seconds
Epoch 3 training finished in 69.927465 seconds
train accura

## Evaluate on Test Set

In [None]:
# Run the prediction op on the whole test split in a single call, with the
# model switched to inference mode.
test_feed = {
    model.X_placeholder: data.X_test,
    model.y_placeholder: data.y_test,
    model.is_training_placeholder: False,
}
test_scores = sess.run(model.prediction, test_feed)

# Reduce each row of scores to the index of its largest entry.
# NOTE(review): assumes axis 1 is the class axis -- confirm against googlenet.
y_test_pred = np.argmax(test_scores, axis=1)

In [None]:
from util import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

# Display names for the two classes.
# NOTE(review): presumably label 0 is "sfw" and label 1 is "nsfw" -- confirm.
classes = ["sfw", "nsfw"]

# Confusion matrix of the true test labels against the predicted labels,
# plotted in normalised form.
cm = confusion_matrix(y_true=data.y_test, y_pred=y_test_pred)
plot_confusion_matrix(cm, classes, normalize=True, task="nsfw")

In [None]:
# `get_f1_score` is not defined or imported anywhere in this notebook, so the
# original call raised NameError on a fresh kernel. Compute the F1 score with
# scikit-learn instead (already a dependency of the confusion-matrix cell).
# The module is imported under an alias so the `f1_score` result variable does
# not shadow the function.
# NOTE(review): this is the binary F1 for label 1 (presumably "nsfw") --
# confirm it matches what the intended helper reported.
from sklearn import metrics as sk_metrics

f1_score = sk_metrics.f1_score(data.y_test, y_test_pred)
print(f1_score)

In [8]:
# Evaluate the current model on the held-out test split and show the result
# as the cell output (elsewhere in this notebook the result is unpacked as
# loss/cost first, accuracy second).
test_metrics = model.eval(data, sess, split="test")
test_metrics

test accuracy:94.3%


(800.43861389160156, 0.94311762073368188)