In [1]:
from __future__ import print_function
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import numpy as np
import os

In [2]:
pickle_file = 'fer2013.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save
    
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Valid set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)

Training set (28709, 48, 48) (28709,)
Valid set (3589, 48, 48) (3589,)
Test set (3589, 48, 48) (3589,)


In [3]:
image_size = 48
num_labels = 7
num_channels = 1


def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size, image_size, num_channels)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
    
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (28709, 48, 48, 1) (28709, 7)
Validation set (3589, 48, 48, 1) (3589, 7)
Test set (3589, 48, 48, 1) (3589, 7)


In [4]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [5]:
batch_size = 50
kernel_size = 5
c3_kernel = 4
c1_depth = 64
c2_depth = 96
c3_depth = 128
c4_depth = 256
fc1_nodes = 2048
fc2_nodes = 2048

beta = 0.001
starter_learning_rate = 0.0001

alexnet_simple = tf.Graph()

with alexnet_simple.as_default():
    #Input data
    
    tf_train_dataset = tf.placeholder(dtype=tf.float32, 
        shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(dtype=tf.float32,
        shape=(batch_size, num_labels))
    
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    tf_valid_labels = tf.constant(valid_labels)
    
      # Variables.
    c1_weights = tf.Variable(tf.truncated_normal(
      [kernel_size, kernel_size, num_channels, c1_depth], stddev=0.1))
    c1_bias = tf.Variable(tf.zeros([c1_depth]))

    c2_weights = tf.Variable(tf.truncated_normal(
        [kernel_size, kernel_size, c1_depth, c2_depth], stddev=0.1))
    c2_bias = tf.Variable(tf.zeros([c2_depth]))
    
    c3_weights = tf.Variable(tf.truncated_normal(
        [c3_kernel, c3_kernel, c2_depth, c3_depth], stddev=0.1))
    c3_bias = tf.Variable(tf.zeros([c3_depth]))
    
    c4_weights = tf.Variable(tf.truncated_normal(
        [c3_kernel, c3_kernel, c3_depth, c4_depth], stddev=0.1))
    c4_bias = tf.Variable(tf.zeros([c4_depth]))
    
    fc1_weights = tf.Variable(tf.truncated_normal(
                [image_size //4 * image_size //4 * c4_depth, fc1_nodes], stddev=0.1))
    fc1_bias = tf.Variable(tf.zeros([fc1_nodes]))
    
    fc2_weights = tf.Variable(tf.truncated_normal(
            [fc1_nodes, fc2_nodes], stddev=0.1))
    fc2_bias = tf.Variable(tf.zeros([fc2_nodes]))
                          
    hidden_weights = tf.Variable(tf.truncated_normal(
            [fc1_nodes, num_labels], stddev=0.1))
    hidden_bias = tf.Variable(tf.zeros([num_labels]))
                    
   
    keep_prob = tf.placeholder(tf.float32)
                          
    def model(data, drop=False):
        conv = tf.nn.conv2d(data, c1_weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + c1_bias)
        pooled = tf.nn.max_pool(hidden, ksize=[1,3,3,1],
                                strides=[1,2,2,1], padding='SAME')
        #pooled_norm = tf.nn.local_response_normalization(pooled)
                          
        conv = tf.nn.conv2d(pooled, c2_weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + c2_bias)
        pooled = tf.nn.max_pool(hidden, ksize=[1,3,3,1],
                                strides=[1,2,2,1], padding='SAME')
        #pooled_norm = tf.nn.local_response_normalization(pooled)
        
        conv = tf.nn.conv2d(pooled, c3_weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + c3_bias)
        
        conv = tf.nn.conv2d(hidden, c4_weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + c4_bias)
        
        #hidden_norm = tf.nn.local_response_normalization(hidden)
        
        shape = hidden.get_shape().as_list()
                          
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        if drop:
            reshape_drop = tf.nn.dropout(reshape, keep_prob=keep_prob)

            fc1 = tf.nn.relu(tf.matmul(reshape_drop, fc1_weights) + fc1_bias)
            fc1_drop = tf.nn.dropout(fc1, keep_prob=keep_prob)
            
            #fc2 = tf.nn.relu(tf.matmul(fc1_drop, fc2_weights) + fc2_bias)
            #fc2_drop = tf.nn.dropout(fc2, keep_prob=keep_prob)
            return tf.matmul(fc1_drop, hidden_weights) + hidden_bias
            
        else:
            fc1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_bias)
            #fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_bias)
            return tf.matmul(fc1, hidden_weights) + hidden_bias           
    
    logits = model(tf_train_dataset, False)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
    l2_loss = beta * ( tf.nn.l2_loss(c1_weights) + tf.nn.l2_loss(c1_bias) +
                   tf.nn.l2_loss(c2_weights) + tf.nn.l2_loss(c2_bias) +
                   tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_bias) +
                   tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_bias) +
                   tf.nn.l2_loss(hidden_weights) + tf.nn.l2_loss(hidden_bias) +
                   tf.nn.l2_loss(c3_weights) + tf.nn.l2_loss(c3_bias)  +
                   tf.nn.l2_loss(c4_weights) + tf.nn.l2_loss(c4_bias)  
                 )

    # global_step = tf.Variable(0, trainable=False)
    
    #learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
    #                                           1000, 0.96, staircase=True)

    optimizer = tf.train.AdamOptimizer(starter_learning_rate).minimize(loss+l2_loss)
                          
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(valid_prediction, tf_valid_labels))
    
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
    
    summary = tf.scalar_summary("training_error", loss)
    validation_summary = tf.scalar_summary("test_error", valid_loss)
    saver = tf.train.Saver()

In [None]:
num_steps = 140001
model_name = 'more_convolution_l2_1e3_r1e4'
train_writer = tf.train.SummaryWriter('./summary/'+ model_name)
#valid_writer = tf.train.SummaryWriter('./summary/valid')


with tf.Session(graph=alexnet_simple) as session:
  tf.initialize_all_variables().run()
  print('Initialized')
  initial_step = 0

  ## Load saved checkpoint
  ckpt = tf.train.get_checkpoint_state(
    os.path.dirname('./checkpoints/' + model_name + '/checkpoint'))

  if ckpt and ckpt.model_checkpoint_path:
    # Restore from checkpoint
    saver.restore(session, ckpt.model_checkpoint_path)
    initial_step = int(ckpt.model_checkpoint_path.rsplit('-', 1)[1])
    print("Restore from: ", ckpt.model_checkpoint_path)
  else:
    try:
        os.mkdir('./checkpoints/' + model_name)
    except:
        print("It's ok")
    
  for step in range(initial_step, num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, keep_prob: 0.5}
    _, l, predictions, = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    
    if (step > 0 and step % 100 == 0):
      train_summ, valid_summ = session.run([summary, validation_summary], feed_dict=feed_dict)
      train_writer.add_summary(valid_summ, step)
      train_writer.add_summary(train_summ, step)
    
    if (step % 500 == 0):
    
      # print('Learning rate: %f' % (rate))
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
        
      #valid_predict = session.run([valid_prediction], feed_dict=feed_dict)
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
    if (step % 1000 == 0):
        saver.save(session, './checkpoints/' + model_name + '/' + model_name, global_step=step)
  #test_predict = session.run([test_prediction], feed_dict=feed_dict)
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
  saver.save(session, './checkpoints/' + model_name, global_step=num_steps)
  session.close()


train_writer.close()

#valid_writer.close()

Initialized
Restore from:  ./checkpoints/more_convolution_l2_1e3_r1e4/more_convolution_l2_1e3_r1e4-100000
Minibatch loss at step 100000: 0.001110
Minibatch accuracy: 100.0%
Validation accuracy: 50.1%
