In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
from train import *
from loss import *
from accuracy import *
from model import *

import cifar_input
import os.path
import time
import numpy as np
import tensorflow as tf

In [None]:
# Run configuration read by the training cell below.

# Last training step; passed to tf.train.StopAtStepHook(last_step=...).
max_steps = 200000
# Directory used as checkpoint_dir and as the location of log.txt and
# training_data.csv.
train_dir = 'cifar10_alexnet_model/'
# Batch size handed to cifar_input.build_input(...) and to train(...).
batch_size = 128
# Forwarded to tf.ConfigProto(log_device_placement=...).
log_device_placement = False

In [None]:
with tf.Graph().as_default():
    # Everything in this block is built in a fresh graph that is the default
    # graph for the duration of the `with`.
    global_step = tf.contrib.framework.get_or_create_global_step()
    
    # Input pipeline: images and their labels.
    # build_input('cifar10'/'cifar100', cifar dataset file pattern, batch size, mode)
    images, labels = cifar_input.build_input('cifar10', '../../cifar/cifar10/data_batch*', batch_size, 'train')
    
    # Model graph. NUM_CLASSES=10 (CIFAR-10) or NUM_CLASSES=100 (CIFAR-100).
    # `inference` comes from one of the star imports at the top of the notebook
    # (presumably model.py -- TODO confirm).
    logits = inference(images, NUM_CLASSES=10)
    
    # Loss/error and accuracy tensors; both are fetched by the logging hook and
    # `losses` is also watched by tf.train.NanTensorHook.
    losses = loss(logits, labels)
    accuracies = accuracy(logits, labels)
    
    # Our train_op (only minimizing loss). This is the op the training loop runs
    # on every step; `train` is defined outside this notebook.
    train_op = train(losses, global_step, batch_size)
    
    
    # SessionRunHook. Logging will be done each x steps.
    class _LoggerHook(tf.train.SessionRunHook):
        """Session hook that reports loss and accuracy every 10th run call.

        Each report is printed, appended to ``log.txt`` and appended as a
        ``step,loss,accuracy`` row to ``training_data.csv``. Both files live in
        ``train_dir``. The hook reads ``train_dir``, ``losses`` and
        ``accuracies`` from the enclosing scope.

        NOTE(review): the step counter is local to the hook and restarts at 0
        each time the hook begins, and ``training_data.csv`` is truncated at
        that point while ``log.txt`` is appended to -- confirm this is the
        intended behaviour when training resumes from a checkpoint.
        """

        @staticmethod
        def _path(filename):
            """Return the path of `filename` inside `train_dir`."""
            # os.path.join works whether or not train_dir ends with a separator.
            return os.path.join(train_dir, filename)

        def _append(self, filename, text):
            """Append `text` to `filename` inside `train_dir`."""
            # Text mode: the payload is a str, which a binary-mode file rejects
            # on Python 3. The `with` block closes the file even if write fails.
            with open(self._path(filename), 'a') as handle:
                handle.write(text)

        def begin(self):
            """Reset the step counter and prepare the log files."""
            self._step = -1
            # Creating train_dir if it does not exist.
            if not os.path.exists(train_dir):
                os.makedirs(train_dir)
            # Start this run with an empty CSV file.
            with open(self._path('training_data.csv'), 'w'):
                pass
            self._append('log.txt', '\n\n==== Run ===\nInfo: Alexnet\n')

        def before_run(self, run_context):
            """Count the step, start the timer and request loss and accuracy."""
            self._step += 1
            self._start_time = time.time()
            return tf.train.SessionRunArgs([losses, accuracies])

        def after_run(self, run_context, run_values):
            """Print and persist loss/accuracy on every 10th step."""
            duration = time.time() - self._start_time  # wall time of this run call
            if self._step % 10 != 0:
                return

            # Results come back in the order requested in before_run.
            loss_value, accuracy_value = run_values.results

            # Format once so the console and log.txt carry the same line.
            message = "{0}: step {1}, error = {2:.4f}, accuracy = {3:.4f}. ({4:.3f} sec/step)\n".format(
                datetime.now(), self._step, loss_value, accuracy_value, float(duration))
            print(message)
            self._append('log.txt', message)
            self._append('training_data.csv',
                         '{0},{1},{2}\n'.format(self._step, loss_value, accuracy_value))
            
    # Hooks attached to the session: stop at max_steps, watch the loss tensor
    # for NaN values, and log progress through _LoggerHook.
    session_hooks = [
        tf.train.StopAtStepHook(last_step=max_steps),
        tf.train.NanTensorHook(losses),
        _LoggerHook(),
    ]
    session_config = tf.ConfigProto(log_device_placement=log_device_placement)

    # Checkpoints are written to train_dir with save_checkpoint_secs=30.
    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=train_dir,
            hooks=session_hooks,
            save_checkpoint_secs=30,
            config=session_config) as mon_sess:
        # Keep running the train op until the session asks to stop.
        while not mon_sess.should_stop():
            mon_sess.run(train_op)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into cifar10_alexnet_model/model.ckpt.
2017-03-13 18:45:36.836552: step 0, error = 17.3153, accuracy = 0.1406. (2.707 sec/step)

2017-03-13 18:45:40.436102: step 10, error = 16.9583, accuracy = 0.2266. (0.394 sec/step)

2017-03-13 18:45:44.348494: step 20, error = 16.7423, accuracy = 0.2031. (0.385 sec/step)

2017-03-13 18:45:48.471898: step 30, error = 16.4711, accuracy = 0.3125. (0.445 sec/step)

2017-03-13 18:45:53.185234: step 40, error = 16.4015, accuracy = 0.2188. (0.407 sec/step)

2017-03-13 18:45:58.230725: step 50, error = 16.1673, accuracy = 0.2500. (0.494 sec/step)

2017-03-13 18:46:03.268404: step 60, error = 15.7667, accuracy = 0.3281. (0.490 sec/step)

INFO:tensorflow:Saving checkpoints for 68 into cifar10_alexnet_model/model.ckpt.
2017-03-13 18:46:08.138950: step 70, error = 15.7079, accuracy = 0.3750. (0.504 sec/step)

2017-03-13 18:46:13.176954: step 80, error = 15.5807, accuracy = 0.3