In [1]:
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
from keras.datasets import mnist
import os
import time
# Restrict CUDA to the device with index 1 (CUDA_VISIBLE_DEVICES semantics).
# NOTE(review): the index is machine-specific — adjust it for your host.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# allow_growth=True asks TensorFlow to grow its GPU memory allocation on
# demand rather than reserving it up front.
gpu_options = tf.GPUOptions(allow_growth=True)
# Enable eager mode. Once activated it cannot be reversed! Run just once.
tfe.enable_eager_execution(config=tf.ConfigProto(gpu_options=gpu_options))

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


Using TensorFlow backend.


In [2]:
# Fetch the MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast the images to float32, divide by 255, and reshape them to
# (-1, 28, 28, 1) so every image carries an explicit trailing channel axis
# (this is NOT a flatten: the 28x28 spatial layout is kept).
x_train = (x_train.astype('float32') / 255.).reshape((-1, 28, 28, 1))
x_test = (x_test.astype('float32') / 255.).reshape((-1, 28, 28, 1))

# Store the labels as int32.
y_train, y_test = y_train.astype('int32'), y_test.astype('int32')

# Report the shape of every array.
for split_name, split_array in (('x train', x_train), ('y train', y_train),
                                ('x test', x_test), ('y test', y_test)):
    print(split_name, split_array.shape)

x train (60000, 28, 28, 1)
y train (60000,)
x test (10000, 28, 28, 1)
y test (10000,)


# 数据的迭代可以重复

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
# Wrap the NumPy arrays as tf.data datasets of (image, label) pairs. The
# shuffle buffers (60000 / 10000) match the sizes of the two splits.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train)).shuffle(buffer_size=60000)
val_dataset = tf.data.Dataset.from_tensor_slices((x_test,y_test)).shuffle(buffer_size=10000)

# Peek at a single un-batched sample to confirm what eager iteration yields:
# only the Python types of the image and of the label are printed.
for a,b in tfe.Iterator(train_dataset):
    print(type(a.numpy()))
    print(type(b.numpy()))
    break

# Batch only after the peek above so that the sample inspected is one example.
# repeat(1) keeps each pass over the dataset at exactly one epoch; a new
# tfe.Iterator(dataset) can be created whenever another pass is needed.
train_dataset = train_dataset.batch(128).repeat(1)
val_dataset = val_dataset.batch(128).repeat(1)

<class 'numpy.ndarray'>
<class 'numpy.int32'>


In [4]:
class cla(tf.keras.Model):
    """Small convolutional classifier trained with TensorFlow eager execution.

    The network is two 3x3 convolutions (16 and 32 filters, ReLU), a flatten,
    a 50-unit ReLU dense layer and a linear output layer with `num_class`
    units. Besides the forward pass, the class bundles the loss, gradient
    computation, checkpoint save/restore, and a training loop with early
    stopping.
    """

    def __init__(self,num_class,checkpoint_directory):
        """Define the layers used during the forward pass.

        Args:
            num_class: number of output units (one logit per class).
            checkpoint_directory: path handed to the saver as the checkpoint
                file prefix and searched for the latest checkpoint on restore.
        """
        super(cla, self).__init__()
        # Define the checkpoint directory
        self.checkpoint_directory = checkpoint_directory
        # Hidden layers.
        self.conv1 = tf.layers.Conv2D(16,(3,3),activation=tf.nn.relu)
        self.conv2 = tf.layers.Conv2D(32,(3,3),activation=tf.nn.relu)
        self.flatten = tf.layers.Flatten()
        self.dense_layer = tf.layers.Dense(50, activation=tf.nn.relu)
        # Output layer. No activation: the model returns raw logits.
        self.output_layer = tf.layers.Dense(num_class, activation=None)

    def call(self,input):
        """Run the forward pass and return the un-normalized class logits."""
        x = self.conv1(input)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.dense_layer(x)
        x = self.output_layer(x)
        return x

    def loss_fn(self,input,target):
        """Compute the training loss for one batch.

        Args:
            input: batch of images fed to `call`.
            target: integer class labels for the batch.

        Returns:
            A `(loss, logits)` tuple: the sparse softmax cross-entropy loss
            and the logits it was computed from.
        """
        logits = self.call(input)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=target, logits=logits)
        return loss,logits

    def grads_fn(self,input,target):
        """Return the gradients of the loss w.r.t. `self.variables`.

        The forward pass is recorded on a gradient tape so the gradients are
        computed dynamically for this batch. The result is ordered like
        `self.variables`.
        """
        with tfe.GradientTape() as tape:
            loss,_ = self.loss_fn(input, target)
        return tape.gradient(loss, self.variables)

    def restore_model(self):
        """Restore the variables from the latest checkpoint.

        Raises:
            ValueError: if no checkpoint is found for
                `self.checkpoint_directory`.
        """
        # Run the model once on a dummy batch so that the variables exist
        # before the saver tries to restore them.
        self.call(tf.zeros((1,28,28,1)))
        latest = tf.train.latest_checkpoint(self.checkpoint_directory)
        if latest is None:
            # Fail with an actionable message instead of passing None on.
            raise ValueError('No checkpoint found in %r; train with '
                             'train_from_scratch=True first.'
                             % (self.checkpoint_directory,))
        # Restore the variables of the model
        tfe.Saver(self.variables).restore(latest)

    def save_model(self, global_step=0):
        """Save the current variables.

        Args:
            global_step: step number passed to the saver for this checkpoint.
        """
        # The saver writes files under the parent directory of the prefix;
        # make sure that directory exists before saving.
        parent = os.path.dirname(self.checkpoint_directory)
        if parent:
            os.makedirs(parent, exist_ok=True)
        tfe.Saver(self.variables).save(self.checkpoint_directory,
                                       global_step=global_step)

    def compute_accuracy(self, input_data):
        """Compute the accuracy on the input data.

        Args:
            input_data: dataset yielding `(images, targets)` batches.

        Returns:
            The `tfe.metrics.Accuracy` metric object accumulated over one
            pass of `input_data`; call `.result()` on it for the value.
        """
        # No explicit device scope: the class never defines `self.device`,
        # so the default placement is used.
        acc = tfe.metrics.Accuracy()
        for images, targets in tfe.Iterator(input_data):
            # Un-normalized score of each class
            logits = self.call(images)
            # Select the class with the highest score
            preds = tf.argmax(logits, axis=1)
            # Cast both sides to int32, as `fit` does, before comparing.
            acc(tf.cast(tf.reshape(targets, [-1,]), tf.int32),
                tf.cast(preds, tf.int32))
        return acc

    def _evaluate(self, data):
        """Return `(mean batch loss, accuracy)` over one pass of `data`."""
        mean_loss = tfe.metrics.Mean()
        accuracy = tfe.metrics.Accuracy()
        for images, target in tfe.Iterator(data):
            loss,logits = self.loss_fn(images, target)
            mean_loss(loss)
            preds = tf.argmax(logits, axis=1)
            accuracy(tf.cast(target,tf.int32), tf.cast(preds,tf.int32))
        # The accuracy metric already accumulates over every batch, so its
        # final result is the accuracy of the whole pass.
        return mean_loss.result().numpy(), accuracy.result().numpy()

    def fit(self, training_data, eval_data, optimizer, num_epochs=500,
            early_stopping_rounds=10, verbose=10, train_from_scratch=False):
        """ Function to train the model, using the selected optimizer and
            for the desired number of epochs. You can either train from scratch
            or load the latest model trained. Early stopping is used in order to
            mitigate the risk of overfitting the network.

            After every epoch the loss and accuracy on both datasets are
            appended to `self.history` (keys 'train_loss', 'eval_loss',
            'train_acc', 'eval_acc'), and the model is saved whenever the
            eval loss improves.

            Args:
                training_data: the data you would like to train the model on.
                                Must be in the tf.data.Dataset format.
                eval_data: the data you would like to evaluate the model on.
                            Must be in the tf.data.Dataset format.
                optimizer: the optimizer used during training.
                num_epochs: the maximum number of iterations you would like to
                            train the model.
                early_stopping_rounds: stop training if the loss on the eval
                                       dataset does not decrease after n epochs.
                verbose: int. Specify how often to print the loss value of the
                         network. 0 disables the per-epoch report.
                train_from_scratch: boolean. Whether to initialize variables of
                                    the last trained model or initialize them
                                    randomly.
        """

        if not train_from_scratch:
            self.restore_model()

        # Lowest loss seen so far on the eval dataset.
        best_loss = np.inf
        # Epochs left before early stopping; set up front so that it is
        # defined even when the first eval loss is not an improvement.
        count = early_stopping_rounds

        # Initialize dictionary to store the loss history
        self.history = {'train_loss': [], 'eval_loss': [],
                        'train_acc': [], 'eval_acc': []}

        for i in range(num_epochs):
            # Training with gradient descent
            traing_begin = time.time()
            for images, target in tfe.Iterator(training_data):
                grads = self.grads_fn(images, target)
                optimizer.apply_gradients(zip(grads, self.variables))
            print('trainging\'s time is %f s' % (time.time()-traing_begin))

            # Measure loss and accuracy on both datasets after the epoch
            train_loss, train_acc = self._evaluate(training_data)
            self.history['train_loss'].append(train_loss)
            self.history['train_acc'].append(train_acc)

            eval_loss, eval_acc = self._evaluate(eval_data)
            self.history['eval_loss'].append(eval_loss)
            self.history['eval_acc'].append(eval_acc)

            # Print train and eval losses
            if verbose and (i == 0 or (i+1) % verbose == 0):
                print('Train loss at epoch %d: ' %(i+1), self.history['train_loss'][-1])
                print('Eval loss at epoch %d: ' %(i+1), self.history['eval_loss'][-1])
                print('train acc at epoch %d: ' %(i+1), self.history['train_acc'][-1])
                print('Eval acc at epoch %d: ' %(i+1), self.history['eval_acc'][-1])

            # Check for early stopping
            if self.history['eval_loss'][-1]<best_loss:
                best_loss = self.history['eval_loss'][-1]
                count = early_stopping_rounds
                self.save_model(i)
            else:
                count -= 1
            if count==0:
                break

In [5]:
# Specify the path where you want to save/restore the trained variables.
# The model passes this string to the saver as the checkpoint prefix and to
# tf.train.latest_checkpoint when restoring.
checkpoint_directory = 'models_checkpoints/myself/'

# Define optimizer (Adam, constructed with no arguments).
optimizer = tf.train.AdamOptimizer()

# Instantiate model with 10 output classes. This doesn't initialize the
# variables yet.
model = cla(num_class=10,checkpoint_directory=checkpoint_directory)
    

In [6]:
# Train model: at most 10 epochs, stop early after 5 epochs without an
# improvement of the eval loss, report every epoch (verbose=1), and do not
# restore a previous checkpoint (train_from_scratch=True).
model.fit(train_dataset, val_dataset, optimizer, num_epochs=10, 
          early_stopping_rounds=5, verbose=1, train_from_scratch=True)

trainging's time is 23.319279 s
Train loss at epoch 1:  0.05657598376869838
Eval loss at epoch 1:  0.05407835519577883
train acc at epoch 1:  0.9836413701472394
Eval acc at epoch 1:  0.9809335355266682
trainging's time is 23.285469 s
Train loss at epoch 2:  0.03367277329215712
Eval loss at epoch 2:  0.046097816056512954
train acc at epoch 2:  0.9903053613854902
Eval acc at epoch 2:  0.9829784233248888
trainging's time is 23.315906 s
Train loss at epoch 3:  0.025660915919311487
Eval loss at epoch 3:  0.04528208744131571
train acc at epoch 3:  0.9922107704751149
Eval acc at epoch 3:  0.9879037392339555
trainging's time is 23.086178 s
Train loss at epoch 4:  0.01845714428163231
Eval loss at epoch 4:  0.047888580503390184
train acc at epoch 4:  0.9941769316459182
Eval acc at epoch 4:  0.9833732218655078
trainging's time is 23.579183 s
Train loss at epoch 5:  0.016390818323362182
Eval loss at epoch 5:  0.04020401523477999
train acc at epoch 5:  0.9938216521822332
Eval acc at epoch 5:  0.986