In [2]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline

# Load MNIST from the local "MNIST_data/" directory with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Seed every RNG the notebook relies on so a Restart & Run All is repeatable.
# The TensorFlow graph-level seed alone is not enough: mini-batch shuffling in
# mnist.train.next_batch presumably goes through NumPy's global RNG (verify
# against the installed input_data module), so NumPy is seeded as well.
# NumPy is seeded after loading so that nothing done by the loader can
# overwrite the seed.
tf.set_random_seed(777)
np.random.seed(777)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Training hyperparameters
learning_rate = 0.001  # step size handed to the Adam optimizer
num_epochs = 20        # number of passes over the training set
batch_size = 100       # examples per training step
# Whole mini-batches per epoch; a partial trailing batch is not counted.
num_iterations = mnist.train.num_examples // batch_size

class Model:
    """Convolutional classifier for flattened 28x28 images with 10 classes.

    The graph is three conv -> max-pool -> dropout stages, followed by a
    625-unit dense layer with dropout and a 10-unit logits layer. All layers
    are created inside ``tf.variable_scope(name)``, which lets several
    instances live in the same graph/session (used below for an ensemble).

    The optimizer reads the module-level ``learning_rate``.
    """

    def __init__(self, sess, name):
        """Remember the session and scope name, then build the graph.

        Args:
            sess: object whose ``run(fetches, feed_dict=...)`` executes ops.
            name: name passed to ``tf.variable_scope`` for this model's layers.
        """
        self.sess = sess
        self.name = name
        self._build_net()

    def _build_net(self):
        """Create placeholders, layers, loss, optimizer and accuracy ops."""
        with tf.variable_scope(self.name):
            # Dropout switch: feed True while training, False when evaluating.
            self.training = tf.placeholder(tf.bool)

            # Inputs arrive flattened (784 values per image) and are reshaped
            # to (batch, 28, 28, 1), i.e. a single grayscale channel.
            self.X = tf.placeholder(tf.float32, [None, 28*28])
            X_img = tf.reshape(self.X, [-1, 28, 28, 1])
            # One-hot labels over 10 classes.
            self.Y = tf.placeholder(tf.float32, [None, 10])

            # Stage 1: 32 filters of 3x3, 2x2 max-pool (stride 2), dropout 0.3
            conv1 = tf.layers.conv2d(inputs=X_img, filters=32, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], padding="SAME", strides=2)
            dropout1 = tf.layers.dropout(inputs=pool1, rate=0.3, training=self.training)

            # Stage 2: 64 filters of 3x3, 2x2 max-pool (stride 2), dropout 0.3
            conv2 = tf.layers.conv2d(inputs=dropout1, filters=64, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], padding="SAME", strides=2)
            dropout2 = tf.layers.dropout(inputs=pool2, rate=0.3, training=self.training)

            # Stage 3: 128 filters of 3x3, 2x2 max-pool (stride 2), dropout 0.3
            conv3 = tf.layers.conv2d(inputs=dropout2, filters=128, kernel_size=[3, 3], padding="SAME", activation=tf.nn.relu)
            pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], padding="SAME", strides=2)
            dropout3 = tf.layers.dropout(inputs=pool3, rate=0.3, training=self.training)

            # Static shape of the last feature map, printed as a build-time check.
            dropout3_size = dropout3.shape
            print(dropout3_size)

            # Flatten for the dense layer. The width is derived from the static
            # shape (all non-batch dimensions multiplied together) rather than
            # hard-coded, so it stays correct if the layers above change.
            flat_dim = int(np.prod(dropout3_size.as_list()[1:]))
            flat = tf.reshape(dropout3, [-1, flat_dim])
            dense4 = tf.layers.dense(inputs=flat, units=625, activation=tf.nn.relu)
            dropout4 = tf.layers.dropout(inputs=dense4, rate=0.5, training=self.training)

            # Final fully connected layer: 625 inputs -> 10 outputs, no
            # activation (raw logits).
            self.logits = tf.layers.dense(inputs=dropout4, units=10)

        # Mean softmax cross-entropy over the batch, minimized with Adam.
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

        # Fraction of samples whose arg-max logit matches the arg-max label.
        correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

    def predict(self, x_test, training=False):
        """Run the network on ``x_test`` and return the raw logits.

        Args:
            x_test: value fed to the image placeholder ``self.X``.
            training: value fed to the dropout switch (False by default).

        Returns:
            Whatever ``sess.run`` yields for ``self.logits``.
        """
        return self.sess.run(self.logits, feed_dict={self.X: x_test, self.training: training})

    def get_accuracy(self, x_test, y_test, training=False):
        """Evaluate the accuracy op on the given images and one-hot labels.

        Args:
            x_test: value fed to ``self.X``.
            y_test: value fed to ``self.Y``.
            training: value fed to the dropout switch (False by default).

        Returns:
            Whatever ``sess.run`` yields for ``self.accuracy``.
        """
        return self.sess.run(self.accuracy, feed_dict={self.X: x_test, self.Y: y_test, self.training: training})

    def train(self, x_train, y_train, training=True):
        """Run one optimizer step on a batch.

        Args:
            x_train: value fed to ``self.X``.
            y_train: value fed to ``self.Y``.
            training: value fed to the dropout switch (True by default).

        Returns:
            The result of running ``[self.cost, self.optimizer]``; callers
            unpack it as ``cost, _``.
        """
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: x_train, self.Y: y_train, self.training: training})

In [4]:
# A single session is shared by every ensemble member.
sess = tf.Session()

# Build the ensemble; each model gets its own scope name ('model0', 'model1', ...).
num_models = 2
models = [Model(sess, 'model' + str(idx)) for idx in range(num_models)]
models

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
(?, 4, 4, 128)
Instructions for updating:
Use keras.layers.dense instead.
(?, 4, 4, 128)


[<__main__.Model at 0x104ceb908>, <__main__.Model at 0x1a3655f3c8>]

In [6]:
sess.run(tf.global_variables_initializer())

print("Learning Started!")
# Train all models side by side: each mini-batch is fed to every model in turn.
for epoch in range(num_epochs):
    # Running per-model mean of the batch costs for this epoch.
    epoch_costs = np.zeros(num_models)

    for step in range(num_iterations):
        images, labels = mnist.train.next_batch(batch_size)

        for slot, model in enumerate(models):
            batch_cost, _ = model.train(images, labels)
            # Dividing each term by the step count makes the sum an average.
            epoch_costs[slot] += batch_cost / num_iterations

    print("Epoch:", "%04d" % (epoch + 1), "Cost:", epoch_costs)

print("Learning Finished!")
    

Learning Started!
Epoch: 0001 Cost: [0.28105694 0.28110329]
Epoch: 0002 Cost: [0.08787663 0.08527374]
Epoch: 0003 Cost: [0.06512525 0.06564341]
Epoch: 0004 Cost: [0.05413894 0.05501973]
Epoch: 0005 Cost: [0.04911405 0.04804505]
Epoch: 0006 Cost: [0.04438059 0.04338993]
Epoch: 0007 Cost: [0.04155274 0.04128129]
Epoch: 0008 Cost: [0.03777087 0.03621759]
Epoch: 0009 Cost: [0.03688554 0.03445644]
Epoch: 0010 Cost: [0.03437085 0.03368921]
Epoch: 0011 Cost: [0.03261593 0.03046601]
Epoch: 0012 Cost: [0.03021749 0.03129431]
Epoch: 0013 Cost: [0.02848003 0.02714292]
Epoch: 0014 Cost: [0.02806505 0.02715264]
Epoch: 0015 Cost: [0.02631869 0.02528819]
Epoch: 0016 Cost: [0.02732981 0.02636327]
Epoch: 0017 Cost: [0.02506679 0.02466565]
Epoch: 0018 Cost: [0.02734283 0.02391823]
Epoch: 0019 Cost: [0.02280169 0.0214892 ]
Epoch: 0020 Cost: [0.02367655 0.02199571]
Learning Finished!


In [11]:
# Evaluate every model on the test set and accumulate its outputs for the
# ensemble prediction.
test_size = mnist.test.labels.shape[0]
predictions = np.zeros((test_size, 10))

for idx, model in enumerate(models):
    print(idx, 'Accuracy:', model.get_accuracy(mnist.test.images, mnist.test.labels))
    model_logits = model.predict(mnist.test.images)
    print(model_logits.shape)
    # predict() returns raw logits, so this adds up the 10 per-class logits
    # (not probabilities) of each test sample across models; the arg-max per
    # sample is taken in a later cell.
    predictions += model_logits

0 Accuracy: 0.9948
(10000, 10)
1 Accuracy: 0.9942
(10000, 10)


In [10]:
# Sanity check: the accumulator has one row per test sample and 10 class columns.
predictions.shape

(10000, 10)

In [12]:
# Ensemble decision: for each test sample pick the class with the largest summed
# score and compare it with the class marked in the one-hot label.
# Both inputs are already NumPy arrays, so this is computed with NumPy directly:
# no large constants are embedded in the TensorFlow graph and re-running the
# cell does not add new ops to it.
ensemble_correct_prediction = np.argmax(predictions, axis=1) == np.argmax(mnist.test.labels, axis=1)
ensemble_accuracy = np.mean(ensemble_correct_prediction)
print("Ensemble accuracy:", ensemble_accuracy)

Ensemble accuracy: 0.9953


일반적으로 위와 같이 개별 모델의 정확도(0.9948, 0.9942)보다 ensemble 모델의 정확도(0.9953)가 더 높게 나온다. 즉, 여러 모델의 예측을 합치면 단일 모델보다 성능이 향상된다고 할 수 있다.