In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf

def load_mnist():
    """Load the image/label arrays from ``../dataset`` and prepare them for training.

    The training images are doubled by appending a copy with uniform noise
    in [0, 1) added to every pixel (applied before the division by 255);
    the labels are duplicated to match.

    Returns:
        tuple: ``(x_train, x_test, t_train)``. The image arrays are float32,
        reshaped to (N, 28, 28, 1) and divided by 255. ``t_train`` is a
        one-hot matrix with 10 columns, one row per (augmented) training image.
    """
    x_train = np.load('../dataset/x_train.npy')
    # Noise-augmented copy. The noise is sized from the data itself so the
    # loader keeps working when the number of training samples is not 65000.
    x_train2 = x_train + np.random.rand(*x_train.shape)
    x_train = np.concatenate([x_train, x_train2])
    t_train = np.load('../dataset/t_train.npy')
    # Labels are repeated once so they line up with the doubled image array.
    t_train = np.concatenate([t_train, t_train])

    # Test data
    x_test = np.load('../dataset/x_test.npy')

    x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
    x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255
    # One-hot encode by picking rows of the 10x10 identity matrix.
    t_train = np.eye(10)[t_train.astype('int32').flatten()]

    return (x_train, x_test, t_train)
#x_train.shape

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fixed seeds so that runs are repeatable.
random_state = 42                       # seed handed to the scikit-learn helpers
rng = np.random.RandomState(seed=1234)  # generator used for weight initialisation

### レイヤー定義 ###
class Conv:
    """2-D convolution layer with a per-filter bias and an optional activation.

    Args:
        filter_shape: (height, width, input channels, output channels).
        function: callable applied to the convolution output plus bias;
            identity by default.
        strides: strides forwarded to ``tf.nn.conv2d``. A copy is stored as a
            list, so the caller's sequence is never shared with the layer.
        padding: padding mode forwarded to ``tf.nn.conv2d``.
    """

    def __init__(self, filter_shape, function=lambda x: x, strides=(1, 1, 1, 1), padding='VALID'):
        # He-style uniform initialisation: weights are drawn from
        # [-sqrt(6 / fan_in), sqrt(6 / fan_in)].
        fan_in = np.prod(filter_shape[:3])
        bound = np.sqrt(6 / fan_in)
        self.W = tf.Variable(rng.uniform(
                        low=-bound,
                        high=bound,
                        size=filter_shape
                    ).astype('float32'), name='W')
        # One bias per filter, so its length equals the number of output channels.
        self.b = tf.Variable(np.zeros((filter_shape[3]), dtype='float32'), name='b')
        self.function = function
        self.strides = list(strides)
        self.padding = padding

    def __call__(self, x):
        """Apply convolution, add the bias, then the activation."""
        u = tf.nn.conv2d(x, self.W, strides=self.strides, padding=self.padding) + self.b
        return self.function(u)

class Pooling:
    """Max-pooling layer; a thin wrapper around ``tf.nn.max_pool``.

    Args:
        ksize: pooling window size forwarded to ``tf.nn.max_pool``.
        strides: strides forwarded to ``tf.nn.max_pool``.
        padding: padding mode forwarded to ``tf.nn.max_pool``.

    ``ksize`` and ``strides`` are copied into fresh lists, so instances never
    share (or mutate) the default values or the caller's sequences.
    """

    def __init__(self, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='VALID'):
        self.ksize = list(ksize)
        self.strides = list(strides)
        self.padding = padding

    def __call__(self, x):
        """Apply max pooling to ``x``."""
        return tf.nn.max_pool(x, ksize=self.ksize, strides=self.strides, padding=self.padding)
    
class Flatten:
    """Collapse every axis after the first into one, keeping the first axis."""

    def __call__(self, x):
        # Static sizes of all axes except the leading one.
        trailing_dims = x.get_shape().as_list()[1:]
        flat_size = np.prod(trailing_dims)
        return tf.reshape(x, (-1, flat_size))
    
class Dense:
    """Fully connected layer: ``function(x @ W + b)``.

    Args:
        in_dim: number of input features.
        out_dim: number of output features.
        function: callable applied to the affine output; identity by default.
    """

    def __init__(self, in_dim, out_dim, function=lambda x: x):
        # He-style uniform initialisation with bound sqrt(6 / in_dim).
        limit = np.sqrt(6/in_dim)
        initial_w = rng.uniform(low=-limit, high=limit, size=(in_dim, out_dim))
        self.W = tf.Variable(initial_w.astype('float32'), name='W')
        self.b = tf.Variable(np.zeros([out_dim]).astype('float32'))
        self.function = function

    def __call__(self, x):
        """Apply the affine map followed by the activation."""
        affine = tf.matmul(x, self.W) + self.b
        return self.function(affine)
    
# Avoid the NaN that tf.log(0) would otherwise introduce into the cost
def tf_log(x):
    """Return the element-wise log of ``x`` with ``x`` floored at 1e-10."""
    # tf.maximum states the lower bound directly. The previous
    # clip_by_value(x, 1e-10, x) used x as its own upper bound, which puts
    # the minimum above the maximum whenever x < 1e-10.
    return tf.log(tf.maximum(x, 1e-10))

### Network ###

# Load the data, then hold out 10% of the training arrays for validation.
# NOTE(review): load_mnist() appends the noise-augmented copies before this
# split happens, so an image and its noisy twin can end up on opposite sides
# of the split -- validation accuracy may be optimistic; confirm this is intended.
x_train, x_test, t_train = load_mnist()
x_train, x_valid, t_train, t_valid = train_test_split(x_train, t_train, test_size=0.1, random_state=random_state)

# Clear the default graph before building the model.
tf.reset_default_graph()

# Graph inputs: a batch of 28x28x1 images and the matching 10-column label rows.
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
t = tf.placeholder(tf.float32, [None, 10])

# Two conv + max-pool stages followed by a softmax output layer.
h = Conv((5, 5, 1, 20), tf.nn.relu)(x)           # 28x28x 1 -> 24x24x20
h = Pooling((1, 2, 2, 1))(h)                           # 24x24x20 -> 12x12x20
h = Conv((5, 5, 20, 50), tf.nn.relu)(h)        # 12x12x20 ->  8x 8x50
h = Pooling((1, 2, 2, 1))(h)                           #  8x 8x50 ->  4x 4x50
h = Flatten()(h)
y = Dense(4*4*50, 10, tf.nn.softmax)(h)

# Cross-entropy between labels and softmax output, averaged over the batch.
cost = - tf.reduce_mean(tf.reduce_sum(t * tf_log(y), axis=1))
# Plain gradient descent with learning rate 0.01.
train = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

### Training ###

n_epochs = 50
batch_size = 100
# Floor division: samples beyond the last full batch are skipped in that epoch.
n_batches = x_train.shape[0]//batch_size

# NOTE(review): this op is never run; a second initializer is built and run below.
init = tf.global_variables_initializer()

# The session is opened without a `with` block and is not closed in this code.
#with tf.Session() as sess:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
    #sess.run(init)
for epoch in range(n_epochs):
    # Reorder images and labels together before each pass.
    # NOTE(review): the same seed is passed every epoch -- confirm that is intended.
    x_train, t_train = shuffle(x_train, t_train, random_state=random_state)
    for batch in range(n_batches):
        start = batch * batch_size
        end = start + batch_size
        sess.run(train, feed_dict={x: x_train[start:end], t: t_train[start:end]})
    # Evaluate on the whole validation set in a single run at the end of each epoch.
    y_pred, cost_valid = sess.run([y, cost], feed_dict={x: x_valid, t: t_valid})
    print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(
        epoch,
        cost_valid,
        accuracy_score(t_valid.argmax(axis=1), y_pred.argmax(axis=1))
    ))
# Predict class indices for the test images and write them to a CSV file
# with an 'id' index column and a 'label' column.
#y_pred = sess.run(y, feed_dict={x: x_test,is_training: True})
y_pred = sess.run(y, feed_dict={x: x_test})
y_pred = np.argmax(y_pred, axis=1)
submission = pd.Series(y_pred, name='label')
submission.to_csv('../dataset/submission_pred.csv', header=True, index_label='id')

EPOCH: 0, Valid Cost: 0.519, Valid Accuracy: 0.817
EPOCH: 1, Valid Cost: 0.447, Valid Accuracy: 0.845
EPOCH: 2, Valid Cost: 0.421, Valid Accuracy: 0.853
EPOCH: 3, Valid Cost: 0.396, Valid Accuracy: 0.863
EPOCH: 4, Valid Cost: 0.373, Valid Accuracy: 0.871
EPOCH: 5, Valid Cost: 0.362, Valid Accuracy: 0.875
EPOCH: 6, Valid Cost: 0.363, Valid Accuracy: 0.873
EPOCH: 7, Valid Cost: 0.353, Valid Accuracy: 0.879
EPOCH: 8, Valid Cost: 0.333, Valid Accuracy: 0.885
EPOCH: 9, Valid Cost: 0.332, Valid Accuracy: 0.885
EPOCH: 10, Valid Cost: 0.323, Valid Accuracy: 0.889
EPOCH: 11, Valid Cost: 0.322, Valid Accuracy: 0.888
EPOCH: 12, Valid Cost: 0.314, Valid Accuracy: 0.895
EPOCH: 13, Valid Cost: 0.318, Valid Accuracy: 0.892
EPOCH: 14, Valid Cost: 0.310, Valid Accuracy: 0.893
EPOCH: 15, Valid Cost: 0.304, Valid Accuracy: 0.894
EPOCH: 16, Valid Cost: 0.298, Valid Accuracy: 0.897
EPOCH: 17, Valid Cost: 0.292, Valid Accuracy: 0.902
EPOCH: 18, Valid Cost: 0.291, Valid Accuracy: 0.899
EPOCH: 19, Valid Cost: