# 第7回講義 宿題

## 課題

今Lessonで学んだことに工夫を加えて、CNNでより高精度なCIFAR10の分類器を実装してみましょう。精度上位者はリーダーボードに載ります。

### 目標値

Accuracy 78%

### ルール

- 訓練データはx_train、 t_train、テストデータはx_testで与えられます。
- 予測ラベルは one_hot表現ではなく0~9のクラスラベル で表してください。
- **下のセルで指定されているx_train、t_train以外の学習データは使わないでください。**
- ネットワークの形などは特に制限を設けません。
- 高レベルのAPI(tf.layers)を利用しても構いません。

### 提出方法

- 2つのファイルを提出していただきます。
  - テストデータ (x_test) に対する予測ラベルをcsvファイル (ファイル名: submission_pred.csv) で提出してください。
  - それに対応するpythonのコードをsubmission_code.pyとして提出してください (%%writefileコマンドなどを利用してください)。

### 評価方法

- 予測ラベルのt_testに対する精度 (Accuracy) で評価します。
- 毎日夜24時にテストデータの一部に対する精度でLeader Boardを更新します。
- 締切日の夜24時にテストデータ全体に対する精度でLeader Boardを更新します。これを最終的な評価とします。

### データの読み込み

- この部分は修正しないでください

In [1]:
import numpy as np
import pandas as pd

def load_cifar10():
    
    # 学習データ
    x_train = np.load('/root/userspace/public/chap07/data/x_train.npy')
    t_train = np.load('/root/userspace/public/chap07/data/t_train.npy')

    # テストデータ
    x_test = np.load('/root/userspace/public/chap07/data/x_test.npy')
    
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
    
    t_train = np.eye(10)[t_train.astype('int32').flatten()]
    
    return (x_train, x_test, t_train)

In [2]:
class BatchNorm:
    def __init__(self, shape, epsilon=np.float32(1e-5)):
        self.gamma = tf.Variable(np.ones(shape, dtype='float32'), name='gamma')
        self.beta  = tf.Variable(np.zeros(shape, dtype='float32'), name='beta')
        self.epsilon = epsilon

    def __call__(self, x):
        mean, var = tf.nn.moments(x, axes=(0,1,2), keep_dims=True)
        std = tf.sqrt(var + self.epsilon)
        x_normalized = (x - mean) / std
        return self.gamma * x_normalized + self.beta
    
class Conv:
    def __init__(self, filter_shape, function=lambda x: x, strides=[1,1,1,1], padding='VALID'):
        fan_in = np.prod(filter_shape[:3])
        self.W = tf.Variable(rng.uniform(
                        low=-np.sqrt(6/fan_in),
                        high=np.sqrt(6/fan_in),
                        size=filter_shape
                    ).astype('float32'), name='W')
        self.b = tf.Variable(np.zeros((filter_shape[3]), dtype='float32'), name='b') # バイアスはフィルタごと
        self.function = function
        self.strides = strides
        self.padding = padding

    def __call__(self, x):
        u = tf.nn.conv2d(x, self.W, strides=self.strides, padding=self.padding) + self.b
        return self.function(u)
    
class Pooling:
    def __init__(self, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID'):
        self.ksize = ksize
        self.strides = strides
        self.padding = padding
    
    def __call__(self, x):
        return tf.nn.avg_pool(x, ksize=self.ksize, strides=self.strides, padding=self.padding) #maxからavgに変更

class Flatten:
    def __call__(self, x):
        return tf.reshape(x, (-1, np.prod(x.get_shape().as_list()[1:])))
    
class Dense:
    def __init__(self, in_dim, out_dim, function=lambda x: x):
        self.W = tf.Variable(rng.uniform(
                        low=-np.sqrt(6/in_dim),
                        high=np.sqrt(6/in_dim),
                        size=(in_dim, out_dim)
                    ).astype('float32'), name='W')
        self.b = tf.Variable(np.zeros([out_dim]).astype('float32'))
        self.function = function

    def __call__(self, x):
        return self.function(tf.matmul(x, self.W) + self.b)

class Activation:
    def __init__(self, function=lambda x: x):
        self.function = function
    
    def __call__(self, x):
        return self.function(x)

### 畳み込みニューラルネットワーク(CNN)の実装

In [7]:
# %%writefile /root/userspace/chap07/materials/submission_code.py

import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

rng = np.random.RandomState(1234) #乱数がfixできてない
random_state = 8 #1or8がよさげ

def tf_log(x):
    # WRITE ME
    return tf.log(tf.clip_by_value(x, 1e-10, x))

### ネットワーク ###
tf.reset_default_graph()
is_training = tf.placeholder(tf.bool, shape=())

# WRITE ME 
# 1.layersのAPIを使用
# x = tf.placeholder(shape=[None, 32, 32, 3], dtype=tf.float32)
# t = tf.placeholder(tf.float32, [None, 10])

# h = tf.layers.Conv2D(filters=32, kernel_size= [3, 3])(x) # 32x32x3 -> 30x30x32 # conv2dの初期設定: strides=(1, 1), padding='valid' 
# h = tf.layers.BatchNormalization()(h, training=is_training)
# h = tf.nn.relu(h)
# h = tf.layers.MaxPooling2D(pool_size=[2, 2], strides=2)(h) # 30x30x32 -> 15x15x32

# h = tf.layers.Conv2D(filters=64, kernel_size= [3, 3])(h) # 15x15x32 -> 13x13x64
# h = tf.layers.BatchNormalization()(h, training=is_training)
# h = tf.nn.relu(h)
# h = tf.layers.MaxPooling2D(pool_size=[2, 2], strides=2)(h) # 13x13x64 -> 6x6x64

# h = tf.layers.Conv2D(filters=128, kernel_size= [3, 3])(h) # 6x6x64 -> 4x4x128
# h = tf.layers.BatchNormalization()(h, training=is_training)
# h = tf.nn.relu(h)
# h = tf.layers.MaxPooling2D(pool_size=[2, 2], strides=2)(h) # 4x4x128 -> 2x2x128

# h = tf.layers.Flatten()(h)
# h = tf.layers.Dense(units=256, activation=tf.nn.relu)(h)
# # WRITE ME
# y = tf.layers.Dense(units=10, activation=tf.nn.softmax)(h) #このやりかただと、最後のtestデータに対する出力のやりかたがわからない


# 2. API使用せず
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
t = tf.placeholder(tf.float32, [None, 10])
                                                      # (縦の次元数)x(横の次元数)x(チャネル数)
layers = [
    Conv((3, 3, 3, 32), padding='VALID'),             # 32x32x3 -> 30x30x32
    Conv((3, 3, 32, 32), padding='SAME'),#追加
    BatchNorm((30, 30, 32)), #testデータについてはBN必要なくない？
    Activation(tf.nn.relu),
    Pooling((1, 2, 2, 1)),                        # 30x30x32 -> 15x15x32
    Conv((3, 3, 32, 64), padding='VALID'),           # 15x15x32 -> 13x13x64
    BatchNorm((13, 13, 64)),
    Activation(tf.nn.relu),
    Pooling(((1, 2, 2, 1))),                    # 13x13x64 -> 6x6x64
    Conv((3, 3, 64, 128), padding='VALID'),          # 6x6x64 -> 4x4x128
    BatchNorm((4, 4, 128)),
    Activation(tf.nn.relu),
    Pooling((1, 2, 2, 1)),                         # 4x4x128 -> 2x2x128
    Flatten(),
    Dense(2*2*128, 256, tf.nn.relu),
    Dense(256, 10, tf.nn.softmax)
]
def f_props(layers, h):
    for layer in layers:
        h = layer(h)
    return h
y = f_props(layers, x)


cost = - tf.reduce_mean(tf.reduce_sum(t * tf_log(y), axis=1))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(0.002).minimize(cost) #0.1から適宜変更

### 前処理 ###
def gcn(x):
    # WRITE ME
    mean = np.mean(x, axis=(1, 2, 3), keepdims=True)
    std = np.std(x, axis=(1, 2, 3), keepdims=True)
    return (x - mean)/std

class ZCAWhitening:
    # WRITE ME
    def __init__(self, epsilon=1e-4):
        self.epsilon = epsilon
        self.mean = None
        self.ZCA_matrix = None

    def fit(self, x):
        x = x.reshape(x.shape[0], -1)
        self.mean = np.mean(x, axis=0)
        x -= self.mean
        cov_matrix = np.dot(x.T, x) / x.shape[0]
        A, d, _ = np.linalg.svd(cov_matrix)
        self.ZCA_matrix = np.dot(np.dot(A, np.diag(1. / np.sqrt(d + self.epsilon))), A.T)

    def transform(self, x):
        shape = x.shape
        x = x.reshape(x.shape[0], -1)
        x -= self.mean
        x = np.dot(x, self.ZCA_matrix.T)
        return x.reshape(shape)    
    
x_train, x_test, t_train = load_cifar10()

# #追加
# padded = np.pad(x_train, ((0, 0), (4, 4), (4, 4), (0, 0)), mode='constant')
# crops = rng.randint(8, size=(len(x_train), 2))
# x_train_cropped = [padded[i, c[0]:(c[0]+32), c[1]:(c[1]+32), :] for i, c in enumerate(crops)]
# x_train_cropped = np.array(x_train_cropped)
# x_train = np.dstack([x_train, x_train_cropped]) #データセットの結合方法がわからない
# t_train = np.dstack([t_train, t_train])
# #

x_train, x_valid, t_train, t_valid = train_test_split(x_train, t_train, test_size=0.1, random_state=random_state)
zca = ZCAWhitening()
zca.fit(x_train)
x_train_zca = zca.transform(gcn(x_train))
t_train_zca = t_train[:]
x_valid_zca = zca.transform(gcn(x_valid))
t_valid_zca = t_valid[:]
x_test_zca = zca.transform(gcn(x_test))

### 学習 ###
n_epochs = 8 #10だとcostが再上昇してしまう？
batch_size = 100
n_batches = x_train.shape[0]//batch_size

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

for epoch in range(n_epochs):
    # WRITE ME 
    #1に対応
#     x_train_zca, t_train_zca = shuffle(x_train_zca, t_train_zca, random_state=random_state)
#     for batch in range(n_batches):
#         start = batch * batch_size
#         end = start + batch_size
#         sess.run(optimizer, feed_dict={x: x_train_zca[start:end], t: t_train_zca[start:end], is_training: True})
#     y_pred, cost_valid = sess.run([y, cost], feed_dict={x: x_valid_zca, t: t_valid_zca, is_training: False})
#     print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(
#         epoch,
#         cost_valid,
#         accuracy_score(t_valid.argmax(axis=1), y_pred.argmax(axis=1))
#     ))
    #2に対応
    x_train_zca, t_train_zca = shuffle(x_train_zca, t_train_zca, random_state=random_state)
    for batch in range(n_batches):
        start = batch * batch_size
        end = start + batch_size
        sess.run(optimizer, feed_dict={x: x_train_zca[start:end], t: t_train_zca[start:end]})
    y_pred, cost_valid = sess.run([y, cost], feed_dict={x: x_valid_zca, t: t_valid_zca})
    print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(
        epoch,
        cost_valid,
        accuracy_score(t_valid.argmax(axis=1), y_pred.argmax(axis=1))
    ))


# WRITE ME
y_pred = sess.run(y, feed_dict={x: x_test_zca})
y_pred = y_pred.argmax(axis=1)
print(y_pred)

submission = pd.Series(y_pred, name='label')
submission.to_csv('/root/userspace/chap07/materials/submission_pred.csv', header=True, index_label='id') #/chap07/materials/を追加

sess.close()

EPOCH: 0, Valid Cost: 0.871, Valid Accuracy: 0.711
EPOCH: 1, Valid Cost: 0.742, Valid Accuracy: 0.745
EPOCH: 2, Valid Cost: 0.656, Valid Accuracy: 0.780
EPOCH: 3, Valid Cost: 0.672, Valid Accuracy: 0.778
EPOCH: 4, Valid Cost: 0.657, Valid Accuracy: 0.783
EPOCH: 5, Valid Cost: 0.628, Valid Accuracy: 0.795
EPOCH: 6, Valid Cost: 0.662, Valid Accuracy: 0.796
EPOCH: 7, Valid Cost: 0.678, Valid Accuracy: 0.800
[7 5 4 ... 0 2 3]
