# 第5回講義 宿題

## 課題. TensorFlowを用いて, MNISTを多層パーセプトロン(MLP)で学習せよ

### 注意

- homework関数を完成させて提出してください
    - 訓練データはtrain_X, train_y, テストデータはtest_Xで与えられます
    - train_Xとtrain_yをtrain_X, train_yとvalid_X, valid_yに分けるなどしてモデルを学習させてください
    - test_Xに対して予想ラベルpred_yを作り, homework関数の戻り値としてください
- pred_yのtest_yに対する精度(F値)で評価します
- 全体の実行時間がiLect上で60分を超えないようにしてください
- homework関数の外には何も書かないでください (必要なものは全てhomework関数に入れてください)
- 解答提出時には Answer Cell の内容のみを提出してください

- CNNは使わないでください

**`tf` の以下のモジュールはこの回では使用できないように制限されています. 注意してください.**
```python
tf.app
tf.compat
tf.contrib
tf.errors
tf.gfile
tf.graph_util
tf.image
tf.layers
tf.logging
tf.losses
tf.metrics
tf.python_io
tf.resource_loader
tf.saved_model
tf.sdca
tf.sets
tf.summary
tf.sysconfig
tf.test
tf.train
```

次のセルのhomework関数を完成させて提出してください

# Answer Cell

In [10]:
def homework(train_X, train_y, test_X):
    """Train an MLP on the given data and return predicted labels for test_X.

    The training data is split once into a training part (80%) and a
    validation part (20%). A fully connected network (tanh hidden layers,
    softmax output) is trained with mini-batch gradient descent and dropout,
    and training stops early once the validation loss stops improving.

    Args:
        train_X: 2-D array of training inputs, one sample per row.
        train_y: 1-D array of integer class labels in the range 0-9.
        test_X: 2-D array of inputs to classify; same width as train_X.

    Returns:
        1-D array holding the predicted class index for each row of test_X.
    """
    import numpy as np
    import tensorflow as tf
    from sklearn.utils import shuffle
    from sklearn.model_selection import train_test_split

    # hyperparameters
    rng = np.random.RandomState(1234)
    random_state = 42
    n_classes = 10
    max_epoch = 1000

    # helpers
    def one_to_hot(x, n_columns=n_classes):
        """Return the one-hot encoding of the integer labels ``x``.

        Column ``k`` always stands for class ``k``. The width is fixed to
        ``n_columns`` instead of being derived from the labels that happen to
        be present, so a small training subset that lacks some class still
        produces targets matching the network's output layer.
        """
        labels = np.asarray(x).astype('int64')
        encoded = np.zeros((labels.shape[0], n_columns), dtype='float32')
        encoded[np.arange(labels.shape[0]), labels] = 1
        return encoded

    def get_init_weight(n_in, n_out, weight=0.08):
        """Draw an (n_in, n_out) float32 matrix uniformly from [-weight, weight)."""
        return rng.uniform(low=-weight, high=weight, size=(n_in, n_out)).astype('float32')

    # classes
    class EarlyStopping(object):
        """Track the best validation loss and signal when training should stop."""

        def __init__(self, limit=5):
            self.stop_count = 0
            self.limit = limit
            self.best_validation_loss = float('inf')

        def check(self, loss):
            """Record ``loss``; return True once it should stop training.

            The counter resets whenever the loss improves on the best value
            seen so far. True is returned when the loss has failed to improve
            on more than ``limit`` consecutive calls.
            """
            if loss < self.best_validation_loss:
                self.best_validation_loss = loss
                self.stop_count = 0
            else:
                self.stop_count += 1
            return self.stop_count > self.limit

    class Model(object):
        """Multi-layer perceptron with dropout and a softmax output layer."""

        def __init__(self, n_hiddens=(512, 256, 128),
                     act_function=(tf.tanh, tf.tanh, tf.tanh), lr=0.01):
            if len(act_function) != len(n_hiddens):
                raise ValueError("need exactly one activation per hidden layer")
            tf.reset_default_graph()
            self.lr = lr
            # Build fresh lists so that neither the defaults nor a sequence
            # passed in by the caller is mutated when softmax is added.
            self.act_functions = list(act_function) + [tf.nn.softmax]
            self.n_hiddens = list(n_hiddens)

        def __call__(self, input_dim, output_dim):
            """Build the graph for inputs of width input_dim and output_dim classes."""
            self.input_dim = input_dim
            self.output_dim = output_dim
            self.ins = [input_dim] + self.n_hiddens
            self.outs = self.n_hiddens + [output_dim]

            # placeholders
            self.x = tf.placeholder(tf.float32, [None, self.input_dim], name="x")
            self.t = tf.placeholder(tf.float32, [None, self.output_dim], name="t")
            self.keep_prob = tf.placeholder(tf.float32)

            # forward pass
            params = []
            u = self.x
            for idx, (n_in, n_out) in enumerate(zip(self.ins, self.outs)):
                W = tf.Variable(get_init_weight(n_in=n_in, n_out=n_out, weight=0.08),
                                name="W%d" % (idx + 1,))
                b = tf.Variable(np.zeros(n_out).astype('float32'), name="b%d" % (idx + 1,))
                params += [W, b]
                # Dropout is applied to the input of every layer but the first.
                if idx > 0:
                    u = tf.nn.dropout(u, self.keep_prob)
                u = tf.matmul(u, W) + b
                u = self.act_functions[idx](u)
            y = u
            # Clip the probabilities so the log never sees zero.
            clipped_y = tf.clip_by_value(y, 1e-10, 1.0)
            self.cost = -tf.reduce_mean(tf.reduce_sum(self.t * tf.log(clipped_y), axis=1))

            # plain gradient-descent updates
            grad_params = tf.gradients(self.cost, params)
            updates = [param.assign_sub(self.lr * grad_param)
                       for param, grad_param in zip(params, grad_params)]

            self.train = tf.group(*updates)
            self.valid = tf.argmax(y, axis=1)

        def batch_training(self, sess, batch_train_X, batch_train_y,
                           batch_size=20, keep_prob=0.8):
            """Run one pass over the data in mini-batches using session ``sess``."""
            for start in range(0, batch_train_X.shape[0], batch_size):
                stop = start + batch_size
                sess.run(self.train, feed_dict={self.x: batch_train_X[start:stop],
                                                self.t: batch_train_y[start:stop],
                                                self.keep_prob: keep_prob})

    # main
    model = Model()
    model(train_X.shape[1], n_classes)

    train_y = one_to_hot(train_y)
    early_stopping = EarlyStopping()

    train_X, train_y = shuffle(train_X, train_y)
    # With a fixed random_state the split is the same on every call, so it is
    # computed once here rather than once per epoch.
    fit_X, valid_X, fit_y, valid_y = train_test_split(
        train_X, train_y, train_size=0.8, random_state=random_state)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(max_epoch):
            # train
            model.batch_training(sess, fit_X, fit_y)
            # validate with dropout disabled
            loss = sess.run(model.cost, feed_dict={model.x: valid_X,
                                                   model.t: valid_y,
                                                   model.keep_prob: 1.0})
            if early_stopping.check(loss):
                break

            if epoch % 5 == 0:
                print(loss)

        # predict
        pred_y = sess.run(model.valid, feed_dict={model.x: test_X, model.keep_prob: 1.0})

    return pred_y
                    
        

- 以下のvalidate_homework関数を用いてエラーが起きないか動作確認をして下さい。
- 提出に際して、以下のscore_homework関数で60分で実行が終わることを確認して下さい。
- 評価は以下のscore_homework関数で行われますが、random_stateの値は変更されます。

# Checker Cell (for student)

In [4]:
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split

import numpy as np
import tensorflow as tf

# Remove the tf submodules that are off-limits for this assignment, so that
# any attempt to use them from homework() fails with an AttributeError.
for _restricted_name in (
    'app',
    'compat',
    'contrib',
    'errors',
    'gfile',
    'graph_util',
    'image',
    'layers',
    'logging',
    'losses',
    'metrics',
    'python_io',
    'resource_loader',
    'saved_model',
    'sdca',
    'sets',
    'summary',
    'sysconfig',
    'test',
    'train',
):
    delattr(tf, _restricted_name)
del _restricted_name

def load_mnist():
    """Fetch MNIST and return it as a shuffled 80/20 train/test split.

    Pixel values are converted to float32 and divided by 255; labels are
    converted to int32.

    Returns:
        The result of train_test_split, i.e. train_X, test_X, train_y, test_y.
    """
    # NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and
    # removed in 0.22 -- confirm the installed version still provides it.
    dataset = fetch_mldata('MNIST original')
    images = dataset.data.astype('float32')
    labels = dataset.target.astype('int32')

    images, labels = shuffle(images, labels, random_state=42)
    images = images / 255.0

    return train_test_split(images, labels, test_size=0.2, random_state=42)

def validate_homework():
    """Smoke-test homework() on a small subset and print the macro F1 score.

    Only the first 100 training samples and the first 100 test samples are
    used, so this is a quick check that homework() runs without errors.
    """
    n_samples = 100
    train_X, test_X, train_y, test_y = load_mnist()

    # Run the submission on the reduced dataset.
    pred_y = homework(train_X[:n_samples], train_y[:n_samples], test_X[:n_samples])
    score = f1_score(test_y[:n_samples], pred_y, average='macro')
    print(score)

def score_homework():
    """Run homework() on the full dataset and print the macro F1 score."""
    splits = load_mnist()
    train_X, test_X, train_y, test_y = splits
    predictions = homework(train_X, train_y, test_X)
    score = f1_score(test_y, predictions, average='macro')
    print(score)

In [11]:
# Quick check of homework() on the 100-sample subset; prints the macro F1 score.
validate_homework()

2.24655
2.19885
2.14938
2.09519
2.03316
1.95965
1.8746
1.78793
1.70269
1.61605
1.53354
1.45949
1.38911
1.32048
1.25951
1.20265
1.15149
1.10355
1.06043
1.02541
0.990552
0.961068
0.928016
0.905442
0.883499
0.861016
0.847414
0.831106
0.811653
0.799757
0.788836
0.775997
0.766708
0.758625
0.747289
0.739228
0.73094
0.722213
0.715132
0.711825
0.704838
0.700479
0.697795
0.692762
0.692271
0.690559
0.686226
0.680288
0.680615
0.673555
0.671507
0.670357
0.667416
0.664803
0.663956
0.661393
0.657886
0.656574
0.657193
0.651189121698


In [12]:
import time
# Time the full scoring run; the assignment requires it to finish within 60 minutes.
start = time.time()
score_homework()
# NOTE: despite its name, `end` holds the elapsed wall-clock time in seconds.
end = time.time() - start
print(end)

0.343226
0.199369
0.15041
0.128352
0.112705
0.102154
0.095442
0.0908121
0.0874835
0.085647
0.0820033
0.0816428
0.0796613
0.0791149
0.0795057
0.978993508345
791.7237060070038
