# 第7回講義 宿題

## 課題
RNNを用いてIMDbのsentiment analysisを実装してみましょう。

ネットワークの形などは特に制限を設けませんし、今回のLessonで扱った内容以外の工夫も組み込んでもらって構いません。

## 目標値
F値：0.9

## ルール
- 以下のセルで指定されている`x_train, t_train`以外の学習データは使わないでください。

## 提出方法
- 2つのファイルを提出していただきます。
  1. テストデータ (x_test) に対する予測ラベルを`submission_pred.csv`として保存し、Homeworkタブから**chap07**を選択して提出してください。
  2. それに対応するpythonのコードを`submission_code.py`として保存し、Homeworkタブから**chap07 (code)**を選択して提出してください。
    - セルに書いたコードを.py形式で保存するためには%%writefileコマンドなどを利用してください。
    - writefileコマンドではファイルの保存のみが行われセル内のpythonコード自体は実行されません。そのため、実際にコードを走らせる際にはwritefileコマンドをコメントアウトしてください


- コードの内容を変更した場合は、1と2の両方を提出し直してください。

- なお、採点は1で行い、2はコードの確認用として利用します。(成績優秀者はコード内容を公開させていただくかもしれません。)

- **宿題の締め切りは【出題週の翌週水曜日24時】です。**

## 評価方法

- 予測ラベルの（`t_test`に対する）F値で評価します。
- 毎日24時にテストデータの一部に対するF値でLeader Boardを更新します。
- 締切日の夜24時にテストデータ全体に対するF値でLeader Boardを更新します。これを最終的な評価とします。

## データの読み込み（このセルは修正しないでください）

In [3]:
import numpy as np

def load_dataset():
    # 学習データ
    x_train = np.load('/root/userspace/public/chap07/data/x_train.npy')
    t_train = np.load('/root/userspace/public/chap07/data/t_train.npy')
    
    # テストデータ
    x_test = np.load('/root/userspace/public/chap07/data/x_test.npy')

    return (x_train, x_test, t_train)

x_train, x_test, t_train = load_dataset()

## 実装

In [25]:
y_pred = []
t_valid = []

In [4]:
#%%writefile /root/userspace/chap07/materials/submission_code.py
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from keras.preprocessing.sequence import pad_sequences

"""
# trainデータを長さ順にソート
x_train_lens = [len(com) for com in x_train]
sorted_train_indexes = sorted(range(len(x_train_lens)), key=lambda x: -x_train_lens[x])

x_train = [x_train[ind] for ind in sorted_train_indexes]
t_train = [t_train[ind] for ind in sorted_train_indexes]
"""

def tf_log(x):
    return tf.log(tf.clip_by_value(x, 1e-10, x))

### レイヤー定義 ###
class Embedding:
    # WRITE ME
    def __init__(self, vocab_size, emb_dim, scale=0.08):
        self.V = tf.Variable(tf.random_normal([vocab_size, emb_dim], stddev=scale), name='V')
        
    def __call__(self, x):
        return tf.nn.embedding_lookup(self.V, x)

"""
class RNN:
    # WRITE ME
    def __init__(self, in_dim, hid_dim, seq_len=None, scale=0.08):
        self.in_dim = in_dim
        self.hid_dim = hid_dim
        
        glorot = tf.cast(tf.sqrt(6/(in_dim+hid_dim*2)), tf.float32)
        self.W = tf.Variable(tf.random_uniform([in_dim + hid_dim, hid_dim], minval=-glorot, maxval=glorot), name='W')
        self.b = tf.Variable(tf.zeros([hid_dim]), name='b')
        
        self.seq_len = seq_en
        self.initial_state = None
        
    def __call__(self, x):
        def fn(h_prev, x_and_m):
            x_t, m_t = x_and_m
            inputs = tf.concat([x_t, h_prev], -1)
            #RNN
            h_t = tf.nn.tanh(tf.matmul(inputs, self.V) + self.b)
            # apply the mask
            h_t = m_t * h_t + (1-m_t) * h_prev
            
            return h_t
        
        # permute inputs chronologically (chronologize inputs)
        # shape: [0: batch_size, 1: max_sequence_lenght, 2: in_dim] --> [1: max_sequence_lenght, 0: batch_size, 2: in_dim]
        x_tmaj = tf.transpose(x, perm=[1, 0, 2])
        
        #generate a mask & chronogize it
        mask = tf.cast(tf.sequence_mask(self.seq_len, tf.shape(x)[1]), tf.float32)
        mask_tmaj = tf.transpose(tf.expand_dims(mask, axis=-1), perm=[1, 0, 2])
        
        if self.initialize_state is None:
            batch_size = tf.shape(x)[0]
            self.initial_state = tf.zeros([batch_size, self.hid_dim])
            
        h = tf.scan(fn=fn, elems=[x_tmaj, mask_tmaj], initializer=self.initial_state)
        
        return h[-1]
"""

class RNN:
    def __init__(self, hid_dim, seq_len=None, initial_state=None):
        self.cell = tf.nn.rnn_cell.BasicRNNCell(hid_dim)
        self.initial_state = initial_state
        self.seq_len = seq_len
    
    def __call__(self, x):
        if self.initial_state is None:
            self.initial_state = self.cell.zero_state(tf.shape(x)[0], tf.float32)
        
        outputs, state = tf.nn.dynamic_rnn(self.cell, x, self.seq_len, self.initial_state)
        return tf.gather_nd(outputs, tf.stack([tf.range(tf.shape(x)[0]), self.seq_len-1], axis=1))


### グラフ構築 ###
tf.reset_default_graph()

emb_dim = 100
hid_dim = 50
num_words = max([max(s) for s in np.hstack((x_train, x_test))])
pad_index = 0

x = tf.placeholder(tf.int32, [None, None], name='x')
t = tf.placeholder(tf.float32, [None, None], name='t')

# WRITE ME
seq_len = tf.reduce_sum(tf.cast(tf.not_equal(x, pad_index), tf.int32), axis=1)

h = Embedding(num_words, emb_dim)(x)
#h = RNN(emb_dim, hid_dim, seq_len)(h)
h = RNN(hid_dim, seq_len)(h)
y = tf.layers.Dense(1, tf.nn.sigmoid)(h)

cost = -tf.reduce_mean(t*tf_log(y) + (1-t)*tf_log(1-y))

train = tf.train.AdamOptimizer().minimize(cost)
test = tf.round(y)

### データの準備 ###
x_train, x_valid, t_train, t_valid = train_test_split(x_train, t_train)

### 学習 ###
n_epochs = 2
batch_size = 5 # バッチサイズが大きいと、ResourceExhaustedErrorになることがあります
n_batches_train = len(x_train) // batch_size
n_batches_valid = len(x_valid) // batch_size

init = tf.global_variables_initializer()

with tf.Session() as sess:
    # WRITE ME
    sess.run(init)
    for epoch in range(n_epochs):
        # train
        train_costs = []
        for i in range(n_batches_train):
            start = i * batch_size
            end = start + batch_size
            
            x_train_batch = np.array(pad_sequences(x_train[start:end], padding='post', value=pad_index))
            t_train_batch = np.array(t_train[start:end])[:, None]
            
            _, train_cost = sess.run([train, cost], feed_dict={x: x_train_batch, t: t_train_batch})
            train_costs.append(train_cost)
            
        # valid
        valid_costs = []
        y_pred = []
        for i in range(n_batches_valid):
            start = i * batch_size
            end = start + batch_size
            
            x_valid_pad = np.array(pad_sequences(x_valid[start:end], padding='post', value=pad_index))
            t_valid_pad = np.array(t_valid[start:end])[:, None]
            
            pred, valid_cost = sess.run([test, cost], feed_dict={x:x_valid_pad, t:t_valid_pad})
            
            y_pred += pred.flatten().tolist()
            valid_costs.append(valid_cost)
        print('EPOCH: {}, Training Cost: {:.3f}, Validation Cost: {:.3f}, Validation F1: {:.3f}'.format(
                epoch+1, np.mean(train_costs), np.mean(valid_costs), f1_score(t_valid, y_pred, average='macro')
        ))
        
    # test
    y_pred = []
    x_test_pad = np.array(pad_sequences(x_test, padding='post', value=pad_index))
    pred = sess.run(test, feed_dict={x:x_test_pad})
    y_pred += pred.flatten().tolist()
    
    
### 出力 ###
submission = pd.Series(y_pred, name='label')
submission.to_csv('/root/userspace/chap07/materials/submission_pred.csv', header=True, index_label='id')

EPOCH: 1, Training Cost: 0.619, Validation Cost: 0.588, Validation F1: 0.676
EPOCH: 2, Training Cost: 0.467, Validation Cost: 0.519, Validation F1: 0.737


ResourceExhaustedError: OOM when allocating tensor with shape[10000,1403,100]
	 [[Node: embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@V"], validate_indices=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](V/read, _arg_x_0_0/_73)]]
	 [[Node: Round/_93 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_168_Round", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'embedding_lookup', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-843f04e001a8>", line 98, in <module>
    h = Embedding(num_words, emb_dim)(x)
  File "<ipython-input-4-843f04e001a8>", line 26, in __call__
    return tf.nn.embedding_lookup(self.V, x)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/embedding_ops.py", line 328, in embedding_lookup
    transform_fn=None)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/embedding_ops.py", line 150, in _embedding_lookup_and_transform
    result = _clip(_gather(params[0], ids, name=name), ids, max_norm)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/embedding_ops.py", line 54, in _gather
    return array_ops.gather(params, ids, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 2486, in gather
    params, indices, validate_indices=validate_indices, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1834, in gather
    validate_indices=validate_indices, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[10000,1403,100]
	 [[Node: embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@V"], validate_indices=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](V/read, _arg_x_0_0/_73)]]
	 [[Node: Round/_93 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_168_Round", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [22]:
len(y_pred)

4210

In [23]:
len(t_valid)

4219