# Sprint ディープラーニングフレームワーク2

In [1]:
import tensorflow as tf
from keras import backend as K
from keras.datasets import mnist

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import OneHotEncoder


Using TensorFlow backend.


# 【問題1】公式Exampleを分担して実行
TensorFlowの公式Exampleを分担して実行してください。

以下の中から1人ひとつ選び実行し、その結果を簡単に発表してください。

tutorials → Build a Convolutional Neural Network using Estimators

実行結果  
{'accuracy': 0.8551, 'loss': 0.6025994, 'global_step': 2002}

このexampleはTensorFlowを使って2次元のCNNを作成している。  
以下にサンプルコードとコードリーディングの結果を示す。

In [None]:
def cnn_model_fn(features, labels, mode):
  """Build the CNN graph and return the EstimatorSpec matching `mode`.

  Args:
    features: dict whose "x" entry is reshaped to [-1, 28, 28, 1].
    labels: class labels fed to the sparse softmax cross-entropy loss and
      the accuracy metric (not read in PREDICT mode).
    mode: a tf.estimator.ModeKeys value selecting which spec is returned.

  Returns:
    A tf.estimator.EstimatorSpec holding predictions only (PREDICT),
    loss plus train_op (TRAIN), or loss plus an accuracy metric (otherwise).
  """
  mode_keys = tf.estimator.ModeKeys
  is_training = mode == mode_keys.TRAIN

  # Input: reshape to [batch, 28, 28, 1].
  images = tf.reshape(features["x"], [-1, 28, 28, 1])

  # Stage 1: 5x5 convolution with 32 filters, then 2x2 max pooling.
  conv_a = tf.layers.conv2d(
      inputs=images,
      filters=32,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)
  pooled_a = tf.layers.max_pooling2d(
      inputs=conv_a, pool_size=[2, 2], strides=2)

  # Stage 2: 5x5 convolution with 64 filters, then 2x2 max pooling.
  conv_b = tf.layers.conv2d(
      inputs=pooled_a,
      filters=64,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)
  pooled_b = tf.layers.max_pooling2d(
      inputs=conv_b, pool_size=[2, 2], strides=2)

  # Fully connected head; dropout is only active while training.
  flattened = tf.reshape(pooled_b, [-1, 7 * 7 * 64])
  hidden = tf.layers.dense(
      inputs=flattened, units=1024, activation=tf.nn.relu)
  hidden_dropped = tf.layers.dropout(
      inputs=hidden, rate=0.4, training=is_training)

  # One logit per class.
  logits = tf.layers.dense(inputs=hidden_dropped, units=10)

  predicted_classes = tf.argmax(input=logits, axis=1)
  # The explicit name lets other code look this tensor up as "softmax_tensor".
  class_probabilities = tf.nn.softmax(logits, name="softmax_tensor")
  predictions = {
      "classes": predicted_classes,
      "probabilities": class_probabilities,
  }

  # PREDICT needs no labels, so return before the loss is built.
  if mode == mode_keys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # The loss is shared by TRAIN and EVAL.
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  if is_training:
    sgd = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = sgd.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  # Remaining case (EVAL): report accuracy alongside the loss.
  accuracy_metric = tf.metrics.accuracy(
      labels=labels, predictions=predicted_classes)
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops={"accuracy": accuracy_metric})

入力データの整形（reshape）や、畳み込み層・プーリング層・全結合層の出力を、それぞれテンソルとして変数に保持している。  
予測結果（クラスと確率）はpredictionsという辞書に格納している。  
学習時（TRAINモード）のみdropoutを適用し、過学習を抑えている。

In [None]:
# Build an Estimator around cnn_model_fn, using /tmp/mnist_convnet_model
# as its model directory.
mnist_classifier = tf.estimator.Estimator(
    model_dir="/tmp/mnist_convnet_model",
    model_fn=cnn_model_fn,
)

tf.estimator.Estimatorを使ってモデルのインスタンスを作成している  
model_fn引数に先ほどの関数を指定することで、任意のモデルを作成している

In [None]:
# Load MNIST and scale pixel values by 1/255. The original cell used
# train_data / train_labels (and the evaluation cell uses eval_data /
# eval_labels) without ever defining them in this notebook.
(train_data, train_labels), (eval_data, eval_labels) = mnist.load_data()
train_data = train_data / np.float32(255)
train_labels = train_labels.astype(np.int32)
eval_data = eval_data / np.float32(255)
eval_labels = eval_labels.astype(np.int32)

# Log the tensor named "softmax_tensor" (created in cnn_model_fn) while
# training. The original cell passed logging_hook without defining it.
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=50)

# Input pipeline for training: shuffled batches of 100, repeated
# indefinitely (num_epochs=None), so `steps` decides when training stops.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data},
    y=train_labels,
    batch_size=100,
    num_epochs=None,
    shuffle=True)

# Train for a single step and display the probabilities via the hook.
mnist_classifier.train(
    input_fn=train_input_fn,
    steps=1,
    hooks=[logging_hook])

学習用データをtf.estimator.inputs.numpy_input_fnで入力関数として準備している。  
先ほど作成したEstimatorインスタンスの.train()で学習している。  
input_fnには準備した入力関数を渡している。

In [None]:
# Input pipeline for evaluation: a single, unshuffled pass over the data.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data},
    y=eval_labels,
    shuffle=False,
    num_epochs=1,
)

# Evaluate the trained estimator and show the returned metrics.
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

評価用（validation）データを入力関数として準備している。  
先ほどのEstimatorインスタンスの.evaluate()で評価している。

# 【問題2】Iris（2値分類）をKerasで学習
TensorFlowによるIrisデータセットに対する2値分類をKerasに書き換えてください。

In [2]:
# Load the Iris dataset.
df = pd.read_csv("/Users/morishuuya/Desktop/dataset/DIC/iris-species/Iris.csv")
# Keep only the two species used for the binary task.
df = df[df["Species"].isin(["Iris-versicolor", "Iris-virginica"])]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]].values
# Encode the labels as 0/1 in a column vector of shape (n, 1).
# The builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
label_to_id = {"Iris-versicolor": 0, "Iris-virginica": 1}
y = df["Species"].map(label_to_id).values.astype(int)[:, np.newaxis]
# Split into train and test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Split train further into train and validation.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [4]:
# Reset backend state before building a new model.
K.clear_session()

# 4 input features -> 400 -> 200 -> a single sigmoid output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(400, input_shape=(4,), activation=tf.nn.relu),
    tf.keras.layers.Dense(200, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
])
model.compile(
    optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 400)               2000      
_________________________________________________________________
dense_1 (Dense)              (None, 200)               80200     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 201       
Total params: 82,401
Trainable params: 82,401
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Train on the training split.
history = model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=10,
    verbose=1,
)

# Threshold the sigmoid outputs at 0.5 to get 0/1 class labels.
y_pred_proba = model.predict(X_test)
y_pred = (y_pred_proba > 0.5).astype(int)
print("accuracy:", accuracy_score(y_test, y_pred))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
accuracy: 0.8


# 【問題3】Iris（多値分類）をKerasで学習
TensorFlowによるIrisデータセットに対する3値分類をKerasに書き換えてください。

In [6]:
# Load the full Iris dataset (all three species).
df = pd.read_csv("/Users/morishuuya/Desktop/dataset/DIC/iris-species/Iris.csv")
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]].values
# Encode the labels as integer ids in a column vector of shape (n, 1).
# The builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
label_to_id = {"Iris-versicolor": 0, "Iris-virginica": 1, "Iris-setosa": 2}
y = df["Species"].map(label_to_id).values.astype(int)[:, np.newaxis]
# Split into train/test, then split train again into train/validation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
# Fit the encoder on the training labels only and reuse it for the other
# splits. Re-fitting per split (as the original did) would give a split
# that happens to miss a class fewer one-hot columns than the model outputs.
enc = OneHotEncoder(handle_unknown="ignore", sparse=False)
y_train = enc.fit_transform(y_train)
y_test = enc.transform(y_test)
y_val = enc.transform(y_val)

In [7]:
# Reset backend state before building a new model.
K.clear_session()

# 4 input features -> 400 -> 200 -> 3-way softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(400, input_shape=(4,), activation=tf.nn.relu),
    tf.keras.layers.Dense(200, activation=tf.nn.relu),
    tf.keras.layers.Dense(3, activation=tf.nn.softmax),
])
model.compile(
    optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 400)               2000      
_________________________________________________________________
dense_1 (Dense)              (None, 200)               80200     
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 603       
Total params: 82,803
Trainable params: 82,803
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Train on the training split, one sample per batch.
history = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=10,
    verbose=1,
)

# Convert softmax outputs and one-hot targets back to class indices
# before scoring.
y_pred_proba = model.predict(X_test)
y_pred = y_pred_proba.argmax(axis=1)
y_true = y_test.argmax(axis=1)
print("accuracy:", accuracy_score(y_true, y_pred))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
accuracy: 0.8


# 【問題4】House PricesをKerasで学習
TensorFlowによるHouse Pricesデータセットに対する回帰をKerasに書き換えてください。

In [9]:
# Load the House Prices training data.
df = pd.read_csv("/Users/morishuuya/Desktop/dataset/kaggle/HousePrice/train.csv")
# Natural-log transform both the two selected features and the target.
X = np.log(df[["GrLivArea", "YearBuilt"]].values)
y = np.log(df["SalePrice"].values)
# Split into train/test, then split train again into train/validation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0)

In [10]:
# Reset backend state before building a new model.
K.clear_session()

# 2 input features -> 200 ReLU units -> 1 linear output.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(200, input_dim=2, activation=tf.nn.relu),
    tf.keras.layers.Dense(1),
])
model.compile(
    optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    loss="mean_squared_error",
    metrics=["mse"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 200)               600       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 201       
Total params: 801
Trainable params: 801
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Train with per-epoch logging switched off (verbose=0).
history = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=100,
    verbose=0,
)

In [12]:
# RMSE on the test split; the target was log-transformed earlier, so this
# error is in log units.
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(rmse)

0.28938372304468346


# 【問題5】MNISTをKerasで学習
TensorFlowによるMNISTデータセットによる画像の多値分類をKerasに書き換えてください。

In [13]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten every image to a 784-element vector and divide by 255.
# np.float64 replaces the `np.float` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
X_train = X_train.reshape(-1, 784).astype(np.float64) / 255
X_test = X_test.reshape(-1, 784).astype(np.float64) / 255
# Fit the encoder on the training labels only and reuse it for the test
# labels, so both encodings share the same column order.
enc = OneHotEncoder(handle_unknown="ignore", sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:, np.newaxis])
y_test_one_hot = enc.transform(y_test[:, np.newaxis])
# After this split y_train / y_val are one-hot encoded; y_test keeps its
# integer labels.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2, random_state=0)

In [14]:
# Reset backend state before building a new model.
K.clear_session()

# 784 inputs -> 200 -> 200 -> 10-way softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(200, input_dim=784, activation=tf.nn.relu),
    tf.keras.layers.Dense(200, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
])
model.compile(
    optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 200)               157000    
_________________________________________________________________
dense_1 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2010      
Total params: 199,210
Trainable params: 199,210
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train with large batches of 12000 samples per gradient step.
history = model.fit(
    X_train,
    y_train,
    batch_size=12000,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
# The predicted class is the index of the largest softmax output; y_test
# holds integer labels, so it is compared directly.
y_pred_proba = model.predict(X_test)
y_pred = y_pred_proba.argmax(axis=1)
print(accuracy_score(y_test, y_pred))

0.9758
