# Sprint 14　Keras

# ライブラリimport

In [50]:
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.optimizers import Adam
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from keras.datasets import mnist

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

# 問題1　公式チュートリアルモデルを分担して実行

こちらのチュートリアルを実行：https://www.tensorflow.org/tutorials/quickstart/beginner

In [None]:
# MNIST読み込み
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 正規化
x_train, x_test = x_train / 255.0, x_test / 255.0

In [None]:
# モデル王増の定義
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

In [None]:
# 予測値を出してみる（1データだけ使用）
predictions = model(x_train[:1]).numpy()
predictions

In [None]:
# softmax関数に通す
tf.nn.softmax(predictions).numpy()

In [None]:
# 損失関数定義
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# 損失算出
loss_fn(y_train[:1], predictions).numpy()

In [None]:
# コンパイル（計算や予測を出来る状態にする）
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

In [None]:
# 学習実行
model.fit(x_train, y_train, epochs=5)

In [None]:
# 評価(損失とACC)
model.evaluate(x_test,  y_test, verbose=3)

In [None]:
# モデルが直接確率を返してくれるようにする
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [None]:
# 試しに予測
probability_model(x_test[:5])

# 問題3

## データ準備

In [13]:
# 読み込み
iris_dataset = load_iris()

# 整形
iris_dataframe = pd.DataFrame(data=iris_dataset.data, columns=iris_dataset.feature_names)
iris_datalabel = pd.DataFrame(data=iris_dataset.target,columns=['Species'])
df = pd.concat([iris_dataframe,iris_datalabel],axis=1)

In [14]:
# 2値分類のため絞り込み
df2 = df[(df["Species"] == 0)|(df["Species"] == 1)]

# 説明変数と目的変数に分割
y = df2["Species"]
X = df2.loc[:, ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]

# 訓練データ/テストデータ/評価データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# 正規化
X_train /= 255
X_test /= 255
X_val /= 255

## モデル定義

In [15]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(1, activation = tf.nn.sigmoid))

In [16]:
# モデル構造の確認
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 50)                250       
_________________________________________________________________
dense_4 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 101       
Total params: 5,451
Trainable params: 5,451
Non-trainable params: 0
_________________________________________________________________


In [17]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [18]:
# モデルのコンパイル
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [19]:
# 学習実行
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100


<tensorflow.python.keras.callbacks.History at 0x18e10cd7d30>

In [20]:
# 2値分類のうちどちらに分類されるか
y_pred_proba = model.predict(X_test)[:, 0]
y_pred = np.where(y_pred_proba >0.5, 1, 0)
print("y_pred_proba", y_pred_proba)
print("y_pred", y_pred)
print('y_test', y_test.ravel())

y_pred_proba [7.7298880e-03 9.9999827e-01 1.0302067e-03 9.9998391e-01 9.9999189e-01
 9.9443412e-01 2.1575987e-03 9.9999213e-01 9.9999774e-01 9.9990720e-01
 9.9992698e-01 9.9988472e-01 9.9999416e-01 3.3670664e-04 2.3248196e-03
 3.8595200e-03 2.7894974e-04 8.5019767e-03 1.0827184e-03 1.5566349e-03]
y_pred [0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0]
y_test [0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0]


In [21]:
# 損失とACC
score = model.evaluate(X_test, y_test, verbose=0)
print('test loss:', score[0])
print('test accuracy:', score[1])

test loss: 0.0017422338714823127
test accuracy: 1.0


# 問題4

## データ準備

In [22]:
# 説明変数と目的変数に分割
y = df["Species"]
X = df.loc[:, ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]

# 訓練データ/テストデータ/評価データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# onehotベクトル化
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train)
y_val_one_hot = enc.transform(y_val)
y_test_one_hot = enc.transform(y_test)

# 正規化
X_train /= 255
X_test /= 255
X_val /= 255

## モデル定義

In [23]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(3, activation = tf.nn.softmax))

In [24]:
# モデル構造の確認
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 50)                250       
_________________________________________________________________
dense_7 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 303       
Total params: 5,653
Trainable params: 5,653
Non-trainable params: 0
_________________________________________________________________


In [25]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [26]:
# モデルのコンパイル
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [27]:
# 学習実行
model.fit(
    X_train,
    y_train_one_hot,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val_one_hot)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


<tensorflow.python.keras.callbacks.History at 0x18e11e0b160>

In [28]:
# 予測値の最大インデックスを出力クラスとする
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred", y_pred)
print('y_test', y_test.ravel())

y_pred [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0]
y_test [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]


In [29]:
# 損失とACC
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

Train loss: 0.17913818359375
Train accuracy: 0.9333333373069763


# 問題5

## データの準備

In [39]:
dataset_path ="train.csv"
df = pd.read_csv(dataset_path)

y = df["SalePrice"]
X = df.loc[:, ["GrLivArea", "YearBuilt"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]
y = np.log(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

## モデル定義

In [42]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(2,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(3, activation = tf.keras.activations.linear))

In [43]:
# モデル構造の確認
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 50)                150       
_________________________________________________________________
dense_19 (Dense)             (None, 100)               5100      
_________________________________________________________________
dense_20 (Dense)             (None, 3)                 303       
Total params: 5,553
Trainable params: 5,553
Non-trainable params: 0
_________________________________________________________________


In [44]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [45]:
# モデルのコンパイル
model.compile(
    loss='mse',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['mse']
)

In [46]:
# 学習実行
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


<tensorflow.python.keras.callbacks.History at 0x18e13209828>

In [47]:
# 予測値出力
y_pred_proba = model.predict(X_test)
y_pred = y_pred_proba
print("y_pred", y_pred.ravel()[:5])
print('y_test', y_test.ravel()[:5])

y_pred [12.416857 12.10189  10.863805 11.91993  11.839936]
y_test [12.20918779 11.79810441 11.60823564 12.16525065 11.38509209]


In [48]:
# 損失とACC
score = model.evaluate(X_test, y_test, verbose=0)
print('loss (test):', score[0])
print('Mean Squared Error (test):', score[1])

loss (test): 2.1739468574523926
Mean Squared Error (test): 2.1739468574523926


# 問題6

## データ準備

In [52]:
# 読み込み
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#　平滑化
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# 正規化
X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

# 変形
y_train = y_train.astype(np.int)[:, np.newaxis]
y_test = y_test.astype(np.int)[:, np.newaxis]

# one-hotベクトル化
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:])
y_test_one_hot = enc.fit_transform(y_test[:])

# 分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

## モデル定義

In [53]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(784,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(10, activation = tf.nn.softmax))

In [55]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [54]:
# モデルのコンパイル
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [56]:
# 学習実行
model.fit(
    X_train[:1000],
    y_train[:1000],
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


<tensorflow.python.keras.callbacks.History at 0x18e10cdf9e8>

In [57]:
# 予測値出力
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred", y_pred[:10])
print('y_test', y_test.ravel()[:10])

y_pred [7 2 1 0 4 1 4 9 4 9]
y_test [7 2 1 0 4 1 4 9 5 9]


In [58]:
# 損失とACC
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.836173951625824
Test accuracy: 0.8575000166893005
