# ライブラリimport

In [50]:
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# from keras.optimizers import Adam
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from keras.datasets import mnist

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

# 問題1　公式チュートリアルモデルを分担して実行

こちらのチュートリアルを実行：https://www.tensorflow.org/tutorials/quickstart/beginner

In [51]:
# MNIST読み込み
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 正規化
x_train, x_test = x_train / 255.0, x_test / 255.0

In [52]:
# モデルの定義
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

In [53]:
# 予測値を出してみる（1データだけ使用）
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.33532238,  0.24495786,  0.52920055,  0.07347189,  0.25622487,
        -0.05961295, -0.86911416, -0.06926402, -0.11070457, -0.3373456 ]],
      dtype=float32)

In [54]:
# softmax関数に通す
tf.nn.softmax(predictions).numpy()

array([[0.13136338, 0.12001334, 0.15946837, 0.10110068, 0.12137318,
        0.08850261, 0.03939074, 0.08765257, 0.08409445, 0.06704067]],
      dtype=float32)

In [55]:
# 損失関数定義
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [56]:
# 損失算出
loss_fn(y_train[:1], predictions).numpy()

2.4247231

In [57]:
# コンパイル（計算や予測を出来る状態にする）
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

In [58]:
# 学習実行
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fab3cea3f28>

In [59]:
# 評価(損失とACC)
model.evaluate(x_test,  y_test, verbose=3)

[0.07454757392406464, 0.9761999845504761]

In [60]:
# モデルが直接確率を返してくれるようにする
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

In [61]:
# 試しに予測
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[2.48205563e-08, 5.74368331e-09, 3.09179200e-06, 1.09089779e-04,
        1.51966044e-11, 1.70801570e-07, 2.05797879e-12, 9.99886751e-01,
        3.61387322e-07, 5.35054255e-07],
       [4.98073049e-08, 3.92033180e-05, 9.99957442e-01, 2.90335151e-06,
        6.21071011e-13, 3.72753419e-07, 7.16569526e-10, 1.26236420e-14,
        7.31251291e-08, 2.08328372e-11],
       [2.82417795e-06, 9.99501228e-01, 7.26054204e-05, 3.39780922e-06,
        2.26075936e-05, 3.66823951e-06, 3.72365976e-05, 1.94331093e-04,
        1.61531832e-04, 6.03411536e-07],
       [9.99884605e-01, 1.67506631e-08, 1.26627865e-05, 1.65798433e-08,
        2.76632392e-07, 1.26973600e-05, 8.76159611e-05, 1.56990234e-06,
        1.29813893e-08, 5.33945979e-07],
       [2.01697162e-06, 6.21835312e-08, 3.96337236e-06, 2.45339393e-08,
        9.99186099e-01, 1.03470192e-07, 6.78778349e-07, 1.01389640e-04,
        1.62555978e-06, 7.03969214e-04]], dtype=float32)>

# 問題3

## データ準備

In [95]:
# 読み込み
iris_dataset = load_iris()

# 整形
iris_dataframe = pd.DataFrame(data=iris_dataset.data, columns=iris_dataset.feature_names)
iris_datalabel = pd.DataFrame(data=iris_dataset.target,columns=['Species'])
df = pd.concat([iris_dataframe,iris_datalabel],axis=1)

In [96]:
# 2値分類のため絞り込み
df2 = df[(df["Species"] == 0)|(df["Species"] == 1)]
print(df2)
# 説明変数と目的変数に分割
y = df2["Species"]
X = df2.loc[:, ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]

# 訓練データ/テストデータ/評価データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# 正規化
X_train /= 255
X_test /= 255
X_val /= 255

    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                 5.1               3.5                1.4               0.2   
1                 4.9               3.0                1.4               0.2   
2                 4.7               3.2                1.3               0.2   
3                 4.6               3.1                1.5               0.2   
4                 5.0               3.6                1.4               0.2   
..                ...               ...                ...               ...   
95                5.7               3.0                4.2               1.2   
96                5.7               2.9                4.2               1.3   
97                6.2               2.9                4.3               1.3   
98                5.1               2.5                3.0               1.1   
99                5.7               2.8                4.1               1.3   

    Species  
0         0  
1         0

## モデル定義

In [64]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(1, activation = tf.nn.sigmoid))

In [65]:
# モデル構造の確認
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 50)                250       
_________________________________________________________________
dense_17 (Dense)             (None, 100)               5100      
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 101       
Total params: 5,451
Trainable params: 5,451
Non-trainable params: 0
_________________________________________________________________


In [66]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [67]:
# モデルのコンパイル
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [68]:
# 学習実行
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100


<keras.callbacks.History at 0x7fab83a1af28>

In [69]:
# 2値分類のうちどちらに分類されるか
y_pred_proba = model.predict(X_test)[:, 0]
y_pred = np.where(y_pred_proba >0.5, 1, 0)
print("y_pred_proba", y_pred_proba)
print("y_pred", y_pred)
print('y_test', y_test.ravel())

y_pred_proba [0.02029315 0.999977   0.00442493 0.99988467 0.9999236  0.98877263
 0.00617084 0.9999362  0.999974   0.99950224 0.99964076 0.9994469
 0.99994385 0.00164926 0.00811097 0.01333991 0.00107348 0.02398014
 0.0034925  0.00680211]
y_pred [0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0]
y_test [0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0]


In [70]:
# 損失とACC
score = model.evaluate(X_test, y_test, verbose=0)
print('test loss:', score[0])
print('test accuracy:', score[1])

test loss: 0.0051541500724852085
test accuracy: 1.0


# 問題4

## データ準備

In [71]:
# 説明変数と目的変数に分割
y = df["Species"]
X = df.loc[:, ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]

# 訓練データ/テストデータ/評価データに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# onehotベクトル化
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train)
y_val_one_hot = enc.transform(y_val)
y_test_one_hot = enc.transform(y_test)

# 正規化
X_train /= 255
X_test /= 255
X_val /= 255

## モデル定義

In [72]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(4,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(3, activation = tf.nn.softmax))

In [73]:
# モデル構造の確認
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 50)                250       
_________________________________________________________________
dense_20 (Dense)             (None, 100)               5100      
_________________________________________________________________
dense_21 (Dense)             (None, 3)                 303       
Total params: 5,653
Trainable params: 5,653
Non-trainable params: 0
_________________________________________________________________


In [74]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [75]:
# モデルのコンパイル
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [76]:
# 学習実行
model.fit(
    X_train,
    y_train_one_hot,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val_one_hot)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


<keras.callbacks.History at 0x7fab25310e48>

In [77]:
# 予測値の最大インデックスを出力クラスとする
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred", y_pred)
print('y_test', y_test.ravel())

y_pred [2 1 0 2 0 2 0 2 2 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
y_test [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]


In [78]:
# 損失とACC
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

Train loss: 0.19666549563407898
Train accuracy: 0.9333333373069763


# 問題5

## データの準備

In [79]:
dataset_path ="train.csv"
df = pd.read_csv(dataset_path)

y = df["SalePrice"]
X = df.loc[:, ["GrLivArea", "YearBuilt"]]
y = np.array(y)
X = np.array(X)
y = y.astype(np.int)[:, np.newaxis]
y = np.log(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

## モデル定義

In [80]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(2,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(3, activation = tf.keras.activations.linear))

In [81]:
# モデル構造の確認
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 50)                150       
_________________________________________________________________
dense_23 (Dense)             (None, 100)               5100      
_________________________________________________________________
dense_24 (Dense)             (None, 3)                 303       
Total params: 5,553
Trainable params: 5,553
Non-trainable params: 0
_________________________________________________________________


In [82]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [83]:
# モデルのコンパイル
model.compile(
    loss='mse',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['mse']
)

In [84]:
# 学習実行
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


<keras.callbacks.History at 0x7fab259d2fd0>

In [85]:
# 予測値出力
y_pred_proba = model.predict(X_test)
y_pred = y_pred_proba
print("y_pred", y_pred.ravel()[:5])
print('y_test', y_test.ravel()[:5])

y_pred [11.861548 12.627037 12.200188 12.429511 12.539381]
y_test [12.20918779 11.79810441 11.60823564 12.16525065 11.38509209]


In [86]:
# 損失とACC
score = model.evaluate(X_test, y_test, verbose=0)
print('loss (test):', score[0])
print('Mean Squared Error (test):', score[1])

loss (test): 22.0289306640625
Mean Squared Error (test): 22.0289306640625


# 問題6

## データ準備

In [87]:
# 読み込み
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#　平滑化
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# 正規化
X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

# 変形
y_train = y_train.astype(np.int)[:, np.newaxis]
y_test = y_test.astype(np.int)[:, np.newaxis]

# one-hotベクトル化
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:])
y_test_one_hot = enc.fit_transform(y_test[:])

# 分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

## モデル定義

In [88]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, activation = tf.nn.relu, input_shape=(784,)))
model.add(tf.keras.layers.Dense(100))
model.add(tf.keras.layers.Dense(10, activation = tf.nn.softmax))

In [89]:
# 損失が3回改善しなかったら早期学習終了
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [90]:
# モデルのコンパイル
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [91]:
# 学習実行
model.fit(
    X_train[:1000],
    y_train[:1000],
    batch_size=10,
    epochs=100,
    verbose=1,
    callbacks=[callback],
    validation_data=(X_val, y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


<keras.callbacks.History at 0x7fab49dabcc0>

In [92]:
# 予測値出力
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred", y_pred[:10])
print('y_test', y_test.ravel()[:10])

y_pred [7 2 1 0 4 1 4 9 6 9]
y_test [7 2 1 0 4 1 4 9 5 9]


In [93]:
# 損失とACC
score = model.evaluate(X_test, y_test_one_hot, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Test loss: 0.7676284909248352
Test accuracy: 0.8608999848365784
