<a href="https://colab.research.google.com/github/jumbokh/encoding-BCH/blob/main/BCH_Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 連接雲端硬碟

In [None]:
from google.colab import drive
drive.mount('/content/gdrive') # Follow the link, sign in with a Google account, then paste the authorization code back into the input box

In [None]:
# Expose the Drive "Colab Notebooks" folder as /app and make it the working
# directory, so the relative paths used by later cells resolve inside it.
# -n replaces the /app link itself on re-runs; without it ln follows the
# existing link and creates a nested link inside the Drive folder instead.
!ln -sfn /content/gdrive/My\ Drive/Colab\ Notebooks/ /app
%cd /app

In [None]:
!ls # Confirm that the directory contains the training data

### 載入必要套件

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers

### 讀取訓練資料

In [None]:
# Load the frame that later cells use as the network inputs (x): a
# comma-separated text file without a header row.
data_Training = pd.read_csv("data/Training_one_to_50000.txt", header=None)
# Only the last expression of a cell is displayed, so print the shape explicitly.
print(data_Training.shape)
data_Training.head()

In [None]:
# Show the dtype pandas inferred for each column of the training frame.
data_Training.dtypes

In [None]:
# (rows, columns) of the training frame.
data_Training.shape

In [None]:
# Load the frame that later cells use as the training targets (y), despite its
# "Validation" name: a comma-separated text file without a header row.
data_Validation = pd.read_csv("data/Validation_one_to_50000.txt", header=None)
# Only the last expression of a cell is displayed, so print the shape explicitly.
print(data_Validation.shape)
data_Validation.head()

In [None]:
# Show the dtype pandas inferred for each column of the target frame.
data_Validation.dtypes

分割數據集（Dataset）：將資料隨機打散後切割成兩個部份，訓練數據（Training data）與驗證數據（Validation data）；測試數據（Testing data）則在後面另外從獨立的檔案讀取。


In [None]:
# Shuffle the row positions and split the input frame 80% / 20% into
# training and validation subsets.
# NOTE(review): the permutation is unseeded, so the split differs on every run.
data_num = data_Training.shape[0]
indexes = np.random.permutation(data_num)

# A single cut point keeps the two subsets disjoint. Starting the validation
# slice at 20% would overlap the training slice and leak training rows into
# validation.
split_at = int(data_num * 0.8)
train_indexes = indexes[:split_at]
val_indexes = indexes[split_at:]

# Select the rows for each subset by index label.
x_train = data_Training.loc[train_indexes]
x_val = data_Training.loc[val_indexes]

In [None]:
# Split the targets with exactly the same rows as the inputs. Drawing a second,
# independent permutation here would pair every input row with the target of
# some unrelated row, so the network would be trained on scrambled labels.
if len(data_Validation) != len(data_Training):
    raise ValueError(
        "inputs and targets must have the same number of rows: "
        f"{len(data_Training)} != {len(data_Validation)}"
    )

# Reuse the row labels already chosen for x_train / x_val.
y_train = data_Validation.loc[x_train.index]
y_val = data_Validation.loc[x_val.index]

### 建立並訓練網路模型

這裡建構由七層隱藏層（每層 300 個神經元）與一層輸出層組成的全連接網路架構，並且使用ReLU作為隱藏層的激活函數，而由於需得到線性輸出，故輸出層不使用任何激活函數。

In [None]:
# 建立一個Sequential型態的model
model = keras.Sequential(name='model-1')
model.add(layers.Dense(300, activation='relu', input_shape=(75,)))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dense(300, activation='relu'))
model.add(layers.Dense(63, activation='tanh')) # 輸出層
# 顯示網路模型架構
model.summary()

In [None]:
# Adam optimizer with binary cross-entropy computed on raw logits.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    # Predictions are logits, so the decision boundary is 0 (sigmoid(0) == 0.5),
    # not the metric's default threshold of 0.5.
    metrics=[keras.metrics.BinaryAccuracy(threshold=0.0)],
)

In [None]:
model_dir = 'BCH-logs'

# TensorBoard callback: records training information as TensorBoard event files.
log_dir = os.path.join(model_dir, 'model-1')
model_cbk = keras.callbacks.TensorBoard(log_dir=log_dir)

# ModelCheckpoint callback: keeps only the best model seen so far.
# `monitor` must name a quantity that is actually logged during fit. The model
# is compiled with a loss and BinaryAccuracy only, so 'val_mean_absolute_error'
# never exists and no checkpoint would ever be written. Monitor the validation
# loss instead; mode='min' means smaller is better.
model_mckp = keras.callbacks.ModelCheckpoint(
    model_dir + '/Best-model-1.h5',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
)

In [None]:
# Train on the (x_train, y_train) pairs; the (x_val, y_val) pairs are passed as
# validation data. The returned History object is kept for the plots below.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=5,        # passes over the training set
    batch_size=74,   # samples per gradient update
    # TensorBoard logging plus best-model checkpointing
    callbacks=[model_cbk, model_mckp],
)

In [None]:
# List the names of the series recorded during training; these are the keys the plotting cells index into.
history.history.keys()

In [None]:
# Training vs. validation loss per epoch. The compiled loss is binary
# cross-entropy, so the figure is titled accordingly (not "Mean square error").
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set_title('Binary cross-entropy loss')
ax.set_ylabel('loss')
ax.set_xlabel('epochs')
ax.legend(loc='upper right');  # trailing ';' hides the Legend repr

In [None]:
# Training vs. validation metric per epoch. The model is compiled with
# BinaryAccuracy only, so the history holds 'binary_accuracy' /
# 'val_binary_accuracy'; indexing 'mean_absolute_error' raises KeyError.
fig, ax = plt.subplots()
ax.plot(history.history['binary_accuracy'], label='train')
ax.plot(history.history['val_binary_accuracy'], label='validation')
ax.set_title('Binary accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epochs')
ax.legend(loc='lower right');  # trailing ';' hides the Legend repr

In [None]:
# Load the TensorBoard notebook extension and open it on port 9530, reading
# from the BCH-logs directory that the TensorBoard callback writes under.
%load_ext tensorboard
%tensorboard --port 9530 --logdir BCH-logs


In [None]:
# Load the frame used below as the test inputs (x_test): a comma-separated
# text file without a header row.
test_data_Training = pd.read_csv("data/text_Training_one_to_50000.txt", header=None)
# Only the last expression of a cell is displayed, so print the shape explicitly.
print(test_data_Training.shape)
test_data_Training.head()

In [None]:
# Load the frame used below as the test targets (y_test): a comma-separated
# text file without a header row.
test_data_Validation = pd.read_csv("data/text_Validation_one_to_50000.txt", header=None)
# Only the last expression of a cell is displayed, so print the shape explicitly.
print(test_data_Validation.shape)
test_data_Validation.head()

In [None]:
# Reload the checkpoint file from disk (replacing the in-memory model) and run
# it on the test inputs; the test targets are kept alongside for comparison.
model = keras.models.load_model('BCH-logs/Best-model-1.h5')
x_test, y_test = test_data_Training, test_data_Validation
y_pred = model.predict(x_test)

In [None]:
# Model output for the test sample at position 1 (the second sample).
y_pred[1]

In [None]:
# Expected target for the test sample at position 1, for comparison with
# y_pred[1]. y_test is a DataFrame, so y_test[1] would select *column* 1
# across all samples; .iloc[1] selects the row for that sample instead.
y_test.iloc[1].to_numpy()

In [None]:
# Scratch arithmetic (3 to the 5th power); nothing else in the visible notebook uses this value.
3**5