# Neural Network - Lab

### Part 1: Load  data

Import "bank-data.csv"

In [None]:
import pandas as pd
# The CSV is read with ';' as the field separator (not the default comma).
bankData = pd.read_csv(
    '/content/bank-data.csv',
    sep=';',
)
# Preview the first rows to sanity-check the load.
bankData.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


### Part 2: Preprocess data

Preprocess the dataset as you have done before

#### 2.1 Binary encoding

Use LabelEncoder to encode the following columns:
- y
- default
- housing
- loan

In [None]:
# Binary encoding: replace each two-valued text column with integer codes
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# One shared encoder, re-fitted for every column in turn (same order as the
# task list requires nothing more than a per-column fit + transform).
for binary_col in ['y', 'housing', 'default', 'loan']:
    bankData[binary_col] = le.fit_transform(bankData[binary_col])

bankData.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,0,1787,0,0,cellular,19,oct,79,1,-1,0,unknown,0
1,33,services,married,secondary,0,4789,1,1,cellular,11,may,220,1,339,4,failure,0
2,35,management,single,tertiary,0,1350,1,0,cellular,16,apr,185,1,330,1,failure,0
3,30,management,married,tertiary,0,1476,1,1,unknown,3,jun,199,4,-1,0,unknown,0
4,59,blue-collar,married,secondary,0,0,1,0,unknown,5,may,226,1,-1,0,unknown,0


#### 2.2 Convert categorical variables into dummy columns

(1) Use pd.get_dummies to convert the following categorical variables into dummy columns
- job
- marital
- education
- contact
- month
- poutcome

(2) Drop columns that have been converted

In [None]:
# Convert categorical variables into dummy (indicator) columns
categorical_cols = ['job', 'marital', 'education', 'contact', 'month', 'poutcome']

# Build one indicator frame per categorical column, each prefixed with the
# column's own name, and append them all to the right of the existing columns.
indicator_frames = [
    pd.get_dummies(bankData[col], prefix=col) for col in categorical_cols
]
bankData = pd.concat([bankData, *indicator_frames], axis=1)

# The original text columns are now redundant, so remove them.
bankData = bankData.drop(columns=categorical_cols)

In [None]:
# Preview the first rows after the categorical columns were replaced by dummies.
bankData.head()

Unnamed: 0,age,default,balance,housing,loan,day,duration,campaign,pdays,previous,...,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,30,0,1787,0,0,19,79,1,-1,0,...,0,0,0,0,1,0,0,0,0,1
1,33,0,4789,1,1,11,220,1,339,4,...,0,0,1,0,0,0,1,0,0,0
2,35,0,1350,1,0,16,185,1,330,1,...,0,0,0,0,0,0,1,0,0,0
3,30,0,1476,1,1,3,199,4,-1,0,...,1,0,0,0,0,0,0,0,0,1
4,59,0,0,1,0,5,226,1,-1,0,...,0,0,1,0,0,0,0,0,0,1


In [None]:
# List every column with its non-null count and dtype to confirm the encoding.
bankData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 49 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   age                  4521 non-null   int64
 1   default              4521 non-null   int64
 2   balance              4521 non-null   int64
 3   housing              4521 non-null   int64
 4   loan                 4521 non-null   int64
 5   day                  4521 non-null   int64
 6   duration             4521 non-null   int64
 7   campaign             4521 non-null   int64
 8   pdays                4521 non-null   int64
 9   previous             4521 non-null   int64
 10  y                    4521 non-null   int64
 11  job_admin.           4521 non-null   uint8
 12  job_blue-collar      4521 non-null   uint8
 13  job_entrepreneur     4521 non-null   uint8
 14  job_housemaid        4521 non-null   uint8
 15  job_management       4521 non-null   uint8
 16  job_retired          452

#### 2.3 Train/Test separation

Perform hold-out method
- 60% training set
- 40% testing set

In [None]:
# Hold-out split: a random 60% of the rows form the training set and the
# remaining rows (selected by index) form the 40% test set.
# A fixed random_state makes the split -- and every metric computed from it --
# reproducible across notebook re-runs.
bankData_train = bankData.sample(frac=0.6, random_state=42)
bankData_test = bankData.drop(bankData_train.index)

# Class counts of the target in each partition, to check the class balance.
print(pd.crosstab(bankData_train['y'],columns = 'count'))
print(pd.crosstab(bankData_test['y'],columns = 'count'))

col_0  count
y           
0       2405
1        308
col_0  count
y           
0       1595
1        213


##### X/y separation

In [None]:
# Split each partition into the target vector (y) and the feature frame (X).
# drop() returns a new DataFrame, so the source partitions stay untouched.
y_train = bankData_train['y']
X_train = bankData_train.drop(columns=['y'])

y_test = bankData_test['y']
X_test = bankData_test.drop(columns=['y'])

#### 2.4 Feature Scaling

It is always a good practice to scale the features so that all of them can be uniformly evaluated

In [None]:
# Min-max scaling: one scaler instance, fitted later on the training features.
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()

In [None]:
# Fit the scaler on the training features and scale them in the same step.
train_scaled_values = mms.fit_transform(X_train)

# fit_transform returns a bare array, so wrap it back into a DataFrame that
# keeps the original column labels and row index.
X_train_scaled = pd.DataFrame(train_scaled_values,
                              columns=X_train.columns,
                              index=X_train.index)

X_train_scaled.head()

Unnamed: 0,age,default,balance,housing,loan,day,duration,campaign,pdays,previous,...,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
1442,0.84375,0.0,0.071567,0.0,0.0,0.633333,0.057528,0.040816,0.237267,0.12,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
183,0.328125,0.0,0.040008,0.0,0.0,0.333333,0.05508,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
499,0.359375,0.0,0.051632,1.0,0.0,0.6,0.202366,0.020408,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3440,0.203125,0.0,0.151424,1.0,1.0,0.566667,0.161159,0.020408,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4437,0.390625,0.0,0.141719,0.0,0.0,1.0,0.066503,0.020408,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
# Scale the test features with the min/max already learned from the TRAINING
# set (transform only). Re-fitting the scaler on the test set would map train
# and test onto different scales and leak test-set statistics into the
# preprocessing. As a consequence, test values may fall slightly outside [0, 1].
X_test_scaled = pd.DataFrame(
                    mms.transform(X_test),
                    columns=X_test.columns,
                    index=X_test.index)

X_test_scaled.head()

Unnamed: 0,age,default,balance,housing,loan,day,duration,campaign,pdays,previous,...,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,0.161765,0.0,0.068455,0.0,0.0,0.6,0.024826,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,0.205882,0.0,0.10875,1.0,1.0,0.333333,0.0715,0.0,0.389908,0.166667,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0.25,0.0,0.04859,1.0,0.0,0.433333,0.111552,0.0,0.379587,0.083333,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7,0.294118,0.0,0.046442,1.0,0.0,0.166667,0.048659,0.023256,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
11,0.352941,0.0,0.048013,1.0,0.0,0.533333,0.036081,0.023256,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Artificial Neural Network : sklearn

### Part 3: Train a model

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Baseline MLP on the raw (unscaled) features: three hidden layers of 10 units.
# random_state pins the random weight initialisation and batch shuffling so the
# reported metrics can be reproduced on a re-run.
mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

**Scaled**

In [None]:
# Same architecture as the unscaled baseline, trained on the min-max scaled
# features. random_state pins the random weight initialisation and batch
# shuffling so the result is reproducible and not driven by a lucky seed.
mlp_scaled = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000, random_state=42)
mlp_scaled.fit(X_train_scaled, y_train)

### Part 4: Model Evaluation

Evaluation metrics
- confusion matrix
- accuracy
- precision, recall, f1-score

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [None]:
# Confusion matrix: rows are the true labels, columns the predicted labels
res = mlp.predict(X_test)
pd.crosstab(y_test, res)

col_0,0,1
y,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1584,11
1,195,18


In [None]:
# Overall accuracy first, then per-class precision / recall / F1.
acc = accuracy_score(y_test, res)
print("Accuracy:\t %.3f" % acc)
print(classification_report(y_test, res))

Accuracy:	 0.886
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      1595
           1       0.62      0.08      0.15       213

    accuracy                           0.89      1808
   macro avg       0.76      0.54      0.54      1808
weighted avg       0.86      0.89      0.85      1808



**Scaled**

In [None]:
# Confusion matrix for the model trained on scaled features
# (rows are the true labels, columns the predicted labels)
res_scaled = mlp_scaled.predict(X_test_scaled)
pd.crosstab(y_test, res_scaled)

col_0,0,1
y,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1528,67
1,127,86


In [None]:
# Overall accuracy first, then per-class precision / recall / F1 (scaled model).
acc_scaled = accuracy_score(y_test, res_scaled)
print("Accuracy:\t %.3f" % acc_scaled)
print(classification_report(y_test, res_scaled))

Accuracy:	 0.893
              precision    recall  f1-score   support

           0       0.92      0.96      0.94      1595
           1       0.56      0.40      0.47       213

    accuracy                           0.89      1808
   macro avg       0.74      0.68      0.71      1808
weighted avg       0.88      0.89      0.88      1808



### Part 5: Model tuning

#### Note:

After building the classifier, try answering the following questions.

1. What is the Accuracy Score?
2. If you change your preprocessing method, can you improve the model?
3. If you change your parameter settings, can you improve the model?


##### **1.** Accuracy Score ของ mlp เท่ากับ 0.886 และ Accuracy Score ของ mlp_scaled เท่ากับ 0.893

##### **2.** การทำ preprocessing (scaling) คือการปรับขนาด features เชิงตัวเลขให้อยู่ในช่วงเดียวกัน (เช่น การใช้ Min-Max scaling หรือ standardization) สามารถช่วยให้อัลกอริธึม gradient-based optimization ลู่เข้า (converge) ได้เร็วขึ้นในการค้นหา weights (w) ที่เหมาะสมสำหรับแต่ละ feature และทำให้โมเดลมีความไวต่อขนาดของ features อินพุตน้อยลง จึงช่วย improve model ได้ แต่ในกรณีนี้ Accuracy ไม่ต่างกันมาก

##### **3.**
**เปลี่ยน hidden_layer_sizes:** การเพิ่มจำนวน neurons ใน hidden layers (เช่น จาก (10, 10, 10) เป็น (20, 20)) สามารถช่วยให้ model สามารถจับรูปแบบที่ซับซ้อนมากขึ้นในข้อมูลได้ ซึ่งจะเป็นประโยชน์หากข้อมูลมีความสัมพันธ์ที่ซับซ้อน

การลดจำนวน neurons ใน hidden layers (เช่น จาก (10, 10, 10) เป็น (5, 5, 5, 5)) อาจเกิด model ที่ง่ายกว่าด้วยความจุที่ลดลง วิธีนี้สามารถช่วยป้องกัน overfitting มากเกินไป กรณีที่ initial model ซับซ้อนเกินไปสำหรับ dataset

**เปลี่ยน max_iter:** การเพิ่มค่า max_iter (เช่น จาก 1,000 เป็น 2,000) ช่วยให้ neural network สามารถ train ได้หลาย epochs มากขึ้น มีผลดีหาก model ยังไม่ลู่เข้า (converge) หลังจากผ่านไป 1,000 epochs หรือจำเป็นต้องได้รับการ train เพิ่มเติมเพื่อให้ได้ประสิทธิภาพที่ดีที่สุด

การลด max_iter (เช่น จาก 1,000 เป็น 500) อาจทำให้ประสิทธิภาพของ model ลดลง (underfitting) หาก model ต้องการเวลาการ train เพิ่มเติมเพื่อเรียนรู้จากข้อมูลอย่างเพียงพอ

การเปลี่ยนแปลง max_iter จะปรับปรุงโมเดลหรือไม่นั้นขึ้นอยู่กับว่าโมเดลนั้นลู่เข้า (converge) ภายในจำนวน epochs ที่กำหนดอยู่แล้วหรือไม่ หากเป็นเช่นนั้น การเพิ่ม max_iter อาจไม่ส่งผลกระทบที่มีนัยสำคัญ

In [None]:
hidden_layer_sizes_list = [(10, 10, 10), (20, 20), (5, 5, 5, 5)]

# Iterate over the different configurations.
# - Dedicated names (mlp_tuned / res_tuned) keep the baseline `mlp` and `res`
#   from the earlier cells intact, so those cells can be re-run in any order.
# - A fixed random_state means differences between runs come from the
#   architecture, not from a different random initialisation.
# NOTE(review): these models are trained on the unscaled X_train; consider
# repeating the sweep on X_train_scaled / X_test_scaled.
for hidden_layer_sizes in hidden_layer_sizes_list:
    mlp_tuned = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                              max_iter=1000,
                              random_state=42)
    mlp_tuned.fit(X_train, y_train)

    res_tuned = mlp_tuned.predict(X_test)

    print(f"Hidden Layer Sizes: {hidden_layer_sizes}")
    print("Accuracy:\t %.3f" % accuracy_score(y_test, res_tuned))
    print(classification_report(y_test, res_tuned))
    print("-" * 65)

Hidden Layer Sizes: (10, 10, 10)
Accuracy:	 0.891
              precision    recall  f1-score   support

           0       0.90      0.98      0.94      1595
           1       0.61      0.21      0.31       213

    accuracy                           0.89      1808
   macro avg       0.76      0.59      0.62      1808
weighted avg       0.87      0.89      0.87      1808

-----------------------------------------------------------------
Hidden Layer Sizes: (20, 20)
Accuracy:	 0.881
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      1595
           1       0.43      0.05      0.08       213

    accuracy                           0.88      1808
   macro avg       0.66      0.52      0.51      1808
weighted avg       0.83      0.88      0.84      1808

-----------------------------------------------------------------
Hidden Layer Sizes: (5, 5, 5, 5)
Accuracy:	 0.889
              precision    recall  f1-score   support

           0 

In [None]:
max_iter_values = [100, 500, 2000]

# Iterate over the different max_iter values.
# - Dedicated names (mlp_tuned / res_tuned) keep the baseline `mlp` and `res`
#   from the earlier cells intact, so those cells can be re-run in any order.
# - A fixed random_state means differences between runs come from the
#   iteration budget, not from a different random initialisation.
# NOTE(review): these models are trained on the unscaled X_train; consider
# repeating the sweep on X_train_scaled / X_test_scaled.
for max_iter in max_iter_values:
    mlp_tuned = MLPClassifier(hidden_layer_sizes=(10, 10, 10),
                              max_iter=max_iter,
                              random_state=42)
    mlp_tuned.fit(X_train, y_train)

    res_tuned = mlp_tuned.predict(X_test)

    print(f"Max Iterations: {max_iter}")
    print("Accuracy:\t %.3f" % accuracy_score(y_test, res_tuned))
    print(classification_report(y_test, res_tuned))
    print("-" * 65)

Max Iterations: 100
Accuracy:	 0.887
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      1595
           1       0.64      0.08      0.15       213

    accuracy                           0.89      1808
   macro avg       0.77      0.54      0.54      1808
weighted avg       0.86      0.89      0.85      1808

-----------------------------------------------------------------
Max Iterations: 500
Accuracy:	 0.885
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      1595
           1       0.57      0.10      0.17       213

    accuracy                           0.88      1808
   macro avg       0.73      0.54      0.55      1808
weighted avg       0.85      0.88      0.85      1808

-----------------------------------------------------------------
Max Iterations: 2000
Accuracy:	 0.887
              precision    recall  f1-score   support

           0       0.89      0.99      0.94    

## Artificial Neural Network : keras

Fitting a logistic regression model

### Part 3: Train a model

In [None]:
# Report the installed Keras version.
import keras
print(keras.__version__)

2.12.0


In [None]:
from keras import models
from keras import layers

In [None]:
# (rows, feature columns) of the unscaled training features
X_train.shape

(2713, 48)

In [None]:
# (rows, feature columns) of the scaled training features
X_train_scaled.shape

(2713, 48)

In [None]:
# A linear 48-unit layer feeding a single sigmoid output unit.
# NOTE(review): input_shape=(None, 48) declares a 3-D input (batch, steps, 48),
# which is why the data is later wrapped with np.expand_dims; for ordinary
# tabular rows input_shape=(48,) would be the usual choice -- confirm intent.
nn = models.Sequential([
    layers.Dense(48, activation='linear', input_shape=(None, 48)),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# SGD optimiser, binary cross-entropy loss, accuracy reported as the metric.
nn.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer output shapes and parameter counts.
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, None, 48)          2352      
                                                                 
 dense_3 (Dense)             (None, None, 1)           49        
                                                                 
Total params: 2,401
Trainable params: 2,401
Non-trainable params: 0
_________________________________________________________________


In [None]:
import numpy as np
# Prepend a length-1 axis so the arrays match the model's 3-D input:
# the whole training set becomes a single sample of shape (1, rows, features),
# and the labels become (1, rows).
X_train_add = np.asanyarray(X_train)[np.newaxis, ...]
y_train_add = np.asanyarray(y_train)[np.newaxis, ...]

In [None]:
# Train on the unscaled data for 100 epochs. Because of the added leading axis
# the whole training set is passed as one sample, i.e. one batch per epoch.
history = nn.fit(X_train_add,y_train_add,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

**Scale**

In [None]:
# Train on the scaled features for 100 epochs (2-D input, one row per sample).
# NOTE(review): `nn` is the same object that was already fitted on the unscaled
# data, so this call continues training those weights instead of starting from
# a fresh model. Any later evaluation of `nn` on unscaled inputs therefore
# scores the scaled-trained weights. Consider a separate model per experiment.
history_scaled = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### Part 4: Model Evaluation

In [None]:
# Prepend a length-1 axis to the unscaled test data, mirroring the layout used
# for the unscaled training arrays: (1, rows, features) and (1, rows).
X_test_add = np.asanyarray(X_test)[np.newaxis, ...]
y_test_add = np.asanyarray(y_test)[np.newaxis, ...]

In [None]:
# Loss and accuracy of `nn` on the unscaled test data.
# NOTE(review): in cell order `nn` has by now also been trained on the scaled
# features (history_scaled), so this measures scaled-trained weights on
# unscaled inputs rather than a model trained only on unscaled data.
test_loss, test_acc = nn.evaluate(X_test_add, y_test_add)
print('Test Loss: %s\nTest Accuracy: %s' % (test_loss,test_acc))

Test Loss: 11.898946762084961
Test Accuracy: 0.21681416034698486


In [None]:
# Per-epoch training metrics recorded during the unscaled fit.
history.history

{'loss': [8.029413223266602,
  2.4076905250549316,
  2.3891794681549072,
  2.3886168003082275,
  2.388425827026367,
  2.3856630325317383,
  2.3854916095733643,
  2.383188009262085,
  2.382948160171509,
  2.380647659301758,
  2.3764290809631348,
  2.3562171459198,
  2.29811429977417,
  2.270775556564331,
  2.266965866088867,
  2.2656099796295166,
  2.2647242546081543,
  2.2592051029205322,
  2.238877058029175,
  2.2289021015167236,
  2.1983048915863037,
  2.1566193103790283,
  2.1155688762664795,
  2.100203037261963,
  2.099846124649048,
  2.099771499633789,
  2.0994873046875,
  2.099649429321289,
  2.099435806274414,
  2.099595069885254,
  2.099355936050415,
  2.099520444869995,
  2.09928560256958,
  2.099452018737793,
  2.099212646484375,
  2.099381923675537,
  2.0991413593292236,
  2.0993118286132812,
  2.0990700721740723,
  2.0992419719696045,
  2.098999500274658,
  2.0991721153259277,
  2.0989291667938232,
  2.099102258682251,
  2.0988595485687256,
  2.099032402038574,
  2.09879040

**Scale**

In [None]:
# Loss and accuracy of `nn` on the scaled test features.
test_loss_scaled, test_acc_scaled = nn.evaluate(X_test_scaled, y_test)
print('Test Loss: %s\nTest Accuracy: %s' % (test_loss_scaled,test_acc_scaled))

Test Loss: 0.28165361285209656
Test Accuracy: 0.894911527633667


In [None]:
# Per-epoch training metrics recorded during the scaled fit.
history_scaled.history

{'loss': [0.4332336485385895,
  0.3526853024959564,
  0.34627005457878113,
  0.34329134225845337,
  0.34097155928611755,
  0.3387063443660736,
  0.3367612063884735,
  0.3348955512046814,
  0.33329299092292786,
  0.3316706120967865,
  0.33042794466018677,
  0.3289601504802704,
  0.32775792479515076,
  0.32655730843544006,
  0.325387179851532,
  0.3242611885070801,
  0.32325395941734314,
  0.32224681973457336,
  0.32125139236450195,
  0.32017412781715393,
  0.3193478286266327,
  0.31842079758644104,
  0.31756654381752014,
  0.31674644351005554,
  0.3158336579799652,
  0.3149379789829254,
  0.31424060463905334,
  0.31343090534210205,
  0.31262993812561035,
  0.3118389844894409,
  0.31108012795448303,
  0.31027859449386597,
  0.3095405399799347,
  0.30885055661201477,
  0.3081107437610626,
  0.3073071837425232,
  0.30658942461013794,
  0.3058350384235382,
  0.30515792965888977,
  0.3044547736644745,
  0.3037559986114502,
  0.30309900641441345,
  0.3023800551891327,
  0.3016667068004608,
  

### Part 5: Model tuning

#### Note:

After building the classifier, try answering the following questions.

1. What is the Accuracy Score?
2. If you change your preprocessing method, can you improve the model?
3. If you change your parameter settings, can you improve the model?

##### **1.** Accuracy Score ของ keras ประมาณ 0.217 และ Accuracy Score ของ keras_scaled ประมาณ 0.895

##### **2.** การทำ preprocessing (scaling) มีผลกระทบเชิงบวกต่อประสิทธิภาพของโมเดล เนื่องจาก keras_scaled มีความแม่นยำมากกว่าเยอะ และเป็นขั้นตอน preprocessing ที่ใช้กันทั่วไปเมื่อทำงานกับ neural networks ช่วยให้ model เรียนรู้ได้อย่างมีประสิทธิภาพมากขึ้น การปรับขนาดทำให้แน่ใจได้ว่าฟีเจอร์อินพุตมีช่วงที่ใกล้เคียงกัน ป้องกันไม่ให้ features บางอย่างครอบงำ features อื่นๆ ในระหว่างการ train และช่วยให้ model ลู่เข้า (converge) ได้เร็วขึ้นและ generalize ได้ดีขึ้น

##### **3.** จากโค้ดด้านล่าง
*   nn2 มี 3 hidden layers ซึ่งอาจต้องการข้อมูลการ train มากขึ้นและเวลาการ train นานขึ้นเพื่อป้องกันการ overfitting มากเกินไปเนื่องจากความซับซ้อนที่เพิ่มขึ้น
*    nn2 อาจมีแนวโน้มที่จะเกิดปัญหา vanishing หรือ exploding gradient

*   nn3 ใช้ activation function แบบ ReLU ซึ่งทำให้ model มีความไม่เชิงเส้น (non-linearity) และอาจช่วยจับความสัมพันธ์ที่ไม่เชิงเส้นในข้อมูล
*   จำนวนหน่วยใน hidden layer ของ nn3 (64 หน่วย) อาจส่งผลต่อความสามารถของ model ในการเรียนรู้รูปแบบที่ซับซ้อน



In [None]:
# Deeper variant: three 48-unit linear layers followed by one sigmoid output.
# The 3-D input declaration (None, 48) matches the expand_dims-wrapped arrays.
nn2 = models.Sequential([
    layers.Dense(48, activation='linear', input_shape=(None, 48)),
    layers.Dense(48, activation='linear'),
    layers.Dense(48, activation='linear'),
    layers.Dense(1, activation='sigmoid'),
])

# Same training setup as the first Keras model: SGD + binary cross-entropy.
nn2.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer output shapes and parameter counts of nn2.
nn2.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_20 (Dense)            (None, None, 48)          2352      
                                                                 
 dense_21 (Dense)            (None, None, 48)          2352      
                                                                 
 dense_22 (Dense)            (None, None, 48)          2352      
                                                                 
 dense_23 (Dense)            (None, None, 1)           49        
                                                                 
Total params: 7,105
Trainable params: 7,105
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train nn2 for 100 epochs on the unscaled data wrapped as a single sample.
history2 = nn2.fit(X_train_add,y_train_add,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Loss and accuracy of nn2 on the unscaled test data (same wrapped layout
# as the arrays it was trained on).
test_loss2, test_acc2 = nn2.evaluate(X_test_add, y_test_add)
print('Test Loss: %s\nTest Accuracy: %s' % (test_loss2,test_acc2))

Test Loss: 1.8268595933914185
Test Accuracy: 0.8810840845108032


In [None]:
# Non-linear variant: one 64-unit ReLU hidden layer and a sigmoid output.
# input_shape=(48,) means ordinary 2-D input, one row of 48 features per sample.
nn3 = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(48,)),
    layers.Dense(1, activation='sigmoid'),
])

# Same training setup as the other Keras models: SGD + binary cross-entropy.
nn3.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer output shapes and parameter counts of nn3.
nn3.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 64)                3136      
                                                                 
 dense_25 (Dense)            (None, 1)                 65        
                                                                 
Total params: 3,201
Trainable params: 3,201
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train nn3 for 100 epochs on the unscaled training features.
# NOTE(review): X_train_scaled is available and may train better -- confirm
# whether the unscaled input is intentional here.
history3 = nn3.fit(X_train, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Evaluate nn3 -- the ReLU model trained in the preceding fit cell -- on the
# unscaled test features, matching the unscaled data it was trained on.
# (The call previously went to `nn`, so the reported "nn3" metrics actually
# belonged to a different model.)
test_loss3, test_acc3 = nn3.evaluate(X_test, y_test)
print('Test Loss: %s\nTest Accuracy: %s' % (test_loss3,test_acc3))

Test Loss: 1.8172096014022827
Test Accuracy: 0.8821902871131897
