### **Build a regression model with Keras**

#### **A. Creating baseline model**

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from sklearn.model_selection import train_test_split

In [2]:
filepath='https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
concrete_data = pd.read_csv(filepath)

In [3]:
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
concrete_data.shape

(1030, 9)

In [5]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [6]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [7]:
# Create X and y (features and labels)
X = concrete_data.drop("Strength", axis=1)
y = concrete_data["Strength"]

In [8]:
# View X
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [9]:
# View y
y.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [10]:
# Splitting data into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
len(X), len(X_train), len(X_test)

(1030, 721, 309)

In [11]:
# Create a model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [12]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Train and test the model at the same time
    res = model.fit(X_train, y_train, epochs=50, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 144.7666473388672
Cycle #2: mean_squared_error 119.5103759765625
Cycle #3: mean_squared_error 111.17548370361328
Cycle #4: mean_squared_error 105.7327651977539
Cycle #5: mean_squared_error 110.7430191040039
Cycle #6: mean_squared_error 107.98838806152344
Cycle #7: mean_squared_error 101.95978546142578
Cycle #8: mean_squared_error 116.21357727050781
Cycle #9: mean_squared_error 80.99010467529297
Cycle #10: mean_squared_error 72.45655822753906
Cycle #11: mean_squared_error 69.4825668334961
Cycle #12: mean_squared_error 69.41679382324219
Cycle #13: mean_squared_error 63.2273063659668
Cycle #14: mean_squared_error 62.11308288574219
Cycle #15: mean_squared_error 59.61676025390625
Cycle #16: mean_squared_error 59.02305603027344
Cycle #17: mean_squared_error 58.47664260864258
Cycle #18: mean_squared_error 55.28010940551758
Cycle #19: mean_squared_error 55.35075759887695
Cycle #20: mean_squared_error 49.393062591552734
Cycle #21: mean_squared_error 49.7838287353515

In [13]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 65.40029121398926
The standard deviation of the mean squared errors: 23.132115564776328


#### **B. Normalize the data**

In [14]:
X_norm = (X - X.mean())/X.std()
X_norm.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
5,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,0.701883
6,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
7,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597
8,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,-0.279597
9,1.85474,-0.856472,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597


In [15]:
# Create a model
model_2 = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model_2.compile(optimizer='adam', loss='mean_squared_error')

In [16]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Splitting data into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3)
    #Train and test the model at the same time
    res = model_2.fit(X_train, y_train, epochs=50, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 455.60980224609375
Cycle #2: mean_squared_error 150.6235809326172
Cycle #3: mean_squared_error 114.4305648803711
Cycle #4: mean_squared_error 88.17723083496094
Cycle #5: mean_squared_error 64.32269287109375
Cycle #6: mean_squared_error 55.37126922607422
Cycle #7: mean_squared_error 51.237003326416016
Cycle #8: mean_squared_error 49.92914962768555
Cycle #9: mean_squared_error 44.66703414916992
Cycle #10: mean_squared_error 43.922828674316406
Cycle #11: mean_squared_error 47.19723129272461
Cycle #12: mean_squared_error 40.47425842285156
Cycle #13: mean_squared_error 39.83116149902344
Cycle #14: mean_squared_error 40.1787109375
Cycle #15: mean_squared_error 35.89670181274414
Cycle #16: mean_squared_error 39.57476806640625
Cycle #17: mean_squared_error 35.07663345336914
Cycle #18: mean_squared_error 34.45836639404297
Cycle #19: mean_squared_error 33.93509292602539
Cycle #20: mean_squared_error 32.28630065917969
Cycle #21: mean_squared_error 33.756378173828125
C

In [17]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 50.596184997558595
The standard deviation of the mean squared errors: 61.60791616836794


##### The mean of the mean squared errors from Case B is slight lower compared to Case A, however it seems like data normalization do not contribute much as errors in both A and B are still considered high.

#### **C. Increate the number of epochs - repeating Part B with 100 epochs**

In [18]:
# Create a model
model_3 = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model_3.compile(optimizer='adam', loss='mean_squared_error')

In [19]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Splitting data into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3)
    #Train and test the model at the same time
    res = model_3.fit(X_train, y_train, epochs=100, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 159.4805450439453
Cycle #2: mean_squared_error 99.79444122314453
Cycle #3: mean_squared_error 75.74494171142578
Cycle #4: mean_squared_error 74.35004425048828
Cycle #5: mean_squared_error 58.27476119995117
Cycle #6: mean_squared_error 50.24244689941406
Cycle #7: mean_squared_error 47.843360900878906
Cycle #8: mean_squared_error 45.58302688598633
Cycle #9: mean_squared_error 45.47398376464844
Cycle #10: mean_squared_error 48.722530364990234
Cycle #11: mean_squared_error 46.84992218017578
Cycle #12: mean_squared_error 40.91883850097656
Cycle #13: mean_squared_error 46.4486083984375
Cycle #14: mean_squared_error 47.10515594482422
Cycle #15: mean_squared_error 40.302154541015625
Cycle #16: mean_squared_error 47.984275817871094
Cycle #17: mean_squared_error 40.21487045288086
Cycle #18: mean_squared_error 45.738502502441406
Cycle #19: mean_squared_error 36.517818450927734
Cycle #20: mean_squared_error 49.08242416381836
Cycle #21: mean_squared_error 51.21449661254

In [20]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 48.88705337524414
The standard deviation of the mean squared errors: 19.072192367588524


##### There is a slight decrease in Case C comparing to Case B. However the result is not significant enough, it seems like increasing the numbers of epochs does not contribute much as the errors for both cases is still consider quite high.

#### **D. Increase the number of hidden layers - repeating part B with 3 hidden layers, each of 10 nodes and ReLU activation function.**

In [21]:
# Create a model
model_4 = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model_4.compile(optimizer='adam', loss='mean_squared_error')

In [22]:
list_of_mean_squared_error = []
for cycle in range(50):
    #Splitting data into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3)
    #Train and test the model at the same time
    res = model_4.fit(X_train, y_train, epochs=50, verbose=0, validation_data=(X_test, y_test))
    #Find mean_squared_error as last value in history.
    mean_squared_error = res.history['val_loss'][-1]
    #Add value of mean_squared_error for every cycle in list.
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))

Cycle #1: mean_squared_error 100.92414855957031
Cycle #2: mean_squared_error 61.28036880493164
Cycle #3: mean_squared_error 50.937904357910156
Cycle #4: mean_squared_error 43.54908752441406
Cycle #5: mean_squared_error 37.81708908081055
Cycle #6: mean_squared_error 32.360015869140625
Cycle #7: mean_squared_error 37.070823669433594
Cycle #8: mean_squared_error 32.59604263305664
Cycle #9: mean_squared_error 29.07569694519043
Cycle #10: mean_squared_error 35.336334228515625
Cycle #11: mean_squared_error 30.933639526367188
Cycle #12: mean_squared_error 31.914108276367188
Cycle #13: mean_squared_error 33.102760314941406
Cycle #14: mean_squared_error 32.133541107177734
Cycle #15: mean_squared_error 28.380245208740234
Cycle #16: mean_squared_error 25.636995315551758
Cycle #17: mean_squared_error 23.91638946533203
Cycle #18: mean_squared_error 23.075387954711914
Cycle #19: mean_squared_error 24.63487434387207
Cycle #20: mean_squared_error 27.722440719604492
Cycle #21: mean_squared_error 25.517

In [23]:
print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

The mean of the mean squared errors: 27.875657691955567
The standard deviation of the mean squared errors: 13.341021901136477


##### It is obvious that Case D has the lowest mean of the mean squared errors compared to the rest of the cases. This shows that additional layers holds the highest potential and its importance in lowering the errors comparing to other tuning methods. Further the comparisons of the previous cases i.e Case A, B and C proven that comparison using neural network with only one hidden layer may not be the best suggestion as it might produce poor and unpredictable result.