## Deep Learning with Keras: Assignment (PART B)

In [5]:
# import keras libraries
import numpy as np
import pandas as pd

In [6]:
# load concrete dataset
concrete = pd.read_csv("concrete_data.csv")
concrete.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [7]:
# identify concrete dataset
concrete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


### B. Build a baseline model + Use Normalised Data

In [8]:
# import libraries for buidling neural nentwork model
import keras
from keras.models import Sequential
from keras.layers import Dense

In [9]:
# create a deep learning model
model = Sequential()
ncols = concrete.drop(columns=["Strength"], axis=1).shape[1]

# add hidden layers
# create optimizer
model.add(Dense(5, activation="relu", input_shape = (ncols,)))
model.add(Dense(1))
model.compile(optimizer="adam", loss="mean_squared_error")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Model Training

In [26]:
# import scikit learn library for training and testing data
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error 

In [11]:
concrete.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [15]:
# normalise data
def normalise_data(data, feature):
  # detertmine the mean value and std from input feature
  x_mean = data[feature].mean()
  x_std = data[feature].std()

  # apply z score to normalise input feature's data
  x_norm = (data[feature] - x_mean)/x_std
  return x_norm

def apply_data_normalisation(data, feature_list):
  norm_data = data
  for feature in feature_list:
    feat_norm = normalise_data(data, feature)
    norm_data.loc[:,feature] = feat_norm
  return norm_data

In [17]:
concrete.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [18]:
concrete_norm = concrete.copy()
concrete_norm = apply_data_normalisation(concrete, concrete.columns)
concrete_norm.head()

 -0.27959729]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_data.loc[:,feature] = feat_norm


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597,2.644123
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597,1.560663
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134,0.266498
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221,0.313188
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069,0.507732


In [19]:
# train data
concreteX = concrete_norm.drop(columns=["Strength"], axis=1)
concreteY = concrete_norm["Strength"]

conc_xtrain, conc_xtest, conc_ytrain, conc_ytest = train_test_split(concreteX, 
                                                                    concreteY, 
                                                                    test_size=0.3,)


In [20]:
# train the model
# retrieve all mse values + store into a mse_values
model_history = model.fit(conc_xtrain, conc_ytrain, validation_data=(conc_xtest, conc_ytest), epochs=50)
mse_values = model_history.history["val_loss"]

# predict the model
conc_pred = model.predict(conc_xtest)

# evaluate the model + create list of mse
mse = mean_squared_error(conc_ytest, conc_pred)

Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 1.5190 - val_loss: 1.4307
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.3070 - val_loss: 1.3196
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.2705 - val_loss: 1.2306
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0253 - val_loss: 1.1634
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0431 - val_loss: 1.0958
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0023 - val_loss: 1.0382
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9125 - val_loss: 0.9849
Epoch 8/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8649 - val_loss: 0.9360
Epoch 9/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [21]:
# mean squared error: result
print(f"mean squared error at (50/50): {mse}")

mean squared error at (50/50): 0.33882612534787065


In [22]:
# mse values
mse_list = mse_values
epoch_list = [e for e in range(1,len(mse_list)+1)]

print("Epoch - MSE value")
for ep, mse_val in zip(epoch_list, mse_list):
  print(f"{ep} - {mse_val}")

Epoch - MSE value
1 - 1.4306682348251343
2 - 1.3195760250091553
3 - 1.2305784225463867
4 - 1.1633884906768799
5 - 1.0958470106124878
6 - 1.038169026374817
7 - 0.9849076271057129
8 - 0.935964047908783
9 - 0.887898862361908
10 - 0.8393442034721375
11 - 0.7942368388175964
12 - 0.7517083287239075
13 - 0.7124555706977844
14 - 0.672663688659668
15 - 0.6381121873855591
16 - 0.6093049645423889
17 - 0.5855404138565063
18 - 0.5628821849822998
19 - 0.5435333847999573
20 - 0.528987467288971
21 - 0.5134493112564087
22 - 0.5015997290611267
23 - 0.48901045322418213
24 - 0.47762346267700195
25 - 0.46786069869995117
26 - 0.45927953720092773
27 - 0.4514012038707733
28 - 0.44308334589004517
29 - 0.43595895171165466
30 - 0.4287061095237732
31 - 0.42159998416900635
32 - 0.4157083332538605
33 - 0.40912285447120667
34 - 0.4029211699962616
35 - 0.39647799730300903
36 - 0.39065787196159363
37 - 0.38655582070350647
38 - 0.382201224565506
39 - 0.377448707818985
40 - 0.37300166487693787
41 - 0.36813563108444214
4

In [23]:
# mse values: mean and std
# convert epoch and mse into dataframe
# take the mean and std of it

def create_mse_dict(epoch: list, mse_val:list) -> dict:
  # create a dict for all mse values
  mse_dict = {}
  mse_sample = [epoch, mse_val]
  mse_feat = ["Epoch","MSE"]

  for feat, sample in zip(mse_feat, mse_sample):
    mse_dict[feat] = sample
  return mse_dict

def convert_mse_data(epoch, mse_val):
  mse_dict = create_mse_dict(epoch, mse_val)
  mse_data = pd.DataFrame(mse_dict)
  return mse_data

In [24]:
mse_dict_ = create_mse_dict(epoch_list, mse_list)
mse_data = convert_mse_data(epoch_list, mse_list)
mse_data.head()

Unnamed: 0,Epoch,MSE
0,1,1.430668
1,2,1.319576
2,3,1.230578
3,4,1.163388
4,5,1.095847


In [25]:
# find the statistical measurement
mse_mean = mse_data["MSE"].mean()
mse_std = mse_data["MSE"].std()
print(f"Mean value of MSE: {mse_mean:.3f}")
print(f"Standard Deviation of MSE: {mse_std:.3f}")

Mean value of MSE: 0.589
Standard Deviation of MSE: 0.288


**How does the mean of the mean squared errors compare to that from Step A?**

+ mean of mse from part A (without normalisation): 23756.258
+ mean of mse from part B (with normalisation): 0.589