## Deep Learning with Keras: Assignment (PART C)

In [1]:
# import keras libraries
import numpy as np
import pandas as pd

In [2]:
# load concrete dataset
concrete = pd.read_csv("concrete_data.csv")
concrete.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# identify concrete dataset
concrete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


### C. Increase the numbers or epochs

In [4]:
# import libraries for buidling neural nentwork model
import keras
from keras.models import Sequential
from keras.layers import Dense

In [5]:
# create a deep learning model
model = Sequential()
ncols = concrete.drop(columns=["Strength"], axis=1).shape[1]

# add hidden layers
# create optimizer
model.add(Dense(10, activation="relu", input_shape = (ncols,)))
model.add(Dense(1))
model.compile(optimizer="adam", loss="mean_squared_error")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Model Training

In [6]:
# import scikit learn library for training and testing data
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error 

In [7]:
concrete.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [8]:
# normalise data
def normalise_data(data, feature):
  # detertmine the mean value and std from input feature
  x_mean = data[feature].mean()
  x_std = data[feature].std()

  # apply z score to normalise input feature's data
  x_norm = (data[feature] - x_mean)/x_std
  return x_norm

def apply_data_normalisation(data, feature_list):
  norm_data = data
  for feature in feature_list:
    feat_norm = normalise_data(data, feature)
    norm_data.loc[:,feature] = feat_norm
  return norm_data

In [9]:
concrete.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [10]:
concrete_norm = concrete.copy()
concrete_norm = apply_data_normalisation(concrete, concrete.columns)
concrete_norm.head()

 -0.27959729]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  norm_data.loc[:,feature] = feat_norm


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597,2.644123
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597,1.560663
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134,0.266498
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221,0.313188
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069,0.507732


In [11]:
# train data
concreteX = concrete_norm.drop(columns=["Strength"], axis=1)
concreteY = concrete_norm["Strength"]

conc_xtrain, conc_xtest, conc_ytrain, conc_ytest = train_test_split(concreteX, 
                                                                    concreteY, 
                                                                    test_size=0.3,)


In [12]:
# train the model
# retrieve all mse values + store into a mse_values
model_history = model.fit(conc_xtrain, conc_ytrain, validation_data=(conc_xtest, conc_ytest), epochs=100)
mse_values = model_history.history["val_loss"]

# predict the model
conc_pred = model.predict(conc_xtest)

# evaluate the model + create list of mse
mse = mean_squared_error(conc_ytest, conc_pred)

Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 33ms/step - loss: 1.3681 - val_loss: 1.4781
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.2868 - val_loss: 1.2625
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.0413 - val_loss: 1.1105
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.0142 - val_loss: 0.9962
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.8729 - val_loss: 0.9060
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7751 - val_loss: 0.8371
Epoch 7/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.7262 - val_loss: 0.7805
Epoch 8/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.6823 - val_loss: 0.7315
Epoch 9/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━

In [13]:
# mean squared error: result
print(f"mean squared error at (50/50): {mse}")

mean squared error at (50/50): 0.17326638918213413


In [14]:
# mse values
mse_list = mse_values
epoch_list = [e for e in range(1,len(mse_list)+1)]

print("Epoch - MSE value")
for ep, mse_val in zip(epoch_list, mse_list):
  print(f"{ep} - {mse_val}")

Epoch - MSE value
1 - 1.478148102760315
2 - 1.2625083923339844
3 - 1.110525131225586
4 - 0.9962260723114014
5 - 0.9059926271438599
6 - 0.8371454477310181
7 - 0.7804785966873169
8 - 0.7315096259117126
9 - 0.6963468790054321
10 - 0.6630730032920837
11 - 0.6345922350883484
12 - 0.6089112162590027
13 - 0.5852699875831604
14 - 0.5630972385406494
15 - 0.5431855320930481
16 - 0.5252379775047302
17 - 0.5076689124107361
18 - 0.49100515246391296
19 - 0.47532376646995544
20 - 0.46081918478012085
21 - 0.4472140669822693
22 - 0.4327113628387451
23 - 0.4201573133468628
24 - 0.4067680537700653
25 - 0.3964855968952179
26 - 0.3866655230522156
27 - 0.37751972675323486
28 - 0.36837247014045715
29 - 0.3594560921192169
30 - 0.35166701674461365
31 - 0.3437967002391815
32 - 0.3361669182777405
33 - 0.3294162452220917
34 - 0.3224548101425171
35 - 0.31595373153686523
36 - 0.3093407452106476
37 - 0.30244919657707214
38 - 0.2956499755382538
39 - 0.2892191708087921
40 - 0.2828259766101837
41 - 0.27668237686157227


In [15]:
# mse values: mean and std
# convert epoch and mse into dataframe
# take the mean and std of it

def create_mse_dict(epoch: list, mse_val:list) -> dict:
  # create a dict for all mse values
  mse_dict = {}
  mse_sample = [epoch, mse_val]
  mse_feat = ["Epoch","MSE"]

  for feat, sample in zip(mse_feat, mse_sample):
    mse_dict[feat] = sample
  return mse_dict

def convert_mse_data(epoch, mse_val):
  mse_dict = create_mse_dict(epoch, mse_val)
  mse_data = pd.DataFrame(mse_dict)
  return mse_data

In [16]:
mse_dict_ = create_mse_dict(epoch_list, mse_list)
mse_data = convert_mse_data(epoch_list, mse_list)
mse_data.head()

Unnamed: 0,Epoch,MSE
0,1,1.478148
1,2,1.262508
2,3,1.110525
3,4,0.996226
4,5,0.905993


In [17]:
# find the statistical measurement
mse_mean = mse_data["MSE"].mean()
mse_std = mse_data["MSE"].std()
print(f"Mean value of MSE: {mse_mean:.3f}")
print(f"Standard Deviation of MSE: {mse_std:.3f}")

Mean value of MSE: 0.342
Standard Deviation of MSE: 0.245


**How does the mean of the mean squared errors compare to that from Step A?**

+ mean of mse from part B (without normalisation): 0.589
+ mean of mse from part C (with normalisation): 0.342

By increasing the number of epochs shows that the means of mse has decreased. When epoch has doubled from part B in part C, the mean value of part B has reduced from 0.589 to 0.498 in part C. 