# Build a Regression Model in Keras for Predicting Strength __- PART D -__

#### Import libraries

In [18]:
import pandas as pd
import numpy as np

#### Read data

In [19]:
df = pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [20]:
df.shape

(1030, 9)

#### Check null values if exist

In [21]:
df.isnull().any()

Cement                False
Blast Furnace Slag    False
Fly Ash               False
Water                 False
Superplasticizer      False
Coarse Aggregate      False
Fine Aggregate        False
Age                   False
Strength              False
dtype: bool

#### Extract the input and target data

In [22]:
X = df.drop(columns=['Strength'], axis=1).values
y = df.Strength.values

n_cols = X.shape[1]

#### Normalize the data

In [23]:
X_norm = (X - X.mean()) / X.std()
X_norm

array([[ 0.68491213, -0.84750875, -0.84750875, ...,  2.10382035,
         1.07085517, -0.76804989],
       [ 0.68491213, -0.84750875, -0.84750875, ...,  2.1463876 ,
         1.07085517, -0.76804989],
       [ 0.09606522, -0.44311991, -0.84750875, ...,  1.79733617,
         0.83815422, -0.08129831],
       ...,
       [-0.42609301, -0.45191714, -0.53932188, ...,  1.68495864,
         1.36598808, -0.76804989],
       [-0.39601215, -0.31768842, -0.84750875, ...,  1.9607944 ,
         1.39124464, -0.76804989],
       [-0.10712244, -0.5623082 , -0.62530772, ...,  1.60578356,
         1.31348847, -0.76804989]])

#### split the data into train and test by holding 30% of data for testing

In [24]:
from sklearn.model_selection import train_test_split

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.30)

#### import libraries for creating model

In [26]:
import keras
from keras.models import Sequential
from keras.layers import Dense

### Create a neural network model
###### Three hidden layers, each of 10 nodes and ReLU activation function
###### Use the adam optimizer and the mean squared error as the loss function

In [27]:
## define regression model

# create model
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

# compile model
model.compile(optimizer='adam', loss='mean_squared_error')

####  Train the model on the training data using 50 epochs

In [28]:
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
23/23 - 0s - loss: 1509.4286
Epoch 2/50
23/23 - 0s - loss: 1465.5109
Epoch 3/50
23/23 - 0s - loss: 1390.4244
Epoch 4/50
23/23 - 0s - loss: 1265.6251
Epoch 5/50
23/23 - 0s - loss: 1074.1910
Epoch 6/50
23/23 - 0s - loss: 807.7154
Epoch 7/50
23/23 - 0s - loss: 525.4946
Epoch 8/50
23/23 - 0s - loss: 346.8811
Epoch 9/50
23/23 - 0s - loss: 305.3018
Epoch 10/50
23/23 - 0s - loss: 300.5808
Epoch 11/50
23/23 - 0s - loss: 297.6159
Epoch 12/50
23/23 - 0s - loss: 295.5896
Epoch 13/50
23/23 - 0s - loss: 293.0128
Epoch 14/50
23/23 - 0s - loss: 291.5000
Epoch 15/50
23/23 - 0s - loss: 289.0738
Epoch 16/50
23/23 - 0s - loss: 286.8678
Epoch 17/50
23/23 - 0s - loss: 285.0705
Epoch 18/50
23/23 - 0s - loss: 282.8091
Epoch 19/50
23/23 - 0s - loss: 281.0154
Epoch 20/50
23/23 - 0s - loss: 279.3507
Epoch 21/50
23/23 - 0s - loss: 277.4119
Epoch 22/50
23/23 - 0s - loss: 275.2126
Epoch 23/50
23/23 - 0s - loss: 273.3626
Epoch 24/50
23/23 - 0s - loss: 272.0016
Epoch 25/50
23/23 - 0s - loss: 269.8886
Epoc

<tensorflow.python.keras.callbacks.History at 0x2326f1f0ef0>

#### Evaluate the model on the test data 

In [29]:
from sklearn.metrics import mean_squared_error

In [30]:
# predict test data
y_pred = model.predict(X_test)

In [31]:
# mean squared error of the test data
mean_squared_error(y_test, y_pred)

225.58796403203937

#### create a list of 50 mean squared errors

In [32]:
means = []
for i in range(50):
    print('Step %d' % (i + 1))
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.30)
    model.fit(X_train, y_train, epochs=50, verbose=0)
    y_pred = model.predict(X_test)
    means.append(mean_squared_error(y_test, y_pred))

Step 1
Step 2
Step 3
Step 4
Step 5
Step 6
Step 7
Step 8
Step 9
Step 10
Step 11
Step 12
Step 13
Step 14
Step 15
Step 16
Step 17
Step 18
Step 19
Step 20
Step 21
Step 22
Step 23
Step 24
Step 25
Step 26
Step 27
Step 28
Step 29
Step 30
Step 31
Step 32
Step 33
Step 34
Step 35
Step 36
Step 37
Step 38
Step 39
Step 40
Step 41
Step 42
Step 43
Step 44
Step 45
Step 46
Step 47
Step 48
Step 49
Step 50


#### Report the mean and the standard deviation of the mean squared errors.

In [33]:
print('The mean of mean squared errors : %f' % np.mean(means))
print('The standard deviation of mean squared errors : %f' % np.std(means))

The mean of mean squared errors : 55.747284
The standard deviation of mean squared errors : 22.220232


#### How does the mean of the mean squared errors compare to that from Step B?

The mean of the mean squared errors from Step B is bigger than Step D. So after creating more hidden layers, the model in Step D makes less errors than in Step B.