### DATA CLEANUP

In [1]:
import pandas as pd
import numpy as np

concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head()


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [2]:
concrete_data.shape

(1030, 9)

In [3]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [4]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [5]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [6]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [7]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

### NORMALIZE THE DATA

In [8]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [9]:
n_cols = predictors_norm.shape[1] # number of predictors
n_cols

8

In [10]:
# Use the Keras library to build a neural network 
import keras

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# D: INCREASE NUMBER OF HIDDEN LAYERS TO 3 

In [11]:

from keras.models import Sequential
from keras.layers import Dense

In [12]:

# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### RANDOM SPLIT OF DATA INTO TRAINING AND TEST SETS BY 30%

In [13]:

from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)

### TRAIN THE MODEL ON TRAINING DATA USING 50 EPOCHS

In [15]:

# build the model
model = regression_model()

In [16]:
#fit the model
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
 - 2s - loss: 1548.1494
Epoch 2/50
 - 1s - loss: 1519.2200
Epoch 3/50
 - 0s - loss: 1477.5119
Epoch 4/50
 - 1s - loss: 1413.9405
Epoch 5/50
 - 1s - loss: 1315.1343
Epoch 6/50
 - 1s - loss: 1173.7059
Epoch 7/50
 - 0s - loss: 985.1878
Epoch 8/50
 - 1s - loss: 767.4347
Epoch 9/50
 - 1s - loss: 547.2492
Epoch 10/50
 - 1s - loss: 385.2817
Epoch 11/50
 - 1s - loss: 300.2957
Epoch 12/50
 - 1s - loss: 267.5331
Epoch 13/50
 - 1s - loss: 248.9070
Epoch 14/50
 - 1s - loss: 233.3810
Epoch 15/50
 - 1s - loss: 222.0073
Epoch 16/50
 - 1s - loss: 212.4388
Epoch 17/50
 - 1s - loss: 203.9959
Epoch 18/50
 - 1s - loss: 196.5661
Epoch 19/50
 - 1s - loss: 191.0096
Epoch 20/50
 - 1s - loss: 185.0127
Epoch 21/50
 - 1s - loss: 180.6004
Epoch 22/50
 - 1s - loss: 175.9038
Epoch 23/50
 - 0s - loss: 171.6534
Epoch 24/50
 - 1s - loss: 167.6690
Epoch 25/50
 - 0s - loss: 164.5517
Epoch 26/50
 - 0s - loss: 160.8060
Epoch 27/50
 - 1s - loss: 157.4614
Epoch 28/50
 - 1s - loss: 154.5420
Epoch 29/50
 - 1s - los

<keras.callbacks.History at 0x7f152bb55ef0>

### EVALUATE THE MODEL ON THE TEST DATA AND COMPUTE MEAN SQUARED ERROR

In [17]:

Test_data = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
Test_data



93.57078710265915

In [18]:
# The mean squared error between the predicted concrete strength and the actual concrete strength
from sklearn.metrics import mean_squared_error



In [19]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)

print("Mean squared error: %.2f"
      % mean_squared_error(y_test, y_pred))

Mean squared error: 93.57


### LIST OF 50 MEAN SQUARED ERRORS


In [20]:
List_Of_Mean_Squared_Errors = 50
mean_squared_errors = []

for i in range(0, List_Of_Mean_Squared_Errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    MEAN_SQUARED_ERRORS = model.evaluate(X_test, y_test, verbose=0)
    print("MEAN_SQUARED_ERRORS "+str(i+1)+": "+str(MEAN_SQUARED_ERRORS))
    y_pred = model.predict(X_test)   
    
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)



MEAN_SQUARED_ERRORS 1: 51.036090097766866
MEAN_SQUARED_ERRORS 2: 48.311753652628184
MEAN_SQUARED_ERRORS 3: 35.416093023849534
MEAN_SQUARED_ERRORS 4: 37.83434599965907
MEAN_SQUARED_ERRORS 5: 39.12779137077455
MEAN_SQUARED_ERRORS 6: 38.14996458874551
MEAN_SQUARED_ERRORS 7: 41.12288157145182
MEAN_SQUARED_ERRORS 8: 29.469890014253387
MEAN_SQUARED_ERRORS 9: 31.511053375441666
MEAN_SQUARED_ERRORS 10: 31.57183418767738
MEAN_SQUARED_ERRORS 11: 33.95893005568619
MEAN_SQUARED_ERRORS 12: 25.700832663230525
MEAN_SQUARED_ERRORS 13: 33.35153685881482
MEAN_SQUARED_ERRORS 14: 34.555129165402505
MEAN_SQUARED_ERRORS 15: 32.344432590077226
MEAN_SQUARED_ERRORS 16: 22.611548186120093
MEAN_SQUARED_ERRORS 17: 25.288324837545748
MEAN_SQUARED_ERRORS 18: 26.51230202523636
MEAN_SQUARED_ERRORS 19: 24.76602220380962
MEAN_SQUARED_ERRORS 20: 31.287279931472728
MEAN_SQUARED_ERRORS 21: 23.555413477629134
MEAN_SQUARED_ERRORS 22: 26.741224504983155
MEAN_SQUARED_ERRORS 23: 20.768498812678562
MEAN_SQUARED_ERRORS 24: 24.94

### THE MEAN AND THE STANDARD DEVIATION OF THE MEAN SQUARED ERRORS

In [21]:
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

Mean: 27.683891501994204
Standard Deviation: 7.080206507940839


# DISCUSSION

How does the mean of the mean squared errors compare to that from Step B?
For this final step, we have gone through data normalization, increasing epochs and increased hidden layers. Compared to Step B, the Mean has significantly decreases. Indicating the model’s accuracy was improving.
