### DATA CLEANUP

In [1]:
import pandas as pd
import numpy as np

concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head()


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [2]:
concrete_data.shape

(1030, 9)

In [3]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [4]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [5]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [6]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [7]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [8]:
n_cols = concrete_data.shape[1]
n_cols

9

# A: BUILD A BASELINE MODEL

In [9]:
# Use the Keras library to build a neural network 
import keras

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [21]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [22]:

# define regression model
def regression_model():
    # create model
    model = Sequential()
    n_cols = concrete_data.shape[1]
    
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### RANDOM SPLIT OF DATA INTO TRAINING AND TEST SETS BY 30%

In [27]:

from sklearn.model_selection import train_test_split

In [28]:
X_train, X_test, y_train, y_test = train_test_split(concrete_data, target, test_size=0.3, random_state=42)

### TRAIN THE MODEL ON TRAINING DATA USING 50 EPOCHS

In [29]:
# build the model
model = regression_model()

In [30]:
#fit the model
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
 - 1s - loss: 220727.5644
Epoch 2/50
 - 0s - loss: 73898.1839
Epoch 3/50
 - 0s - loss: 12754.2675
Epoch 4/50
 - 0s - loss: 2962.6513
Epoch 5/50
 - 0s - loss: 2515.1227
Epoch 6/50
 - 0s - loss: 2248.6071
Epoch 7/50
 - 0s - loss: 2030.3953
Epoch 8/50
 - 0s - loss: 1821.3108
Epoch 9/50
 - 0s - loss: 1619.5283
Epoch 10/50
 - 0s - loss: 1435.9207
Epoch 11/50
 - 0s - loss: 1267.5799
Epoch 12/50
 - 0s - loss: 1107.9172
Epoch 13/50
 - 0s - loss: 964.0100
Epoch 14/50
 - 0s - loss: 839.4609
Epoch 15/50
 - 0s - loss: 728.6572
Epoch 16/50
 - 0s - loss: 636.5923
Epoch 17/50
 - 0s - loss: 548.1392
Epoch 18/50
 - 0s - loss: 478.3029
Epoch 19/50
 - 0s - loss: 414.6735
Epoch 20/50
 - 0s - loss: 362.8292
Epoch 21/50
 - 0s - loss: 319.4703
Epoch 22/50
 - 0s - loss: 282.1376
Epoch 23/50
 - 0s - loss: 251.0116
Epoch 24/50
 - 0s - loss: 225.5363
Epoch 25/50
 - 0s - loss: 204.7607
Epoch 26/50
 - 0s - loss: 186.7287
Epoch 27/50
 - 0s - loss: 172.5956
Epoch 28/50
 - 0s - loss: 160.5716
Epoch 29/50
 

<keras.callbacks.History at 0x7ff762b25198>

### EVALUATE THE MODEL ON THE TEST DATA AND COMPUTE MEAN SQUARED ERROR

In [31]:

Test_data = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
Test_data



85.00825927635613

In [32]:
# The mean squared error between the predicted concrete strength and the actual concrete strength
from sklearn.metrics import mean_squared_error



In [33]:
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)

print("Mean squared error: %.2f"
      % mean_squared_error(y_test, y_pred))

Mean squared error: 85.01


### LIST OF 50 MEAN SQUARED ERRORS


In [45]:
List_Of_Mean_Squared_Errors = 50
mean_squared_errors = []

for i in range(0, List_Of_Mean_Squared_Errors):
    X_train, X_test, y_train, y_test = train_test_split(concrete_data, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    MEAN_SQUARED_ERRORS = model.evaluate(X_test, y_test, verbose=0)
    print("MEAN_SQUARED_ERRORS "+str(i+1)+": "+str(MEAN_SQUARED_ERRORS))
    y_pred = model.predict(X_test)   
    
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)



MEAN_SQUARED_ERRORS 1: 0.007491324181309796
MEAN_SQUARED_ERRORS 2: 0.0007712879513155271
MEAN_SQUARED_ERRORS 3: 0.0013943183892120195
MEAN_SQUARED_ERRORS 4: 0.08430251587652465
MEAN_SQUARED_ERRORS 5: 0.0003196814886846619
MEAN_SQUARED_ERRORS 6: 0.00023431264569125407
MEAN_SQUARED_ERRORS 7: 0.006589391428531181
MEAN_SQUARED_ERRORS 8: 0.011506095383931133
MEAN_SQUARED_ERRORS 9: 0.020521487507209616
MEAN_SQUARED_ERRORS 10: 0.0016780031014619618
MEAN_SQUARED_ERRORS 11: 0.5721816943301352
MEAN_SQUARED_ERRORS 12: 0.007114538662195881
MEAN_SQUARED_ERRORS 13: 0.5247067236205907
MEAN_SQUARED_ERRORS 14: 1.1831173873642116
MEAN_SQUARED_ERRORS 15: 0.0028636633655098553
MEAN_SQUARED_ERRORS 16: 0.0001507126005472358
MEAN_SQUARED_ERRORS 17: 0.0005581196415250138
MEAN_SQUARED_ERRORS 18: 6.780916036519751e-05
MEAN_SQUARED_ERRORS 19: 0.0009066792514663442
MEAN_SQUARED_ERRORS 20: 0.03666613967094606
MEAN_SQUARED_ERRORS 21: 0.0077797538468246516
MEAN_SQUARED_ERRORS 22: 0.016805261873842056
MEAN_SQUARED_ER

### THE MEAN AND THE STANDARD DEVIATION OF THE MEAN SQUARED ERRORS

In [47]:
print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

Mean: 0.07917338878113113
Standard Deviation: 0.21717707945468856


# DISCUSSION :

Depending on my computer’s power and your patience to wait for the Mean Squared Errors to load up,
I found out that for Step A, the results were somewhat in the zero range. 
We will apply the normalization of data, increasing the epochs and adding hidden layers to see if our model could yield a more accurate report.