# Concrete Strength Prediction
Simple implementation of neural networks to predict the strength of concrete.
*Developed by Swattik Kundu as the final assignment project for the IBM Introduction to Neural Networks Course on Coursera

Import the pandas and numpy libraries

In [17]:
import numpy as np
import pandas as pd

Load the dataset into a pandas dataframe and inspect the data

In [18]:
concrete_data = pd.read_csv('C:\STUFF\Code\Machine Learning\IBM\concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [19]:
concrete_data.shape

(1030, 9)

In [20]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


To check for missing values

In [21]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

Taking the features for predictors and target

In [22]:
predictors = concrete_data.drop('Strength', axis=1)
target = concrete_data['Strength']

In [23]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [24]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

Normalizing the Data

In [25]:
predictors = (predictors - predictors.mean()) / predictors.std()
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [26]:
n_cols = predictors.shape[1]

Importing the keras library and required models. Then to create the function for the model

In [27]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Helper function of neural networks with 3 hidden layers

In [28]:
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

Training the model with the given specifics 50 times and creating a MSE list of 50 values.

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
scores = []
for i in range(0,50):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model = regression_model()
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)
    pred = model.predict(X_test)
    scores.append(mean_squared_error(y_test, pred))


Epoch 1/50
16/16 - 0s - loss: 1537.9413 - val_loss: 1605.2795 - 450ms/epoch - 28ms/step
Epoch 2/50
16/16 - 0s - loss: 1510.7313 - val_loss: 1575.5664 - 43ms/epoch - 3ms/step
Epoch 3/50
16/16 - 0s - loss: 1479.7662 - val_loss: 1539.3430 - 52ms/epoch - 3ms/step
Epoch 4/50
16/16 - 0s - loss: 1441.1691 - val_loss: 1495.9077 - 36ms/epoch - 2ms/step
Epoch 5/50
16/16 - 0s - loss: 1393.0894 - val_loss: 1436.5328 - 43ms/epoch - 3ms/step
Epoch 6/50
16/16 - 0s - loss: 1325.6086 - val_loss: 1356.2002 - 34ms/epoch - 2ms/step
Epoch 7/50
16/16 - 0s - loss: 1237.0223 - val_loss: 1247.6655 - 36ms/epoch - 2ms/step
Epoch 8/50
16/16 - 0s - loss: 1120.9141 - val_loss: 1114.2523 - 42ms/epoch - 3ms/step
Epoch 9/50
16/16 - 0s - loss: 984.1619 - val_loss: 957.5436 - 68ms/epoch - 4ms/step
Epoch 10/50
16/16 - 0s - loss: 825.5741 - val_loss: 788.8058 - 56ms/epoch - 3ms/step
Epoch 11/50
16/16 - 0s - loss: 664.1161 - val_loss: 613.1553 - 49ms/epoch - 3ms/step
Epoch 12/50
16/16 - 0s - loss: 507.1037 - val_loss: 460.

In [30]:
scores

[125.54276363689982,
 172.4258407349501,
 154.09374323290035,
 158.66349977596008,
 163.9569713544997,
 140.33187158016963,
 181.86007849363722,
 149.56934077816564,
 152.5057345510821,
 135.78732076582904,
 136.65932371489612,
 144.21517926701438,
 167.68397971170444,
 183.4186285100223,
 140.23600753508475,
 157.1961906382478,
 137.5498148825271,
 136.76016885083504,
 130.21573701584498,
 160.94565797293393,
 137.9775180137798,
 116.75092424271782,
 156.61309499108364,
 140.98279863621724,
 140.39407845077054,
 137.3182319273427,
 138.37258421844925,
 155.8029946311804,
 143.7202019673379,
 129.72224452121483,
 162.93335242121898,
 152.39340537152657,
 143.07827968501968,
 132.78999945460242,
 135.3678138282199,
 145.88609527013992,
 146.2439029142809,
 167.4430947235472,
 165.51088199775793,
 179.6466704576565,
 139.00867799932306,
 139.33669866455264,
 153.26632498067792,
 129.54930082500957,
 154.12786207377462,
 164.78389968779553,
 135.91068170827043,
 161.7535677702772,
 133.81

Mean of MSE scores

In [31]:
np.mean(scores)

148.79585653830824

Standard Deviation of MSE scores

In [32]:
np.std(scores)

15.218414053274012