### Regression Model with keras

In [1]:
# importing numpy and pandas
import pandas as pd
import numpy as np

Let's download and read the data into a pandas dataframe.

In [3]:
concrete_data = pd.read_csv('https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv')
concrete_data.head(8)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3
5,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90,47.03
6,380.0,95.0,0.0,228.0,0.0,932.0,594.0,365,43.7
7,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28,36.45


The above dataset is about the compressive strength of different samples of concrete based on the volumes of the different materials that were used to make them

In [5]:
# Check on shape of the data
concrete_data.shape

(1030, 9)

In [6]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


<br></br>
#### Looking out for null values

In [7]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

#### The data is clean

## Dividing data into predictors and targets

In [11]:
concrete_data_columns = concrete_data.columns
display(concrete_data_columns)

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

In [12]:
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']

In [14]:
predictors.tail()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28
1029,260.9,100.5,78.3,200.6,8.6,864.5,761.5,28


In [15]:
target.tail()

1025    44.28
1026    31.18
1027    23.70
1028    32.77
1029    32.40
Name: Strength, dtype: float64

## Normalizing the data by subtracting the mean and dividing by the standard deviation

In [16]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head(8)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
5,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,0.701883
6,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
7,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597


### Import keras

In [17]:
import keras

Using TensorFlow backend.


In [18]:
# importing more packages from keras library
from keras.models import Sequential
from keras.layers import Dense

### Building a neural network

In [22]:
model = Sequential()
model.add(Dense(50, activation='relu', input_shape=(predictors_norm.shape[1], )))
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 50)                450       
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 51        
Total params: 3,051
Trainable params: 3,051
Non-trainable params: 0
_________________________________________________________________


In [24]:
## fitting the model
model.fit(predictors_norm, target, validation_split=0.3, epochs=100, verbose=2)

Train on 721 samples, validate on 309 samples
Epoch 1/100
 - 0s - loss: 1492.4271 - val_loss: 991.9525
Epoch 2/100
 - 0s - loss: 1258.6212 - val_loss: 785.5891
Epoch 3/100
 - 0s - loss: 924.7010 - val_loss: 540.1082
Epoch 4/100
 - 0s - loss: 561.9987 - val_loss: 321.1367
Epoch 5/100
 - 0s - loss: 312.8468 - val_loss: 209.2205
Epoch 6/100
 - 0s - loss: 233.9199 - val_loss: 178.4418
Epoch 7/100
 - 0s - loss: 215.9875 - val_loss: 170.2796
Epoch 8/100
 - 0s - loss: 205.5690 - val_loss: 164.9094
Epoch 9/100
 - 0s - loss: 196.3473 - val_loss: 164.7826
Epoch 10/100
 - 0s - loss: 189.5524 - val_loss: 161.2928
Epoch 11/100
 - 0s - loss: 183.2977 - val_loss: 160.3400
Epoch 12/100
 - 0s - loss: 177.7111 - val_loss: 159.1137
Epoch 13/100
 - 0s - loss: 173.4108 - val_loss: 158.8340
Epoch 14/100
 - 0s - loss: 169.2559 - val_loss: 157.6215
Epoch 15/100
 - 0s - loss: 165.1248 - val_loss: 157.1987
Epoch 16/100
 - 0s - loss: 161.4776 - val_loss: 155.3280
Epoch 17/100
 - 0s - loss: 158.2193 - val_loss: 1

<keras.callbacks.History at 0x7f2d9736bcc0>