In [1]:
# import required packages
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from statistics import mean, stdev

Using TensorFlow backend.


In [2]:
# Load the concrete dataset from its hosted CSV file.
DATA_URL = 'https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv'
TARGET_COLUMN = 'Strength'
concrete_data = pd.read_csv(DATA_URL)

# Separate the target (y) from the predictors (x): x keeps every column
# whose name is not the target column, y is the target column itself.
is_predictor = concrete_data.columns != TARGET_COLUMN
x = concrete_data.loc[:, is_predictor]
y = concrete_data[TARGET_COLUMN]

# Show the first rows of the predictor frame as the cell output.
x.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [3]:
# Width of the network input layer: one unit per predictor column in x.
num_cols = len(x.columns)
print(num_cols)

8


In [4]:
# define regression model
def regression_model(n_features=None, hidden_units=10):
    """Build and compile a small fully connected regression network.

    The network has one hidden ReLU layer followed by a single output unit
    with no activation, and is compiled with the Adam optimizer and a
    mean-squared-error loss.

    Parameters
    ----------
    n_features : int, optional
        Number of input predictors. When omitted, the notebook-level
        ``num_cols`` is used, so a bare ``regression_model()`` call behaves
        exactly as before.
    hidden_units : int, optional
        Number of units in the hidden layer (default 10).

    Returns
    -------
    Sequential
        The compiled, untrained model.
    """
    # Fall back to the notebook global only when the caller gave no width,
    # so the function can also be reused with a different feature set.
    if n_features is None:
        n_features = num_cols

    # create model
    model = Sequential()
    model.add(Dense(hidden_units, activation='relu', input_shape=(n_features,)))
    model.add(Dense(1))

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [5]:
test_loss = []       # mean squared error on the held-out set, one entry per run
n_iterations = 50    # number of independent train/evaluate repetitions
                     # (named so it does not shadow the builtin ``iter``)
for i in range(n_iterations):
    # Reset Keras backend state left over from the previous run's model so
    # that the 50 models built here do not accumulate in one session.
    keras.backend.clear_session()

    # Split data into train and test set. Using the loop index as the seed
    # gives every run a different split that is the same on each re-run of
    # the notebook. Weight initialisation is not seeded here, so the losses
    # can still vary between executions.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=i)

    # build a fresh, untrained model for this run
    model = regression_model()

    # fit the model
    model.fit(x_train, y_train, epochs=50, verbose=0)

    # evaluate on the held-out 30% and record the mean squared error
    y_pred = model.predict(x_test)
    test_loss.append(mean_squared_error(y_test, y_pred))

print(test_loss)

[161.20908691093186, 89.80964638996088, 260.58840105021096, 112.66891662520479, 132.18617013652212, 291.00510347790606, 164.2497470636703, 114.68337225809128, 189.79653795291543, 178.35161283135136, 187.14637019810198, 116.1525320672427, 585.746116892622, 467.743508407462, 255.2311428090262, 115.28527458608639, 302.2010328662479, 173.9693017183023, 156.1697414820813, 167.12481289509927, 112.90509251249256, 156.66142416780988, 968.0049370464335, 129.6175932555826, 361.20503535725976, 202.12268374664396, 185.12516143566992, 321.5253114807992, 86.95366891578624, 117.42662750936658, 483.0070105838583, 589.500168365218, 251.96655867848708, 647.1136271808032, 149.3634845393482, 113.3984680612001, 111.48548380407225, 191.02958511045594, 130.94797700771875, 250.37056061967402, 104.88984351253184, 555.4879558779087, 101.05385431008914, 2315.6873168048346, 214.14921395022242, 109.67858718675824, 1360.2206813208036, 339.77446225249906, 1042.1914264210304, 180.19500596303132]


In [6]:
# Summarise the per-run mean squared errors: arithmetic mean and sample
# standard deviation (statistics.stdev) of test_loss.
mu, sigma = mean(test_loss), stdev(test_loss)

# Report both summary statistics on a single line.
summary_template = "mean: {} and std_dev: {}"
print(summary_template.format(mu, sigma))

mean: 322.0875447119485 and std_dev: 388.1740029638677
