In [1]:
# import required packages
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from statistics import mean, stdev

Using TensorFlow backend.


In [2]:
# fetch the concrete dataset as a DataFrame
concrete_data = pd.read_csv('https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv')

# separate the target column (y) from the remaining predictor columns (x)
y = concrete_data['Strength']
is_predictor = concrete_data.columns != 'Strength'  # True for every column except the target
x = concrete_data.loc[:, is_predictor]

# standardize each predictor column: subtract its mean, divide by its standard deviation
x_centered = x - x.mean()
X = x_centered / x.std()
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [3]:
# width of the network input: one feature per predictor column of X
_, num_cols = X.shape
print(num_cols)

8


In [4]:
# define regression model
def regression_model(n_inputs=None, hidden_units=10, n_hidden_layers=3):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    n_inputs : int, optional
        Number of predictor columns fed to the first layer. When omitted,
        the notebook-level ``num_cols`` is used, so existing
        ``regression_model()`` calls keep working unchanged.
    hidden_units : int, optional
        Number of units in every hidden layer (default 10).
    n_hidden_layers : int, optional
        Number of ReLU hidden layers (default 3). Must be at least 1.

    Returns
    -------
    keras.models.Sequential
        Model compiled with the 'adam' optimizer and 'mean_squared_error' loss.

    Raises
    ------
    ValueError
        If ``n_hidden_layers`` is smaller than 1.
    """
    if n_inputs is None:
        # fall back to the predictor count computed earlier in the notebook
        n_inputs = num_cols
    if n_hidden_layers < 1:
        raise ValueError("n_hidden_layers must be at least 1")

    # create model
    model = Sequential()
    # only the first layer is given the input shape
    model.add(Dense(hidden_units, activation='relu', input_shape=(n_inputs,)))
    for _ in range(n_hidden_layers - 1):
        model.add(Dense(hidden_units, activation='relu'))
    # single output unit with no activation: the predicted target value
    model.add(Dense(1))

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [5]:
test_loss = []       # test-set mean squared error, one entry per run
n_iterations = 50    # number of train/evaluate repetitions (renamed from `iter`, which shadowed the builtin)
for i in range(n_iterations):
    # release the backend state left behind by the previous run's model;
    # without this every new model is added to the same session and the
    # loop gets slower and uses more memory with each repetition
    keras.backend.clear_session()

    # split data into train and test set; seeding with the run index gives a
    # different but repeatable split per run (weight initialisation is still unseeded)
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=i
    )

    # build a fresh, untrained model for this run
    model = regression_model()

    # fit the model
    model.fit(x_train, y_train, epochs=50, verbose=0)

    # evaluate on the held-out 30 %
    y_pred = model.predict(x_test)
    test_loss.append(mean_squared_error(y_test, y_pred))

print(test_loss)

[120.65123542220839, 129.60231555945163, 134.88842360874924, 155.52119280575013, 120.34984503903172, 116.92534743433411, 114.47871900472207, 109.94261703533499, 97.37082559633697, 119.41236136548011, 122.57107399509643, 132.66412709988896, 115.62139745987095, 113.24249576570999, 129.21392335960292, 123.75339156211062, 124.3868495476732, 139.38168022073776, 136.30272286640817, 94.3719804850537, 133.26430890296612, 122.89778553153522, 134.36645700662265, 128.24992034890855, 128.68147186892693, 109.65070023029895, 140.2170729446935, 105.75667682783373, 149.4886992559989, 83.48498925666443, 122.49770844275221, 111.19285797456334, 125.9140744352038, 109.51861299582052, 112.30014807149246, 141.03221350345837, 129.38016595657606, 123.08354035212403, 139.6132270857133, 128.7735319372803, 123.5230726431385, 136.64400187147973, 135.43521838863217, 111.61484774602056, 147.5667894492743, 84.17342215049482, 142.66939155250122, 147.5311799003528, 110.16469984543687, 155.86564248517408]


In [6]:
# summarize test_loss: mean and sample standard deviation over all runs
mu, sigma = mean(test_loss), stdev(test_loss)

# report the mean and the standard deviation of the mean squared errors
summary = "mean: {} and std_dev: {}".format(mu, sigma)
print(summary)

mean: 124.50409908390981 and std_dev: 16.228760359720834
