In [1]:
# import required packages
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from statistics import mean, stdev

Using TensorFlow backend.


In [2]:
# fetch the concrete dataset from its hosted CSV
concrete_data = pd.read_csv('https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv')

# predictors are every column other than Strength; Strength itself is the target
x = concrete_data.loc[:, concrete_data.columns != 'Strength']
y = concrete_data['Strength']

# z-score each predictor column (subtract its mean, divide by its standard
# deviation) so all inputs are on a comparable scale for the optimizer
X = (x - x.mean()) / x.std()
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [3]:
# number of predictor columns in X; regression_model uses it as the input width
num_cols = len(X.columns)
print(num_cols)

8


In [4]:
# define regression model
def regression_model(n_inputs=None, n_hidden=10):
    """Build and compile a regression network with one hidden layer.

    The network is: Dense(n_hidden, relu) -> Dense(1), compiled with the
    'adam' optimizer and mean squared error loss.

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features. When omitted, the notebook-level
        ``num_cols`` is used, which matches the original zero-argument call.
    n_hidden : int, optional
        Number of units in the hidden layer (default 10).

    Returns
    -------
    A compiled, untrained ``Sequential`` model.
    """
    if n_inputs is None:
        # keep the zero-argument call working exactly as before
        n_inputs = num_cols

    # create model
    model = Sequential()
    model.add(Dense(n_hidden, activation='relu', input_shape=(n_inputs,)))
    model.add(Dense(1))  # single linear output for the regression target

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [5]:
# Repeat the train/evaluate experiment on different random 70/30 splits and
# collect the held-out mean squared error of each run.
test_loss = []       # one test-set mean squared error per run
n_iterations = 50    # number of runs (named so the builtin `iter` is not shadowed)

# NOTE(review): X was standardized with statistics of the full dataset before
# it is split here, so the test rows influence the scaling of the train rows.
for i in range(n_iterations):
    # release the graph/session state left by the previous run's model so that
    # memory use and per-run time do not grow across iterations
    keras.backend.clear_session()

    # split data into train and test set; seeding with the run index gives a
    # different but repeatable split for every run (the network's weight
    # initialization is still unseeded, so losses can vary between executions)
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=i
    )

    # build a fresh, untrained model for this run
    model = regression_model()

    # fit the model
    model.fit(x_train, y_train, epochs=100, verbose=0)

    # evaluate on the held-out rows
    y_pred = model.predict(x_test)
    test_loss.append(mean_squared_error(y_test, y_pred))

print(test_loss)

[162.16936032891493, 186.11445789951767, 175.55772843395144, 166.34753163098665, 174.9256351419736, 176.08369814371383, 158.8119027525855, 185.98623849957232, 154.40432139480245, 145.72160954381584, 172.60772117745444, 155.81426065920448, 190.46882812488954, 163.94090688128728, 163.13533933207984, 178.4156614710049, 171.59904466523028, 152.78301802976435, 159.6289547957221, 150.27301697379968, 151.02927743074434, 136.84473983898195, 142.79196755377427, 172.45279333417616, 173.54718393780072, 158.83763075074967, 133.78313410802917, 205.47260369945914, 150.5800128626281, 193.64918452271382, 154.72711494563657, 172.86525254455753, 161.6700082769854, 168.3636989091346, 157.11709976561605, 173.19323663017076, 152.16437652833457, 166.62130075914257, 194.12681207268008, 158.19582613701252, 157.89840026766586, 175.77458521508726, 155.38273418770385, 178.73641389431424, 200.78536893799034, 171.42281468163284, 174.22074426441964, 167.21202428415023, 142.6212443005568, 179.81411839266]


In [6]:
# summarize the mean squared errors collected in test_loss:
# their average and their sample standard deviation
mu, sigma = mean(test_loss), stdev(test_loss)

# report both statistics on a single line
summary_template = "mean: {} and std_dev: {}"
print(summary_template.format(mu, sigma))

mean: 166.5338187782956 and std_dev: 15.841868282210221
