In [1]:
# import required packages
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from statistics import mean, stdev

Using TensorFlow backend.


In [2]:
# download dataset
concrete_data = pd.read_csv('https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv')

# break the dataset into input vector x and target vector y
x = concrete_data[concrete_data.columns[concrete_data.columns != 'Strength']] # all columns except Strength
y = concrete_data['Strength'] # Strength column

# normalize the data so that it can be optimized faster
X = (x - x.mean()) / x.std()
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [3]:
# input shape
num_cols = X.shape[1] # number of predictors
print(num_cols)

8


In [4]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(num_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [5]:
test_loss = []   # list for storing mean squared errors
iter = 50        # number of iterations
for i in range(iter):
    # split data into train and test set
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # build the model
    model = regression_model()

    # fit the model
    model.fit(x_train, y_train, epochs=50, verbose=0)

    # evaluate 
    y_pred = model.predict(x_test)
    test_loss.append(mean_squared_error(y_test, y_pred))

print(test_loss)

[378.9899563281826, 602.8045655675558, 326.38490892481593, 296.67215753686196, 223.97233329833912, 260.55602760590546, 364.1222318074611, 400.514848346083, 322.11596322274585, 379.4479276183143, 477.71242757729675, 307.7305987681382, 436.9469572639079, 265.7847184120053, 357.3498735163634, 279.4975182415364, 791.961025851801, 329.70525975294095, 392.7176159963523, 357.0113427217229, 389.9268703260071, 365.2420841337461, 204.95716561937942, 401.36329856771937, 344.40698469253977, 531.9402412951123, 529.8661028929725, 350.23533053427155, 301.39997245687437, 340.5128273570166, 289.8221330649697, 295.4672652155815, 229.59903128869166, 389.6627418466915, 437.5646258047289, 367.73924305981376, 377.80462524644975, 253.93897236092172, 333.5440252361794, 386.2775816632753, 300.699356173907, 313.36488661226633, 940.327715310097, 326.61525195953845, 588.102436955757, 217.90688471233634, 593.0001116180143, 249.97552049083063, 594.6423068833019, 288.08589631430004]


In [6]:
# normalize test_loss
mu = mean(test_loss)
sigma = stdev(test_loss)

# mean and the standard deviation of the mean squared errors
print("mean: {} and std_dev: {}".format(mu, sigma))

mean: 381.71979496103245 and std_dev: 141.1954818866688
