# Start import data

In [1]:
import pandas as pd
import numpy as np

concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head()
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

# Build a baseline model
Use the Keras library to build a neural network with the following:

- Three hidden layers, each of 10 nodes and ReLU activation function.

- Use the **adam** optimizer and the **mean squared erro**r  as the loss function.

In [2]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [3]:
def regression_model():
    # Three hidden layers, each of 10 nodes and ReLU activation function.

    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_pred_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


1. Randomly split the data into a training and test sets by holding **30%** of the data for testing. You can use the **train_test_splithelper function from Scikit-learn**.

In [4]:
from sklearn.model_selection import train_test_split

# normalized version of the data

In [5]:
# we need to use Strength column as our target to predict
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] 
target = concrete_data['Strength'] 

predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

n_pred_cols = predictors.shape[1] # number of predictors
n_pred_cols

# split predictors data into training data and testing data (predictors is the column without Strength)
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=40)

2. Train the model on the training data using **50 epochs**.

In [6]:
model = regression_model()
epochs = 50
model.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f173783b898>

In [7]:
loss = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss



102.7367172241211

3. Evaluate the model on the test data and compute the **mean squared error** between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

In [8]:
from sklearn.metrics import mean_squared_error

mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

102.7367147056353 0.0


4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [9]:
repeat = 50
epochs = 50
mean_squared_errors = []
for i in range(0, repeat):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print("mean_squared_errors "+str(i+1)+": "+str(MSE))
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)

print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))
print('\n')

mean_squared_errors 1: 71.21405029296875
mean_squared_errors 2: 62.59098815917969
mean_squared_errors 3: 47.30638122558594
mean_squared_errors 4: 47.802913665771484
mean_squared_errors 5: 39.1929817199707
mean_squared_errors 6: 42.958412170410156
mean_squared_errors 7: 45.13362121582031
mean_squared_errors 8: 39.61233139038086
mean_squared_errors 9: 34.77840042114258
mean_squared_errors 10: 38.44464111328125
mean_squared_errors 11: 34.7132682800293
mean_squared_errors 12: 37.272342681884766
mean_squared_errors 13: 37.24258041381836
mean_squared_errors 14: 43.52910232543945
mean_squared_errors 15: 35.32040023803711
mean_squared_errors 16: 40.6007194519043
mean_squared_errors 17: 37.73777770996094
mean_squared_errors 18: 32.95217514038086
mean_squared_errors 19: 29.998695373535156
mean_squared_errors 20: 32.72590637207031
mean_squared_errors 21: 32.868446350097656
mean_squared_errors 22: 34.2216682434082
mean_squared_errors 23: 25.301366806030273
mean_squared_errors 24: 29.1331729888916
