# Get the dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [4]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [5]:
# number of predictors
n_cols = predictors.shape[1]

In [6]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [7]:
# normalize the data
# subtracting the mean from the individual predictors and dividing by the standard deviation
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


# Build neural network

Use the Keras library to build a neural network with the following:
- One hidden layer of 10 nodes, and a ReLU activation function
- Use the adam optimizer and the mean squared error  as the loss function

In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [9]:
# build a model for regression with one hidden layer of 10 nodes, and a ReLU activation function
model = keras.Sequential()
model.add(layers.Dense(10, activation='relu', input_shape=(n_cols,)))
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def train_model(mse_list):
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    # 2. Train the model on the training data using 50 epochs.    
    model.fit(X_train, y_train, epochs=50, verbose=0)    
    # 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.
    loss_val  = model.evaluate(X_test, y_test)
    print("test loss, test acc:", loss_val)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("mean squared error:", mse)
    mse_list.append(mse)

In [11]:
# 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors
mse_list = []
for i in range(0, 50):
    train_model(mse_list)   

test loss, test acc: 128.58995056152344
mean squared error: 128.58996712617366
test loss, test acc: 91.87136840820312
mean squared error: 91.87137166627068
test loss, test acc: 51.6630744934082
mean squared error: 51.66307346607763
test loss, test acc: 43.67446517944336
mean squared error: 43.674467670984136
test loss, test acc: 32.52668762207031
mean squared error: 32.5266923956446
test loss, test acc: 31.935884475708008
mean squared error: 31.935883966201413
test loss, test acc: 32.8129997253418
mean squared error: 32.8130013812404
test loss, test acc: 28.480300903320312
mean squared error: 28.480297010727433
test loss, test acc: 35.807945251464844
mean squared error: 35.80794229860793
test loss, test acc: 33.71898651123047
mean squared error: 33.71898482258324
test loss, test acc: 28.466882705688477
mean squared error: 28.4668821078428
test loss, test acc: 32.855709075927734
mean squared error: 32.85570772228363
test loss, test acc: 27.624908447265625
mean squared error: 27.62490861

In [12]:
# 5. Report the mean and the standard deviation of the mean squared errors

import statistics

print("mse mean:", statistics.mean(mse_list))
print("standard deviation:", statistics.stdev(mse_list))

mse mean: 29.843857101964403
standard deviation: 17.998475856541926


In [None]:
# 6. Discussion of the average mean squared error and how it compares with part
# How does the mean of the mean squared errors compare to that from Step B ?

print("part B mse mean : 48.73")
print("Part D mse mean : 29.84")
print("Part D mse < Part B mse, prediction is more precise with 3 hidden layers")