# Get the dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the concrete dataset directly from the course's object-storage bucket.
# The URL is split across adjacent string literals purely for readability;
# Python joins them into the same single string.
concrete_data = pd.read_csv(
    'https://s3-api.us-geo.objectstorage.softlayer.net/'
    'cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
)
# Preview the first rows.
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Column labels of the loaded frame.
concrete_data_columns = concrete_data.columns

# Boolean mask over the column labels: True for every column other than 'Strength'.
is_predictor_column = concrete_data_columns != 'Strength'

# Inputs: every column selected by the mask. Output: the 'Strength' column.
predictors = concrete_data.loc[:, is_predictor_column]
target = concrete_data['Strength']

In [4]:
# Preview the first five rows of the predictor columns.
predictors.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [5]:
# Number of predictor columns; used as the width of the network's input layer.
n_cols = len(predictors.columns)

In [6]:
# Preview the first five values of the target column.
target.head(n=5)

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# Build neural network

Use the Keras library to build a neural network with the following:
- One hidden layer of 10 nodes, and a ReLU activation function
- Use the Adam optimizer and the mean squared error as the loss function

In [7]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [10]:
# Build a regression model with one hidden layer of 10 nodes and a ReLU
# activation, followed by a single output node with no activation.
#
# The input width is declared with an explicit keras.Input object as the first
# element of the Sequential model instead of passing `input_shape=` to the
# first Dense layer, which current Keras releases warn against.
model = keras.Sequential([
    keras.Input(shape=(n_cols,)),
    layers.Dense(10, activation='relu'),
    layers.Dense(1),
])

# Adam optimizer with mean squared error as the loss function.
model.compile(optimizer='adam', loss='mean_squared_error')

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def train_model(mse_list):
    """Run one independent train/evaluate repetition and record its test MSE.

    Each call trains a freshly initialised copy of the global ``model``
    architecture instead of the global ``model`` itself. Training the shared
    model on every call would carry weights over from one repetition to the
    next, and rows that land in a later test split would already have been
    trained on in an earlier call, so the repetitions would not be independent
    and the reported errors would be biased.

    Args:
        mse_list: List to which this repetition's test mean squared error is
            appended.

    Returns:
        The test-set mean squared error computed for this repetition.
    """
    # 1. Randomly split the data into training and test sets, holding out 30% for testing.
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3)

    # clone_model re-creates the layers with newly initialised weights; the
    # clone is not compiled, so it is compiled again with the same settings.
    run_model = keras.models.clone_model(model)
    run_model.compile(optimizer='adam', loss='mean_squared_error')

    # 2. Train the model on the training data using 50 epochs.
    run_model.fit(X_train, y_train, epochs=50, verbose=0)

    # 3. Evaluate on the test data. The model is compiled without metrics, so
    #    evaluate() returns only the loss value (there is no accuracy).
    loss_val = run_model.evaluate(X_test, y_test)
    print("test loss (MSE):", loss_val)
    y_pred = run_model.predict(X_test)

    print("y_test", y_test.shape)
    print("y_pred", y_pred.shape)
    # Mean squared error between predicted and actual concrete strength.
    mse = mean_squared_error(y_test, y_pred)
    print("mean squared error:", mse)
    mse_list.append(mse)
    return mse

In [12]:
# 4. Repeat steps 1 - 3 fifty times; each call to train_model appends one
#    mean squared error to mse_list.
mse_list = []
for repetition in range(50):
    train_model(mse_list)

test loss, test acc: 1561.082763671875
y_test (309,)
y_pred (309, 6)


ValueError: y_true and y_pred have different number of output (1!=6)

In [None]:
# 5. Report the mean and the standard deviation of the mean squared errors

import statistics

# Each statistic is computed and printed in turn, mean first.
for label, summarise in (
    ("mse mean:", statistics.mean),
    ("standard deviation:", statistics.stdev),
):
    print(label, summarise(mse_list))