# Assignment: Build A Regression Model in Keras

In [1]:
import pandas as pd
import numpy as np
import keras

Using TensorFlow backend.


### Load the dataset

In [2]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
#An overview of data
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


### Data Check

In [4]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

The dataset is clean of null values.

### Split data into predictors and target

In [5]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [6]:
#Save the number of predictors
n_cols = predictors.shape[1]

In [7]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


## Part A: Build a baseline model: aregression-based neural network

In [8]:
from keras.models import Sequential
from keras.layers import Dense

In [9]:
# Define the regression model
def regression_model_A():
    # create a model with one hidden layer of 10 nodes, and a ReLU activation function
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model: use the adam optimizer and the mean squared error as the loss function
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [10]:
# Build the model
modelA = regression_model_A()

Instructions for updating:
Colocations handled automatically by placer.


In [18]:
# Step1 -- Randomly split the data into a training and test sets by holding 30% of the data for testing
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3)

In [19]:
# Step2 -- Fit the model: train the model for 50 epochs
modelA.fit(X_train, y_train, epochs=50, verbose=0)

<keras.callbacks.History at 0x7f187b1722e8>

In [20]:
# Step3 -- Evaluate the model
predA = modelA.predict(X_test)

from sklearn.metrics import mean_squared_error
print("MSE: %.4f" % mean_squared_error(y_test, predA))

MSE: 45.8883


In [14]:
# Define a function for iterations
def findMSE(model, X, y, epo):
    # Repeat steps 1~3 50 times and return a list of 50 MSEs.
    # The parameter epo represents the number of epochs, and it is as an input for later uses in Part C

    MSE = np.zeros(50)
    for i in range(0,50):
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
        model.fit(X_train, y_train, epochs=epo, verbose=0)
        pred = model.predict(X_test)
        MSE[i] = mean_squared_error(y_test, pred)
    
    return MSE

In [16]:
# The list of 50 MSEs
MSEs_A = findMSE(modelA, predictors, target, 50)
MSEs_A

array([54.20270122, 44.12202838, 50.73532788, 47.12873419, 51.41697453,
       60.1215717 , 44.97313138, 47.03752168, 52.22529828, 46.87911126,
       44.86061902, 53.82262303, 52.57363236, 47.33023959, 42.5715369 ,
       45.68825197, 48.03200921, 44.41625912, 47.86674743, 43.43208535,
       62.38883944, 44.1918944 , 43.91723629, 54.23934657, 47.73414638,
       48.16857438, 46.60882094, 51.2955984 , 57.83787663, 69.8498359 ,
       44.3381365 , 49.07732938, 44.79610017, 46.33486463, 51.14380753,
       59.69811833, 48.80550841, 47.49938101, 42.1611038 , 57.59096684,
       43.65890179, 45.94470148, 51.11675451, 50.77755863, 49.11362152,
       47.55048205, 48.84108546, 52.30225544, 50.60371019, 40.75142726])

In [17]:
print("Mean of MSEs: %.4f" % np.mean(MSEs_A))
print("Standard deviation of MSEs: %.4f" % np.std(MSEs_A))

Mean of MSEs: 49.3555
Standard deviation of MSEs: 5.6167


## Part B: Normalize the data

In [21]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()

In [22]:
# The model is the same as in PartA 
# Report Mean of MSEs
print("Mean of MSEs: %.4f" % np.mean(findMSE(modelA, predictors_norm, target, 50)))

Mean of MSEs: 55.2857


Discussion: How does the mean of the MSEs compare to that from Part A?

Answer: The mean of MSEs goes up after the normalization so is larger than that from Part A.  

## Part C: Increate the number of epochs

In [23]:
# Repeat Part B but use 100 epochs this time for training.
# The model is the same as in PartA, but epo=100

# Report Mean of MSEs
print("Mean of MSEs: %.4f" % np.mean(findMSE(modelA, predictors_norm, target, 100)))

Mean of MSEs: 28.9480


Discussion: How does the mean of the MSEs compare to that from Part B?

Answer: The mean of MSEs after increasing the number of epochs gets much smaller compare to that from Part B.  

## Part D: Increase the number of hidden layers

In [24]:
# Create a new regression model
def regression_model_D():
    # create model with 3 hidden layers, each of 10 nodes and ReLU activation function
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [26]:
# Build the model
modelD = regression_model_D()

# Repeat Part B but with the new model
# Report Mean of MSEs
print("Mean of MSEs: %.4f" % np.mean(findMSE(modelD, predictors_norm, target, 50)))

Mean of MSEs: 40.5056


Discussion: How does the mean of the MSEs compare to that from Part B?

Answer: The mean of MSEs after increasing the number of hidden layers becomes smaller than that from Part B.  