# Peer-graded Assignment: Build a Regression Model in Keras

## A. Build a baseline model

In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

In [2]:
# import data
df = pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [10]:
predictors = df[df.columns[df.columns != 'Strength']]
target = df['Strength']

In [12]:
predictors.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270


In [14]:
target[:5]

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [24]:
n_cols = predictors.shape[1]

In [18]:
# define regression model with number of hidden layers as argument 
def regression_model(layers=1):
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    for i in range(0, layers):
        model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [30]:
model_a = regression_model()

In [28]:
# import skit-learn model selection and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [38]:
# define a fit model that accepts model, predictors, target and epochs as argument
def fit_model(model, train, test, epochs=50):
    mean_sq_err = list()
    
    for i in range(0, 50):
        # fit the model with one hidden layer
        X_train, X_test, y_train, y_test = train_test_split(train, test, test_size=0.3, random_state=42)
        model.fit(X_train, y_train, epochs=50, verbose=0)
        
        y_result = model.predict(X_test)

        mean_sq_err_current = mean_squared_error(y_test, y_result)

        mean_sq_err.append(mean_sq_err_current)
    
    # use numpy to get mean and standard deviation of the mean squared errors
    mean_sq_err_np = np.asarray(mean_sq_err)
    mean = np.mean(mean_sq_err_np)
    std_dev = np.std(mean_sq_err_np)
    
    return mean, std_dev

In [39]:
mean_a, std_dev_a = fit_model(model_a, predictors, target)
print(f'Mean of the mean squared errors: {mean_a}')
print(f'Standard deviation of the mean squared errors: {std_dev_a}')

Mean of the mean squared errors: 47.303614409435475
Standard deviation of the mean squared errors: 5.646295940543211


## B. Normalize the data

In [40]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


Now, let's redo step A with the normalized version of the training data

In [41]:
mean_a, std_dev_a = fit_model(model_a, predictors_norm, target)
print(f'Mean of the mean squared errors: {mean_a}')
print(f'Standard deviation of the mean squared errors: {std_dev_a}')

Mean of the mean squared errors: 35.7153450247004
Standard deviation of the mean squared errors: 10.200661089478105


__How does the mean of the mean squared errors compare to that from Step A?__

The mean of the mean squared errors has decreased from 47.3 to 35.7 while the standard deviation has increased from 5.65 to 10.2

## C. Increate the number of epochs

In [43]:
mean_c, std_dev_c = fit_model(model_a, predictors_norm, target, epochs=100)
print(f'Mean of the mean squared errors: {mean_c}')
print(f'Standard deviation of the mean squared errors: {std_dev_c}')

Mean of the mean squared errors: 31.789301316732256
Standard deviation of the mean squared errors: 0.2288649932530328


__How does the mean of the mean squared errors compare to that from Step B?__

Both the mean and the standard deviation of the mean squared errors have decreased from 35.7 to 31.79 and from 10.2 to 0.23 respectively.

## D. Increase the number of hidden layers

In [44]:
# a new model with 3 hidden layers
model_d = regression_model(layers=3)

In [45]:
mean_d, std_dev_d = fit_model(model_d, predictors_norm, target, epochs=50)
print(f'Mean of the mean squared errors: {mean_d}')
print(f'Standard deviation of the mean squared errors: {std_dev_d}')

Mean of the mean squared errors: 39.88658112114288
Standard deviation of the mean squared errors: 11.491183026573784


__How does the mean of the mean squared errors compare to that from Step B?__

Mean of the mean squared errors: 35.7153450247004
Standard deviation of the mean squared errors: 10.200661089478105

Both the mean and the standard deviation of the mean squared errors have increased from 35.7 to 39.89 and from 10.2 to 11.49 respectively.