## Part A

### Importing packages

In [63]:
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Importing the data

In [64]:
# Load the concrete dataset from its hosted CSV file (needs network access).
concrete_data = pd.read_csv(
    filepath_or_buffer = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
)

In [65]:
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


### Splitting the data into predictors and targets

In [66]:
# Every column except the last is a model input; the last column ('Strength') is the regression target.
predictors, targets = concrete_data.iloc[:, :-1], concrete_data.iloc[:, -1]
# Number of input features; the model builders use it to size the input layer.
n_cols = len(predictors.columns)

In [67]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [68]:
targets.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

### Building a neural network with 1 hidden layer

In [80]:
def first_model():
    """Build and compile the baseline regression network.

    The network has one hidden layer of 10 ReLU units followed by a
    single-unit output layer with no activation. The input width is read
    from the notebook-level ``n_cols``. The model is compiled with the Adam
    optimizer, using mean squared error as both the loss and the reported
    metric.

    Returns:
        A compiled, untrained ``Sequential`` model.
    """
    layers = [
        Dense(10, activation = 'relu', input_shape = (n_cols,)), # hidden layer
        Dense(1), # output layer
    ]
    network = Sequential(layers)
    network.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mse'])
    return network

### Evaluating the network using unnormalized data (50 epochs)

In [94]:
# Part A experiment: 50 independent runs on the unnormalized predictors.
# Each run trains a fresh one-hidden-layer model for 50 epochs on a 70/30 split
# and records the test-set MSE.
errors = []
for i in range(50):
    # Release Keras state left over from the previous run's model so memory use does not grow across the 50 runs.
    keras.backend.clear_session()
    model = first_model() # create the model
    # Seeding the split with the run index makes run i use the same 70/30 split on every rerun
    # (weight initialisation is still unseeded, so the MSE values are not bit-for-bit reproducible).
    X_train, X_test, y_train, y_test = train_test_split(predictors, targets, test_size = 0.3, random_state = i) # create training and test splits
    # The training history is discarded, so no validation_data is passed; the test set is only used for the final evaluation.
    model.fit(X_train, y_train, epochs = 50, verbose = 0) # fit the model
    mse = model.evaluate(X_test, y_test, verbose = 0)[1] # compute the MSE; index 0 is the loss, index 1 the 'mse' metric
    errors.append(mse)

In [95]:
# Mean and spread of the 50 test-set MSE values from the unnormalized-data runs
# (np.std is the population standard deviation, ddof = 0).
mu, sigma = np.mean(errors), np.std(errors)

In [96]:
print('The mean is {}'.format(mu))

The mean is 430.13746634202477


In [97]:
print('The standard deviation is {}'.format(sigma))

The standard deviation is 608.6403598852753


## Part B

### Normalizing the data

In [99]:
# Standardize each predictor column: subtract its mean and divide by its sample standard deviation.
# NOTE(review): the statistics are computed on the full dataset, i.e. before any train/test split is made.
predictors_norm = predictors.sub(predictors.mean()).div(predictors.std())

In [100]:
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


### Evaluating the network using normalized data (50 epochs)

In [101]:
# Part B experiment: 50 independent runs on the normalized predictors.
# Each run trains a fresh one-hidden-layer model for 50 epochs on a 70/30 split
# and records the test-set MSE.
errors = []
for i in range(50):
    # Release Keras state left over from the previous run's model so memory use does not grow across the 50 runs.
    keras.backend.clear_session()
    model = first_model() # create the model
    # Seeding the split with the run index makes run i use the same 70/30 split on every rerun
    # (weight initialisation is still unseeded, so the MSE values are not bit-for-bit reproducible).
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, targets, test_size = 0.3, random_state = i) # create training and test splits
    # The training history is discarded, so no validation_data is passed; the test set is only used for the final evaluation.
    model.fit(X_train, y_train, epochs = 50, verbose = 0) # fit the model
    mse = model.evaluate(X_test, y_test, verbose = 0)[1] # compute the MSE; index 0 is the loss, index 1 the 'mse' metric
    errors.append(mse)

In [102]:
# Mean and spread of the 50 test-set MSE values from the normalized-data, 50-epoch runs
# (np.std is the population standard deviation, ddof = 0).
mu, sigma = np.mean(errors), np.std(errors)

In [103]:
print('The mean is {}'.format(mu))

The mean is 366.6015106117224


In [104]:
print('The standard deviation is {}'.format(sigma))

The standard deviation is 111.97177485670116


The mean MSE is smaller than in Part A, and the standard deviation across runs is much smaller as well. Since normalized features have similar ranges of values, no single feature can disproportionately contribute to the output of the model, which makes training more stable and in turn leads to lower error.

## Part C

### Evaluating the network using normalized data (100 epochs)

In [108]:
# Part C experiment: 50 independent runs on the normalized predictors.
# Each run trains a fresh one-hidden-layer model for 100 epochs on a 70/30 split
# and records the test-set MSE.
errors = []
for i in range(50):
    # Release Keras state left over from the previous run's model so memory use does not grow across the 50 runs.
    keras.backend.clear_session()
    model = first_model() # create the model
    # Seeding the split with the run index makes run i use the same 70/30 split on every rerun
    # (weight initialisation is still unseeded, so the MSE values are not bit-for-bit reproducible).
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, targets, test_size = 0.3, random_state = i) # create training and test splits
    # The training history is discarded, so no validation_data is passed; the test set is only used for the final evaluation.
    model.fit(X_train, y_train, epochs = 100, verbose = 0) # fit the model
    mse = model.evaluate(X_test, y_test, verbose = 0)[1] # compute the MSE; index 0 is the loss, index 1 the 'mse' metric
    errors.append(mse)

In [109]:
# Mean and spread of the 50 test-set MSE values from the normalized-data, 100-epoch runs
# (np.std is the population standard deviation, ddof = 0).
mu, sigma = np.mean(errors), np.std(errors)

In [112]:
print('The mean is {}'.format(mu))

The mean is 167.35739794141654


In [113]:
print('The standard deviation is {}'.format(sigma))

The standard deviation is 17.920026235892173


The mean MSE is smaller than in Part B, and the standard deviation across runs is smaller as well. Since the training loss generally keeps decreasing as training continues, models trained over more epochs tend to perform better than those trained over fewer epochs. However, one should be wary of overfitting when increasing the number of epochs.

## Part D

### Building a neural network with 3 hidden layers

In [120]:
def second_model():
    """Build and compile the deeper regression network.

    The network has three hidden layers of 10 ReLU units each, followed by a
    single-unit output layer with no activation. The input width is read
    from the notebook-level ``n_cols``. The model is compiled with the Adam
    optimizer, using mean squared error as both the loss and the reported
    metric.

    Returns:
        A compiled, untrained ``Sequential`` model.
    """
    layers = [Dense(10, activation = 'relu', input_shape = (n_cols,))] # hidden layer 1
    layers += [Dense(10, activation = 'relu') for _ in range(2)] # hidden layers 2 and 3
    layers.append(Dense(1)) # output layer
    network = Sequential(layers)
    network.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mse'])
    return network

### Evaluating the network using normalized data (50 epochs)

In [121]:
# Part D experiment: 50 independent runs on the normalized predictors.
# Each run trains a fresh three-hidden-layer model for 50 epochs on a 70/30 split
# and records the test-set MSE.
errors = []
for i in range(50):
    # Release Keras state left over from the previous run's model so memory use does not grow across the 50 runs.
    keras.backend.clear_session()
    model = second_model() # create the model
    # Seeding the split with the run index makes run i use the same 70/30 split on every rerun
    # (weight initialisation is still unseeded, so the MSE values are not bit-for-bit reproducible).
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, targets, test_size = 0.3, random_state = i) # create training and test splits
    # The training history is discarded, so no validation_data is passed; the test set is only used for the final evaluation.
    model.fit(X_train, y_train, epochs = 50, verbose = 0) # fit the model
    mse = model.evaluate(X_test, y_test, verbose = 0)[1] # compute the MSE; index 0 is the loss, index 1 the 'mse' metric
    errors.append(mse)

In [122]:
# Mean and spread of the 50 test-set MSE values from the three-hidden-layer, 50-epoch runs
# (np.std is the population standard deviation, ddof = 0).
mu, sigma = np.mean(errors), np.std(errors)

In [123]:
print('The mean is {}'.format(mu))

The mean is 131.55026768212178


In [124]:
print('The standard deviation is {}'.format(sigma))

The standard deviation is 18.55386593706933


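The mean MSE is smaller than in Part B. Deeper models have more capacity and can capture more complex relationships between the features and the target, so they often perform better than simpler models on the same data when trained for the same number of epochs. Again, we should be careful not to overfit the training data.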