# Build a Regression Model in Keras

## A.Build a baseline model

In [40]:
# Installing required libraries

!pip install tensorflow
!pip install numpy
!pip install pandas
!pip install keras



In [41]:
# importing libraries
import tensorflow
import keras
import sklearn
import pandas as pd
import numpy as np

In [42]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

#### The dataset is about the compressive strength of different samples of concrete based on the volumes of the different ingredients that were used to make them. Ingredients include:

    1. Cement

    2. Blast Furnace Slag

    3. Fly Ash

    4. Water

    5. Superplasticizer

    6. Coarse Aggregate

    7. Fine Aggregate


In [43]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [44]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [45]:
# Normalizing data so that

In [46]:
columns = concrete_data.columns
columns

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

In [47]:
predictors = concrete_data[columns[columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [48]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [49]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

#### 1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.

#### 2. Train the model on the training data using 50 epochs.

In [50]:
hidden_layers = 1
nodes_in_hidden = 10
epochs = 50
n_cols = predictors.shape[1] # number of columns
n_cols

8

In [51]:
def regression_model(number_of_hidden_layers, nodes_in_hidden_nodes):
  '''
  Creates a sequential model and adds a Dense layer with the specified number of nodes and activation function.'''
  model = Sequential()
  for i in  range(number_of_hidden_layers):
    model.add(Dense(nodes_in_hidden_nodes, activation='relu', input_shape=(n_cols,), name='hidden_layer_{}'.format(i)))
  model.add(Dense(1, name='output_layer'))
  model.compile(optimizer='adam', loss='mean_squared_error')
  return model

#### 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

#### 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [52]:
model = regression_model(hidden_layers, nodes_in_hidden)
num_of_repitions = 50
mse_list = []   # Storing mse's
for i in range(num_of_repitions):
  print('Iteration: {}'.format(i))
  X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)
  model.fit(X_train, y_train, validation_split=0.3, epochs=epochs, verbose=2)
  predictions = model.predict(X_test)
  mse = sklearn.metrics.mean_squared_error(y_test, predictions)
  mse_list.append(mse)
print(mse_list)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Iteration: 1
Epoch 1/50
16/16 - 0s - loss: 2062.5059 - val_loss: 1717.2284 - 223ms/epoch - 14ms/step
Epoch 2/50
16/16 - 0s - loss: 2007.6693 - val_loss: 1669.4486 - 101ms/epoch - 6ms/step
Epoch 3/50
16/16 - 0s - loss: 1942.4434 - val_loss: 1615.9261 - 107ms/epoch - 7ms/step
Epoch 4/50
16/16 - 0s - loss: 1887.5923 - val_loss: 1566.0490 - 92ms/epoch - 6ms/step
Epoch 5/50
16/16 - 0s - loss: 1832.8073 - val_loss: 1519.8577 - 162ms/epoch - 10ms/step
Epoch 6/50
16/16 - 0s - loss: 1779.7648 - val_loss: 1479.4116 - 99ms/epoch - 6ms/step
Epoch 7/50
16/16 - 0s - loss: 1726.7592 - val_loss: 1430.5366 - 100ms/epoch - 6ms/step
Epoch 8/50
16/16 - 0s - loss: 1674.6538 - val_loss: 1388.5320 - 117ms/epoch - 7ms/step
Epoch 9/50
16/16 - 0s - loss: 1626.1947 - val_loss: 1345.5735 - 126ms/epoch - 8ms/step
Epoch 10/50
16/16 - 0s - loss: 1580.2980 - val_loss: 1306.5566 - 137ms/epoch - 9ms/step
Epoch 11/50
16/16 - 0s - loss: 1533.1697 - val_loss

#### 5. Report the mean and the standard deviation of the mean squared errors.

In [53]:
mean_mse_A = np.mean(mse_list)
std_mse_A = np.std(mse_list)
print('Mean: {}, Standard Deviation: {}'.format(mean_mse_A, std_mse_A))

Mean: 99.58352388257866, Standard Deviation: 246.92048635314848


## B. Normalize the data (5 marks)
- Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

In [54]:
normalized_predictors = (predictors - predictors.mean()) / predictors.std()
normalized_predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [55]:
normalized_predictors.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,-4.139084e-16,-1.793603e-16,0.0,-1.379695e-16,-1.931572e-16,7.243397e-16,-4.759946e-16,4.139084e-17
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.714421,-0.8564718,-0.846733,-2.798851,-1.038638,-2.211064,-2.239829,-0.707016
25%,-0.8496407,-0.8564718,-0.846733,-0.7805147,-1.038638,-0.5262618,-0.5317114,-0.612034
50%,-0.0791135,-0.6014861,-0.846733,0.1607513,0.0326992,-0.06326279,0.07383152,-0.2795973
75%,0.6586406,0.8003558,1.001791,0.4885554,0.6688058,0.7264077,0.6288606,0.1636517
max,2.476712,3.309068,2.279976,3.064159,4.351528,2.213149,2.731735,5.055221


In [56]:
normalized_target = (target - target.mean()) / target.std()
normalized_target.head()

0    2.644123
1    1.560663
2    0.266498
3    0.313188
4    0.507732
Name: Strength, dtype: float64

In [57]:
normalized_target.describe()

count    1030.000000
mean        0.000000
std         1.000000
min        -2.004578
25%        -0.724778
50%        -0.082185
75%         0.617574
max         2.800357
Name: Strength, dtype: float64

In [58]:
num_of_repitions = 50
mse_list_normalized = []   # Storing mse's
for i in range(num_of_repitions):
  print('Iteration: {}'.format(i))
  X_train, X_test, y_train, y_test = train_test_split(normalized_predictors, normalized_target, test_size=0.3, random_state=42)
  model.fit(X_train, y_train, validation_split=0.3, epochs=epochs, verbose=2)
  predictions = model.predict(X_test)
  mse = sklearn.metrics.mean_squared_error(y_test, predictions)
  mse_list_normalized.append(mse)
print(mse_list_normalized)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Iteration: 1
Epoch 1/50
16/16 - 0s - loss: 0.1798 - val_loss: 0.1764 - 287ms/epoch - 18ms/step
Epoch 2/50
16/16 - 0s - loss: 0.1779 - val_loss: 0.1752 - 121ms/epoch - 8ms/step
Epoch 3/50
16/16 - 0s - loss: 0.1771 - val_loss: 0.1738 - 138ms/epoch - 9ms/step
Epoch 4/50
16/16 - 0s - loss: 0.1760 - val_loss: 0.1741 - 94ms/epoch - 6ms/step
Epoch 5/50
16/16 - 0s - loss: 0.1758 - val_loss: 0.1727 - 118ms/epoch - 7ms/step
Epoch 6/50
16/16 - 0s - loss: 0.1740 - val_loss: 0.1718 - 103ms/epoch - 6ms/step
Epoch 7/50
16/16 - 0s - loss: 0.1727 - val_loss: 0.1709 - 95ms/epoch - 6ms/step
Epoch 8/50
16/16 - 0s - loss: 0.1717 - val_loss: 0.1695 - 100ms/epoch - 6ms/step
Epoch 9/50
16/16 - 0s - loss: 0.1712 - val_loss: 0.1684 - 88ms/epoch - 6ms/step
Epoch 10/50
16/16 - 0s - loss: 0.1702 - val_loss: 0.1680 - 135ms/epoch - 8ms/step
Epoch 11/50
16/16 - 0s - loss: 0.1698 - val_loss: 0.1690 - 94ms/epoch - 6ms/step
Epoch 12/50
16/16 - 0s - loss: 0

In [59]:
mean_mse_normalized = np.mean(mse_list_normalized)
std_mse_normalized = np.std(mse_list_normalized)
print('Mean: {}, Standard Deviation: {}'.format(mean_mse_normalized, std_mse_normalized))

Mean: 0.15525912617540116, Standard Deviation: 0.012055328873901296


### How does the mean of the mean squared errors compare to that from Step A?
- A: Mean: 99.58352388257866, Standard Deviation: 246.92048635314848
- B: Mean: 0.15525912617540116, Standard Deviation: 0.012055328873901296

The mean of the mean squared errors (MSE) of model B (after normalization) is much smaller than that of model A (before normalization). This is expected because normalization typically reduces the scale of the data, which can lead to smaller MSE values.
In essence, normalization helps to improve the comparability of models by placing them on a common scale. This can be particularly useful when comparing models that have been trained on data with different units or scales.

## C. Increate the number of epochs (5 marks)
- Repeat Part B but use 100 epochs this time for training.

In [60]:
num_of_repitions = 50
epochs = 100
mse_list_C = []   # Storing mse's
for i in range(num_of_repitions):
  print('Iteration: {}'.format(i))
  X_train, X_test, y_train, y_test = train_test_split(normalized_predictors, normalized_target, test_size=0.3, random_state=42)
  model.fit(X_train, y_train, validation_split=0.3, epochs=epochs, verbose=2)
  predictions = model.predict(X_test)
  mse = sklearn.metrics.mean_squared_error(y_test, predictions)
  mse_list_C.append(mse)
print(mse_list_C)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 26/100
16/16 - 0s - loss: 0.0875 - val_loss: 0.1286 - 73ms/epoch - 5ms/step
Epoch 27/100
16/16 - 0s - loss: 0.0881 - val_loss: 0.1282 - 71ms/epoch - 4ms/step
Epoch 28/100
16/16 - 0s - loss: 0.0889 - val_loss: 0.1289 - 96ms/epoch - 6ms/step
Epoch 29/100
16/16 - 0s - loss: 0.0880 - val_loss: 0.1273 - 80ms/epoch - 5ms/step
Epoch 30/100
16/16 - 0s - loss: 0.0877 - val_loss: 0.1264 - 76ms/epoch - 5ms/step
Epoch 31/100
16/16 - 0s - loss: 0.0877 - val_loss: 0.1295 - 86ms/epoch - 5ms/step
Epoch 32/100
16/16 - 0s - loss: 0.0879 - val_loss: 0.1289 - 76ms/epoch - 5ms/step
Epoch 33/100
16/16 - 0s - loss: 0.0876 - val_loss: 0.1284 - 71ms/epoch - 4ms/step
Epoch 34/100
16/16 - 0s - loss: 0.0871 - val_loss: 0.1274 - 71ms/epoch - 4ms/step
Epoch 35/100
16/16 - 0s - loss: 0.0872 - val_loss: 0.1292 - 63ms/epoch - 4ms/step
Epoch 36/100
16/16 - 0s - loss: 0.0879 - val_loss: 0.1266 - 75ms/epoch - 5ms/step
Epoch 37/100
16/16 - 0s - loss: 0

In [61]:
mean_mse_C = np.mean(mse_list_C)
std_mse_C = np.std(mse_list_C)
print('Mean: {}, Standard Deviation: {}'.format(mean_mse_C, std_mse_C))

Mean: 0.14668789261530354, Standard Deviation: 0.0009653286299347489


### How does the mean of the mean squared errors compare to that from Step B?
- B: Mean: 0.15525912617540116, Standard Deviation: 0.012055328873901296
- C: Mean: 0.14668789261530354, Standard Deviation: 0.0009653286299347489

The mean MSE of Model C (100 epochs) is lower than the mean MSE of Model B (50 epochs). This indicates that:
Model C generally performs better than Model B in terms of minimizing the average squared error between predicted and actual values.
The improvement in performance is likely due to the increased number of epochs, allowing the model to learn more complex patterns in the data.

## D. Increase the number of hidden layers (5 marks)

Repeat part B but use a neural network with the following instead:
- Three hidden layers, each of 10 nodes and ReLU activation function.

In [63]:
hidden_layers = 3
nodes_in_hidden = 10
epochs = 100
model = regression_model(hidden_layers, nodes_in_hidden)

mse_list_D = []   # Storing mse's
for i in range(num_of_repitions):
  print('Iteration: {}'.format(i))
  X_train, X_test, y_train, y_test = train_test_split(normalized_predictors, normalized_target, test_size=0.3, random_state=42)
  model.fit(X_train, y_train, validation_split=0.3, epochs=epochs, verbose=2)
  predictions = model.predict(X_test)
  mse = sklearn.metrics.mean_squared_error(y_test, predictions)
  mse_list_D.append(mse)
print(mse_list_D)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 26/100
16/16 - 0s - loss: 0.0326 - val_loss: 0.1557 - 83ms/epoch - 5ms/step
Epoch 27/100
16/16 - 0s - loss: 0.0322 - val_loss: 0.1547 - 70ms/epoch - 4ms/step
Epoch 28/100
16/16 - 0s - loss: 0.0322 - val_loss: 0.1534 - 69ms/epoch - 4ms/step
Epoch 29/100
16/16 - 0s - loss: 0.0321 - val_loss: 0.1531 - 64ms/epoch - 4ms/step
Epoch 30/100
16/16 - 0s - loss: 0.0329 - val_loss: 0.1534 - 68ms/epoch - 4ms/step
Epoch 31/100
16/16 - 0s - loss: 0.0324 - val_loss: 0.1523 - 74ms/epoch - 5ms/step
Epoch 32/100
16/16 - 0s - loss: 0.0326 - val_loss: 0.1505 - 65ms/epoch - 4ms/step
Epoch 33/100
16/16 - 0s - loss: 0.0324 - val_loss: 0.1539 - 74ms/epoch - 5ms/step
Epoch 34/100
16/16 - 0s - loss: 0.0319 - val_loss: 0.1502 - 69ms/epoch - 4ms/step
Epoch 35/100
16/16 - 0s - loss: 0.0334 - val_loss: 0.1533 - 79ms/epoch - 5ms/step
Epoch 36/100
16/16 - 0s - loss: 0.0325 - val_loss: 0.1531 - 76ms/epoch - 5ms/step
Epoch 37/100
16/16 - 0s - loss: 0

In [64]:
mean_mse_D = np.mean(mse_list_D)
std_mse_D = np.std(mse_list_D)
print('Mean: {}, Standard Deviation: {}'.format(mean_mse_D, std_mse_D))

Mean: 0.20652033954190532, Standard Deviation: 0.016298590794683813


### How does the mean of the mean squared errors compare to that from Step B?
- Model C: Normalized dataset, 100 epochs
  Mean MSE: 0.14668789261530354
  Standard Deviation: 0.0009653286299347489
- Model D: Normalized dataset, 100 epochs, 3 hidden layers with 10 nodes each
  Mean MSE: 0.20652033954190532
  Standard Deviation: 0.016298590794683813
The mean MSE of Model D (0.2065) is higher than the mean MSE of Model C (0.1467). This indicates that:
- Model C generally performs better than Model D in terms of minimizing the average squared error between predicted and actual values.
- Model D has a higher standard deviation compared to Model C. This suggests that the performance of Model D is less consistent across different runs or subsets of data.