# Regression Model using Keras

Our first step is to import all the libraries and dependiencies needed for our task

In [37]:
import pandas as pd
import numpy as np

import keras
from keras.layers import Dense, Input
from keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

Then we save our task data in a dataFrame

In [5]:
df = pd.read_csv('https://cocl.us/concrete_data')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


We check that our data is clean

In [8]:
df.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [13]:
X = df[['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer', 'Coarse Aggregate', 'Fine Aggregate']]
y = df['Strength']

print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

X shape: (1030, 7)
y shape: (1030,)


Now we can begin with out Deep learning tasks

## Part A

Use the Keras library to build a neural network with the following:

- One hidden layer of 10 nodes, and a ReLU activation function

- Use the adam optimizer and the mean squared error  as the loss function.

In [27]:
def first_model(ncols):
    model = Sequential()
    model.add(Input(shape=(ncols,)))
    model.add(Dense(10,activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam',loss='mean_squared_error')
    return model

1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.

2. Train the model on the training data using 50 epochs.

3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

5. Report the mean and the standard deviation of the mean squared errors.

In [31]:
def train_mse_50(X,y,f_model,epochs):
    mse_ = []
    for i in range(50):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=None)
        model = f_model(X.shape[1])
        model.fit(X_train,y_train,epochs=epochs,verbose=0)
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        mse_.append(mse)
        print(f"Iteration {i+1} - Test MSE: {mse}")
    return mse_

In [32]:
mse_list = train_mse_50(X,y,first_model,50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 1 - Test MSE: 210.73780961516067
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 2 - Test MSE: 407.71142237215685
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 3 - Test MSE: 160.4209508352479
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 4 - Test MSE: 256.55291604227864
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 5 - Test MSE: 161.1641681941033
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 6 - Test MSE: 209.38291066751438
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 7 - Test MSE: 1624.5995713772004
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 8 - Test MSE: 341.488312613865
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/

In [35]:
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean MSE: {mean_mse}")
print(f"Standard Deviation of MSE: {std_mse}")

Mean MSE: 372.3357330785238
Standard Deviation of MSE: 372.33142147113307


## Part B

Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

In [38]:
s = StandardScaler()
X_norm = s.fit_transform(X)
mse_list = train_mse_50(X_norm,y,first_model,50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 1 - Test MSE: 334.96265339493226
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 2 - Test MSE: 409.5838368936774
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 3 - Test MSE: 261.5180083964583
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 4 - Test MSE: 329.1121181099586
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 5 - Test MSE: 443.87760272995797
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 6 - Test MSE: 452.95641005819783
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 7 - Test MSE: 276.58339319080903
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 8 - Test MSE: 312.4747369012568
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/

In [39]:
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean MSE: {mean_mse}")
print(f"Standard Deviation of MSE: {std_mse}")

Mean MSE: 364.3545314649238
Standard Deviation of MSE: 95.95271040218941


## Part C

Repeat Part B but use 100 epochs this time for training.

In [40]:
mse_list = train_mse_50(X_norm,y,first_model,100)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 1 - Test MSE: 194.93196745169553
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 2 - Test MSE: 201.4761130818425
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 3 - Test MSE: 214.41025152401895
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 4 - Test MSE: 220.28626034099997
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 5 - Test MSE: 230.95523901177765
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 6 - Test MSE: 181.60667277245537
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 7 - Test MSE: 238.7316643659852
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Iteration 8 - Test MSE: 198.07420679677648
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3m

In [41]:
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean MSE: {mean_mse}")
print(f"Standard Deviation of MSE: {std_mse}")

Mean MSE: 196.74199271409682
Standard Deviation of MSE: 16.971686176457514


## Part D

Repeat part B but use a neural network with the following instead:

- Three hidden layers, each of 10 nodes and ReLU activation function.

In [43]:
def second_model(ncols):
    model = Sequential()
    model.add(Input(shape=(ncols,)))
    for _ in range(3): model.add(Dense(10,activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam',loss='mean_squared_error')
    return model

In [44]:
mse_list = train_mse_50(X_norm,y,second_model,50)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Iteration 1 - Test MSE: 182.78743886994593
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Iteration 2 - Test MSE: 156.83923627060818
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Iteration 3 - Test MSE: 172.78523804669433
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Iteration 4 - Test MSE: 167.46106490906735
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Iteration 5 - Test MSE: 176.00082865440956
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Iteration 6 - Test MSE: 179.0438634428292
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Iteration 7 - Test MSE: 157.53878322773855
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Iteration 8 - Test MSE: 167.889140514794
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms

In [45]:
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f"Mean MSE: {mean_mse}")
print(f"Standard Deviation of MSE: {std_mse}")

Mean MSE: 170.63849142077396
Standard Deviation of MSE: 11.910451843680663
