In [18]:
import pandas as pd
import numpy as np

from keras.layers import Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv(r"../Module 3/concrete_data.csv")
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# A. Baseline Model

In [6]:
X = df.drop('Strength', axis=1)
y = df.Strength

In [15]:
def baseline_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

In [None]:
mse = []
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
    base_model = baseline_model()
    base_model.fit(X_train, y_train, epochs=50)
    y_pred = base_model.predict(X_test)
    error = mean_squared_error(y_test, y_pred)
    mse.append([i+1, error])

Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 891us/step - loss: 56480.1289 - mean_squared_error: 56480.1289 
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 784us/step - loss: 10305.5947 - mean_squared_error: 10305.5947
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 756us/step - loss: 2598.1362 - mean_squared_error: 2598.1362
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 817us/step - loss: 2450.9250 - mean_squared_error: 2450.9250
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2216.8999 - mean_squared_error: 2216.8999 
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2009.1982 - mean_squared_error: 2009.1982 
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 795us/step - loss: 1663.9525 - mean_squared_error: 1663.9525
Epoch 8/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━

In [20]:
mean = np.mean(mse[1])
std = np.std(mse[1])
print(f"Mean of MSEs = {mean}")
print(f"Standard Deviation of MSEs = {std}")

Mean of MSEs = 238.94294976912565
Standard Deviation of MSEs = 236.94294976912565


---

# B. Normalized Data Model

In [23]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [None]:
def baseline_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_train_scaled.shape[1],)))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

In [None]:
normalize_mse = []
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    base_model = baseline_model()
    base_model.fit(X_train_scaled, y_train, epochs=50)
    y_pred = base_model.predict(X_test_scaled)
    error = mean_squared_error(y_test, y_pred)
    normalize_mse.append([i+1, error])

Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 955us/step - loss: 1604.5848 - mean_squared_error: 1604.5848 
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1481.6422 - mean_squared_error: 1481.6422 
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 920us/step - loss: 1564.6797 - mean_squared_error: 1564.6797
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 783us/step - loss: 1437.6185 - mean_squared_error: 1437.6185
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 743us/step - loss: 1534.3157 - mean_squared_error: 1534.3157
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 985us/step - loss: 1446.6086 - mean_squared_error: 1446.6086
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 929us/step - loss: 1427.3036 - mean_squared_error: 1427.3036
Epoch 8/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━

In [33]:
print(f"Mean of Normalized MSEs = {np.mean(normalize_mse[1])}")
print(f"Standard Deviation of Normalized MSEs = {np.std(normalize_mse[1])}")

Mean of Normalized MSEs = 171.12511049289165
Standard Deviation of Normalized MSEs = 169.12511049289165


After scaling the data, the results significantly improved in the error. Which means that the model is sensitive to scaling.

# C. Increase no. of Epochs to 100

In [None]:
normalized_mse = []
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    base_model = baseline_model()
    base_model.fit(X_train_scaled, y_train, epochs=100)
    y_pred = base_model.predict(X_test_scaled)
    error = mean_squared_error(y_test, y_pred)
    normalized_mse.append([i+1, error])

Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1586.7678 - mean_squared_error: 1586.7678   
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1569.1528 - mean_squared_error: 1569.1528 
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1557.0375 - mean_squared_error: 1557.0375 
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 638us/step - loss: 1507.4015 - mean_squared_error: 1507.4015
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 761us/step - loss: 1466.8256 - mean_squared_error: 1466.8256
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 856us/step - loss: 1450.0364 - mean_squared_error: 1450.0364
Epoch 7/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 918us/step - loss: 1501.2584 - mean_squared_error: 1501.2584
Epoch 8/100
[1m23/23[0m [32m━━━━━━━━━━━

In [35]:
print(f"Mean of MSEs = {np.mean(normalized_mse[1])}")
print(f"Standard Deviation of MSEs = {np.std(normalized_mse[1])}")

Mean of MSEs = 80.39181053369956
Standard Deviation of MSEs = 78.39181053369956


Again when the no. of epoch increased, the error significantly improved.

# D. More Hidden Layers

In [36]:
def baseline_normalized_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_train_scaled.shape[1],)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

In [None]:
new_mse = []
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    base_model = baseline_normalized_model()
    base_model.fit(X_train_scaled, y_train, epochs=50)
    y_pred = base_model.predict(X_test_scaled)
    error = mean_squared_error(y_test, y_pred)
    new_mse.append([i+1, error])

Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1641.8219 - mean_squared_error: 1641.8219
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1563.8594 - mean_squared_error: 1563.8594 
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1501.3643 - mean_squared_error: 1501.3643 
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 910us/step - loss: 1537.3062 - mean_squared_error: 1537.3062
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 961us/step - loss: 1492.6429 - mean_squared_error: 1492.6429
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1356.3600 - mean_squared_error: 1356.3600 
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 897us/step - loss: 1246.0398 - mean_squared_error: 1246.0398
Epoch 8/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [39]:
print(f"Mean of new MSEs = {np.mean(new_mse[1])}")

Mean of new MSEs = 75.35805293878559


Compared to the mean of part B, there's a significant drop in the error.