## PACKAGES IMPORTS


In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import keras
from keras.models import Sequential
from keras.layers import Dense

## DATA IMPORT

In [2]:
# Read the concrete dataset directly from the course's object storage,
# then preview its first five rows as the cell output.
concrete_data = pd.read_csv(
    filepath_or_buffer="https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv"
)
concrete_data.head(n=5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Boolean mask marking the target column; everything else is a predictor.
target_mask = concrete_data.columns == "Strength"

# Predictors (all non-target columns) and target (kept as a one-column frame).
X = concrete_data.loc[:, ~target_mask]
Y = concrete_data.loc[:, target_mask]

# Number of predictor columns, used as the network's input width.
n_cols = len(X.columns)

# **A** - Baseline Model

In [4]:
# Define regression model
def regression_model():
    """Build and compile the baseline regression network.

    The network has one hidden layer of 10 ReLU units followed by a single
    linear output unit.  It is compiled with the Adam optimizer and a
    mean-squared-error loss.  The input width is read from the module-level
    ``n_cols``.

    Returns:
        The compiled, untrained ``Sequential`` model.
    """
    model = Sequential()
    # Declare the input with an explicit Input object instead of passing
    # ``input_shape`` to the first layer, which Keras 3 warns against.
    model.add(keras.Input(shape=(n_cols,)))
    model.add(Dense(10, activation="relu"))
    model.add(Dense(1))

    model.compile(optimizer="adam", loss="mean_squared_error")
    return model

In [5]:
# Hold out 30% of the rows for testing; the remaining 70% is used for training
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
)

In [6]:
# Instantiate a fresh baseline network
model = regression_model()

# Train it silently for 50 epochs on the training split
model.fit(x=X_train, y=Y_train, epochs=50, verbose=0)

<keras.src.callbacks.History at 0x2750dac6e10>

In [7]:
# Run the trained network on the held-out predictors
prediction = model.predict(x=X_test)

# Mean squared error between the true test targets and the predictions
error = mean_squared_error(y_true=Y_test, y_pred=prediction)
print(f"Mean Squared Error for model is: {error}")

Mean Squared Error for model is: 128.8232437725296


In [8]:
# Repeat the split / train / evaluate cycle several times
def MSE_repeat(X, Y, iterations, epochs=50):
    """Train and score a fresh baseline model on repeated random splits.

    Each iteration draws a new 70/30 train/test split, builds a new model
    with ``regression_model()``, fits it on the training part and records the
    mean squared error of its predictions on the test part.

    Args:
        X: Predictors, forwarded to ``train_test_split``.
        Y: Target, forwarded to ``train_test_split``.
        iterations: Number of split/train/evaluate repetitions.
        epochs: Training epochs per repetition (defaults to 50).

    Returns:
        A list with one test-set MSE per iteration.
    """
    error_list = []

    for _ in range(iterations):
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
        model = regression_model()
        model.fit(X_train, Y_train, epochs=epochs, verbose=0)
        # verbose=0 keeps the repeated predict calls from flooding the
        # cell output with one progress bar per iteration.
        prediction = model.predict(X_test, verbose=0)
        error_list.append(mean_squared_error(Y_test, prediction))
    return error_list

In [9]:
# Collect 50 test-set MSEs from the repeated baseline experiment
error_list_A = MSE_repeat(X, Y, iterations=50)

# Calculate the mean and standard deviation of the 50 MSEs returned
mean_MSE_A = np.asarray(error_list_A).mean()
std_MSE_A = np.asarray(error_list_A).std()

print(f"Mean of MSE= {mean_MSE_A} | Standard deviation of MSE= {std_MSE_A}")

Mean of MSE= 470.37549240877956 | Standard deviation of MSE= 545.0265196608758


# **B** - Normalize The Data

In [10]:
# Standardize each predictor column: subtract its mean, divide by its
# standard deviation
X_norm = X.sub(X.mean()).div(X.std())

In [11]:
# Collect 50 test-set MSEs, this time on the normalized predictors
error_list_B = MSE_repeat(X_norm, Y, iterations=50)

# Calculate the mean and standard deviation of the 50 MSEs returned
mean_MSE_B = np.asarray(error_list_B).mean()
std_MSE_B = np.asarray(error_list_B).std()

print(f"Mean of  MSE= {mean_MSE_B} | Standard deviation of  MSE= {std_MSE_B}")

Mean of  MSE= 362.12520270502233 | Standard deviation of  MSE= 93.94264536005694


***The mean MSE decreased and the standard deviation for MSEs also decreased. (Compared to A)***

# **C** - Increase number of Epochs

In [15]:
# Repeat the split / train / evaluate cycle with a longer training run
def MSE_repeat_C(X, Y, iterations, epochs=100):
    """Train and score a fresh baseline model on repeated random splits.

    Identical in structure to the 50-epoch experiment, but each model is
    trained for ``epochs`` epochs (100 by default).  Each iteration draws a
    new 70/30 train/test split, builds a new model with
    ``regression_model()``, fits it and records the test-set mean squared
    error.

    Args:
        X: Predictors, forwarded to ``train_test_split``.
        Y: Target, forwarded to ``train_test_split``.
        iterations: Number of split/train/evaluate repetitions.
        epochs: Training epochs per repetition (defaults to 100).

    Returns:
        A list with one test-set MSE per iteration.
    """
    error_list = []

    for _ in range(iterations):
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
        model = regression_model()
        model.fit(X_train, Y_train, epochs=epochs, verbose=0)
        # verbose=0 keeps the repeated predict calls from flooding the
        # cell output with one progress bar per iteration.
        prediction = model.predict(X_test, verbose=0)
        error_list.append(mean_squared_error(Y_test, prediction))
    return error_list

In [16]:
# Collect 50 test-set MSEs from the 100-epoch experiment
error_list_C = MSE_repeat_C(X_norm, Y, iterations=50)

# Calculate the mean and standard deviation of the 50 MSEs returned
mean_MSE_C = np.asarray(error_list_C).mean()
std_MSE_C = np.asarray(error_list_C).std()

print(f"Mean of MSE= {mean_MSE_C} | Standard deviation of MSE= {std_MSE_C}")

Mean of MSE= 166.6485635043565 | Standard deviation of MSE= 17.048411854203547


***The mean MSE decreased and the standard deviation for MSEs also decreased. (Compared to B)***

# **D** - Increase number of Hidden Layers

In [17]:
# Define regression model with increased layers
def regression_model_D():
    """Build and compile the deeper regression network.

    The network has three hidden layers of 10 ReLU units each, followed by a
    single linear output unit.  It is compiled with the Adam optimizer and a
    mean-squared-error loss.  The input width is read from the module-level
    ``n_cols``.

    Returns:
        The compiled, untrained ``Sequential`` model.
    """
    model = Sequential()
    # Declare the input with an explicit Input object instead of passing
    # ``input_shape`` to the first layer, which Keras 3 warns against.
    model.add(keras.Input(shape=(n_cols,)))
    # Three identical hidden layers.
    for _ in range(3):
        model.add(Dense(10, activation="relu"))
    model.add(Dense(1))

    model.compile(optimizer="adam", loss="mean_squared_error")
    return model

In [18]:
# Repeat the split / train / evaluate cycle with the deeper network
def MSE_repeat_D(X, Y, iterations, epochs=50):
    """Train and score a fresh three-hidden-layer model on repeated splits.

    Each iteration draws a new 70/30 train/test split, builds a new model
    with ``regression_model_D()``, fits it on the training part and records
    the mean squared error of its predictions on the test part.

    Args:
        X: Predictors, forwarded to ``train_test_split``.
        Y: Target, forwarded to ``train_test_split``.
        iterations: Number of split/train/evaluate repetitions.
        epochs: Training epochs per repetition (defaults to 50).

    Returns:
        A list with one test-set MSE per iteration.
    """
    error_list = []

    for _ in range(iterations):
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
        model = regression_model_D()
        model.fit(X_train, Y_train, epochs=epochs, verbose=0)
        # verbose=0 keeps the repeated predict calls from flooding the
        # cell output with one progress bar per iteration.
        prediction = model.predict(X_test, verbose=0)
        error_list.append(mean_squared_error(Y_test, prediction))
    return error_list

In [19]:
# Collect 50 test-set MSEs from the deeper-network experiment
error_list_D = MSE_repeat_D(X_norm, Y, iterations=50)

# Calculate the mean and standard deviation of the 50 MSEs returned
mean_MSE_D = np.asarray(error_list_D).mean()
std_MSE_D = np.asarray(error_list_D).std()

print(f"Mean of 50 MSE= {mean_MSE_D} | Standard deviation of 50 MSE= {std_MSE_D}")

Mean of 50 MSE= 156.97673195749528 | Standard deviation of 50 MSE= 202.98555374881778


***The mean MSE decreased, but the standard deviation of the MSEs increased. (Compared to B)***

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot the mean MSE of each section.  The values are taken from the
# mean_MSE_* variables computed by the experiment cells rather than typed in
# by hand, so the chart always agrees with the printed results of the
# current run.
section_means = [mean_MSE_A, mean_MSE_B, mean_MSE_C, mean_MSE_D]
ax = sns.pointplot(x=[' A', ' B', ' C', ' D'], y=section_means, color='#6495ed')
ax.set_title('Mean Squared Error')
ax.set_xlabel('Section')
ax.set_ylabel('Mean Squared Error');