In [None]:
# Mount Google Drive at /content/drive (needs the google.colab package,
# i.e. a Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# import
import numpy as np
import pandas as pd
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


In [None]:
# Read the concrete dataset, then separate the regression target
# ('Strength') from the remaining predictor columns.
data = pd.read_csv('............./concrete_data.csv')
y = data['Strength']
X = data.drop(columns='Strength')

# Preview the first rows of the full frame.
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


# A. Build a baseline model

In [None]:
# Baseline model Function
def baseline_model(n_features=None):
    """Build and compile the baseline regression network.

    Architecture: one hidden Dense layer (10 units, ReLU) followed by a
    single-unit Dense output layer with no activation. The model is compiled
    with the Adam optimizer and mean-squared-error loss.

    Parameters
    ----------
    n_features : int, optional
        Number of input columns. When omitted it is taken from the
        notebook-level feature frame ``X`` (``X.shape[1]``), which is what
        the original zero-argument function always did.

    Returns
    -------
    Sequential
        A newly constructed, compiled, untrained model.
    """
    if n_features is None:
        # Fall back to the global feature frame so existing
        # `baseline_model()` calls keep working unchanged.
        n_features = X.shape[1]

    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_features,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


# Store the test-set mean squared error of every repetition
mse_list = []

# Repeat the process 50 times
for i in range(50):
  # Release Keras global state left over from the previous repetition;
  # it otherwise keeps growing when many models are built in one loop.
  K.clear_session()

  # Split the data 70/30. The loop index seeds the split, so every
  # repetition gets a different but repeatable partition.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

  # Build and train a fresh model.
  # NOTE: only the split is seeded here; weight initialisation is not,
  # so the exact numbers will differ from run to run.
  model = baseline_model()
  model.fit(X_train, y_train, epochs=50, verbose=0)

  # Evaluate on the held-out rows. verbose=0 keeps predict() as quiet as
  # fit(), instead of printing a progress bar on each of the 50 runs.
  y_pred = model.predict(X_test, verbose=0)
  mse = mean_squared_error(y_test, y_pred)
  mse_list.append(mse)


# Calculate the mean and standard deviation of the MSE
# (np.std uses its default ddof=0, i.e. the population standard deviation).
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)
print(f'Mean MSE: {mean_mse}')
print(f'Standard deviation: {std_mse}')

Mean MSE: 284.5172666078205
Standard deviation: 304.52903700506295


# B. Normalize the data

In [None]:
# Normalize: z-score every feature column (subtract its mean, divide by its
# standard deviation).
# NOTE(review): the statistics are computed over all rows before splitting,
# so test rows influence the scaling. Compute them on X_train only if strict
# train/test separation is required.
X_norm = (X - X.mean()) / X.std()

# New list for mean squared error
mse_list_norm = []


# Repeat the process 50 times
for i in range(50):
  # Release Keras global state left over from the previous repetition;
  # it otherwise keeps growing when many models are built in one loop.
  K.clear_session()

  # Split the *normalized* features 70/30. The previous version split the
  # raw X here, so X_norm was never used and this experiment silently
  # repeated the baseline.
  X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3, random_state=i)

  # Build and train a fresh model
  model = baseline_model()
  model.fit(X_train, y_train, epochs=50, verbose=0)

  # Evaluate on the held-out rows. verbose=0 keeps predict() as quiet as
  # fit(), instead of printing a progress bar on each of the 50 runs.
  y_pred = model.predict(X_test, verbose=0)
  mse = mean_squared_error(y_test, y_pred)
  mse_list_norm.append(mse)


# Calculate the mean and standard deviation of the MSE
# (np.std uses its default ddof=0, i.e. the population standard deviation).
mean_mse_norm = np.mean(mse_list_norm)
std_mse_norm = np.std(mse_list_norm)
print(f'Mean MSE: {mean_mse_norm}')
print(f'Standard deviation: {std_mse_norm}')



Mean MSE: 453.89928643005044
Standard deviation: 528.620108807699


# C. Increase the Number of Epochs

In [None]:
# New list for mean squared error
mse_list_epochs = []

# Repeat the process 50 times
for i in range(50):
  # Release Keras global state left over from the previous repetition;
  # it otherwise keeps growing when many models are built in one loop.
  K.clear_session()

  # Split the data 70/30, seeded by the loop index.
  # NOTE(review): this trains on the raw features X. If the experiment is
  # meant to build on the normalization step, split the normalized frame
  # instead. Confirm the intended setup.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

  # Build a fresh model and train it for 100 epochs (the baseline uses 50)
  model = baseline_model()
  model.fit(X_train, y_train, epochs=100, verbose=0)

  # Evaluate on the held-out rows. verbose=0 keeps predict() as quiet as
  # fit(), instead of printing a progress bar on each of the 50 runs.
  y_pred = model.predict(X_test, verbose=0)
  mse = mean_squared_error(y_test, y_pred)
  mse_list_epochs.append(mse)


# Calculate the mean and standard deviation of the MSE
# (np.std uses its default ddof=0, i.e. the population standard deviation).
mean_mse_epochs = np.mean(mse_list_epochs)
std_mse_epochs = np.std(mse_list_epochs)
print(f'Mean MSE: {mean_mse_epochs}')
print(f'Standard deviation: {std_mse_epochs}')






# D. Increase the Number of Hidden Layers

In [None]:
# Build a model with three hidden layers

def build_three_layer_model(n_features=None):
  """Build and compile a regression network with three hidden layers.

  Architecture: three hidden Dense layers (10 units each, ReLU) followed by
  a single-unit Dense output layer with no activation. The model is compiled
  with the Adam optimizer and mean-squared-error loss.

  Parameters
  ----------
  n_features : int, optional
      Number of input columns. When omitted it is taken from the
      notebook-level feature frame ``X`` (``X.shape[1]``), which is what
      the original zero-argument function always did.

  Returns
  -------
  Sequential
      A newly constructed, compiled, untrained model.
  """
  if n_features is None:
    # Fall back to the global feature frame so existing zero-argument
    # calls keep working unchanged.
    n_features = X.shape[1]

  model = Sequential()
  model.add(Dense(10, activation='relu', input_shape=(n_features,)))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mean_squared_error')
  return model

# New list for mean squared error
mse_list_layers = []

# Repeat the process 50 times
for i in range(50):
  # Release Keras global state left over from the previous repetition;
  # it otherwise keeps growing when many models are built in one loop.
  K.clear_session()

  # Split the data 70/30, seeded by the loop index.
  # NOTE(review): this trains on the raw features X. If the experiment is
  # meant to build on the normalization step, split the normalized frame
  # instead. Confirm the intended setup.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

  # Build and train a fresh three-hidden-layer model
  model = build_three_layer_model()
  model.fit(X_train, y_train, epochs=50, verbose=0)

  # Evaluate on the held-out rows. verbose=0 keeps predict() as quiet as
  # fit(), instead of printing a progress bar on each of the 50 runs.
  y_pred = model.predict(X_test, verbose=0)
  mse = mean_squared_error(y_test, y_pred)
  mse_list_layers.append(mse)


# Calculate the mean and standard deviation of the MSE
# (np.std uses its default ddof=0, i.e. the population standard deviation).
mean_mse_layers = np.mean(mse_list_layers)
std_mse_layers = np.std(mse_list_layers)
print(f'Mean MSE: {mean_mse_layers}')
print(f'Standard deviation: {std_mse_layers}')



# Discussion

In [None]:
# Summarise every experiment against the baseline. The wording is derived
# from the measured values, so the text cannot contradict the numbers (the
# earlier hard-coded sentences claimed an improvement whatever the result).
def _verdict(mse, reference):
  """Return a phrase comparing `mse` with the baseline `reference` (lower MSE is better)."""
  if mse < reference:
    return f'lower than the baseline ({reference:.2f}), i.e. an improvement'
  if mse > reference:
    return f'higher than the baseline ({reference:.2f}), i.e. no improvement'
  return f'equal to the baseline ({reference:.2f}), i.e. no change'


# The standard deviation over the 50 repetitions is printed next to each
# mean so the reader can judge whether a difference is meaningful.
print(f'The mean MSE for the baseline model is {mean_mse:.2f} (std {std_mse:.2f}), which serves as the reference point.')
print(f'After normalizing the data, the mean MSE is {mean_mse_norm:.2f} (std {std_mse_norm:.2f}), which is {_verdict(mean_mse_norm, mean_mse)}.')
print(f'Increasing the number of epochs to 100 resulted in a mean MSE of {mean_mse_epochs:.2f} (std {std_mse_epochs:.2f}), which is {_verdict(mean_mse_epochs, mean_mse)}.')
print(f'Adding two more hidden layers to the model resulted in a mean MSE of {mean_mse_layers:.2f} (std {std_mse_layers:.2f}), which is {_verdict(mean_mse_layers, mean_mse)}. Added model complexity has to be balanced against the risk of overfitting.')