In [1]:
!pip install numpy==2.0.2
!pip install pandas==2.2.2
!pip install tensorflow_cpu==2.18.0
!pip install scikit-learn

Collecting numpy==2.0.2
  Downloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m12.9 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.0.2
Collecting pandas==2.2.2
  Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting tzdata>=2022.7 (from pandas==2.2.2)
  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.metrics import mean_squared_error
import numpy as np

In [4]:
# Load the data
url = 'https://cocl.us/concrete_data'  # Replace with your actual CSV file URL
data = pd.read_csv(url)

In [5]:
# To check the count of rows and columns in the dataset
data.shape

(1030, 9)

In [6]:
# To display top 5 rows of the dataset
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [7]:
# To get the details of datatype for each column and memory required
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


In [8]:
# To get overall statistics of the dataset
data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [9]:
# To display null values in each column
data.isnull().sum()


Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [10]:
#To display the columns of the dataset
concrete_data_columns = data.columns
data.columns


Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

In [11]:
#Define the predictors and the Target
predictors = data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = data['Strength']
print(predictors.head())

   Cement  Blast Furnace Slag  Fly Ash  Water  Superplasticizer  \
0   540.0                 0.0      0.0  162.0               2.5   
1   540.0                 0.0      0.0  162.0               2.5   
2   332.5               142.5      0.0  228.0               0.0   
3   332.5               142.5      0.0  228.0               0.0   
4   198.6               132.4      0.0  192.0               0.0   

   Coarse Aggregate  Fine Aggregate  Age  
0            1040.0           676.0   28  
1            1055.0           676.0   28  
2             932.0           594.0  270  
3             932.0           594.0  365  
4             978.4           825.5  360  


In [12]:
print(target.head())

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64


In [14]:
n_cols = predictors.shape[1] # Number of predictors

In [15]:
# Step A: Without Normalization and Epoch = 50, Evaluate 50 Times

# Define the regression model with one hidden layers
def regression_model():
    model = Sequential()
    model.add(Input(shape=(predictors.shape[1],)))  # Specify the input shape
    model.add(Dense(10, activation='relu'))  # First hidden layer with 10 nodes and ReLU activation
    model.add(Dense(1))  # Output layer
    model.compile(optimizer='adam', loss='mean_squared_error')  # Compile the model
    return model

# Initialize an empty list to store the mean squared errors
mse_list = []

# Loop to train and evaluate the model 50 times
for i in range(50):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3)
    
    # Build the model
    model = regression_model()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    # Evaluate the model on the test data
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_list.append(mse)

mean_mse_A = np.mean(mse_list)
std_mse_A = np.std(mse_list)

print(f"Mean of Mean Squared Errors (Step A - No Normalization, 50 epochs): {mean_mse_A}")
print(f"Standard Deviation of Mean Squared Errors (Step A - No Normalization, 50 epochs): {std_mse_A}")


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [16]:
# Step B: With Normalization and Epoch = 50, Evaluate 50 Times, Compare with Step A

# Normalize the predictors
predictors_norm = (predictors - predictors.mean()) / predictors.std()

# Initialize an empty list to store the mean squared errors
mse_list = []

# Loop to train and evaluate the model 50 times
for i in range(50):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    
    # Build the model
    model = regression_model()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    # Evaluate the model on the test data
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_list.append(mse)

mean_mse_B = np.mean(mse_list)
std_mse_B = np.std(mse_list)

print(f"Mean of Mean Squared Errors (Step B - With Normalization, 50 epochs): {mean_mse_B}")
print(f"Standard Deviation of Mean Squared Errors (Step B - With Normalization, 50 epochs): {std_mse_B}")
print(f"Difference in Mean Squared Errors between Step A and Step B: {mean_mse_A - mean_mse_B}")


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [17]:
# Step C: With Normalization and Epoch = 100, Compare with Step B

# Initialize an empty list to store the mean squared errors
mse_list = []

# Loop to train and evaluate the model 50 times
for i in range(50):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    
    # Build the model
    model = regression_model()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=100, verbose=0)
    
    # Evaluate the model on the test data
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_list.append(mse)

mean_mse_C = np.mean(mse_list)
std_mse_C = np.std(mse_list)

print(f"Mean of Mean Squared Errors (Step C - With Normalization, 100 epochs): {mean_mse_C}")
print(f"Standard Deviation of Mean Squared Errors (Step C - With Normalization, 100 epochs): {std_mse_C}")
print(f"Difference in Mean Squared Errors between Step B and Step C: {mean_mse_B - mean_mse_C}")


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [18]:
# Step D: Increase Hidden Layers to Three, Each of 10 Nodes and ReLU Activation, Compare with Step B

# Define the regression model with three hidden layers
def regression_model_increased_layers():
    model = Sequential()
    model.add(Input(shape=(predictors_norm.shape[1],)))  # Specify the input shape
    model.add(Dense(10, activation='relu'))  # First hidden layer with 10 nodes and ReLU activation
    model.add(Dense(10, activation='relu'))  # Second hidden layer with 10 nodes and ReLU activation
    model.add(Dense(10, activation='relu'))  # Third hidden layer with 10 nodes and ReLU activation
    model.add(Dense(1))  # Output layer
    model.compile(optimizer='adam', loss='mean_squared_error')  # Compile the model
    return model

# Initialize an empty list to store the mean squared errors
mse_list = []

# Loop to train and evaluate the model 50 times
for i in range(50):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    
    # Build the model
    model = regression_model_increased_layers()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    # Evaluate the model on the test data
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mse_list.append(mse)

mean_mse_D = np.mean(mse_list)
std_mse_D = np.std(mse_list)

print(f"Mean of Mean Squared Errors (Step D - Increased Layers, With Normalization, 50 epochs): {mean_mse_D}")
print(f"Standard Deviation of Mean Squared Errors (Step D - Increased Layers, With Normalization, 50 epochs): {std_mse_D}")
print(f"Difference in Mean Squared Errors between Step B and Step D: {mean_mse_B - mean_mse_D}")


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 208ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[