In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Data Preprocessing
### Even though not explicitly required by the assignment, this step is a basic step necessary to ensure proper functioning of the models

In [67]:
# GET THE DATASET
df = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [68]:
# DATA PRE-PROCESSING

# Get the target and predictors
columns = df.columns
predictors = df[columns[:-1]]
target = df[columns[-1]]

# PART B
### Normalising the predictors!

### Mean Squared Error: 355.8860413203666

In [69]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
predictors = scaler.fit_transform(predictors)

In [70]:
# How many output nodes?
n_cols = predictors.shape[1]

# Part A
### Building a baseline model (5 Marks)

### Mean Squared Error: 588.1540352816005

In [77]:
import keras
from keras.api.models import Sequential
from keras.api.layers import Dense

def basic_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [78]:
# PREPARING TO TRAIN 50 TIMES!

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

mse_list = []

In [79]:
for i in range(50):
    print(i)
    # split 50 times
    x_train, x_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)
    
    # build model 50 times
    model = basic_model()
    model.fit(x_train, y_train, validation_split=0.3, epochs=100, verbose=0)
    
    # evaluate 50 times
    y_pred = model.predict(x_test)
    
    # calculate mse 50 times
    model_mse = mean_squared_error(y_test, y_pred)
    mse_list.append(model_mse)


0
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
1
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
2
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
3
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
4
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
6
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
7
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
9
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
11
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
12
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
13
[1m10

In [80]:
mse_mean = np.mean(mse_list)
mse_std_dev = np.std(mse_list)

print(f'Mean MSE: {mse_mean} and Standard Deviation: {mse_std_dev}')

Mean MSE: 107.08879544933001 and Standard Deviation: 9.947951369691012


# PART C
### Working with 100 epochs

### Mean Squared Error: 290.3209633953763

In [None]:
for i in range(50):
    print(i)
    # split 50 times
    x_train, x_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)
    
    # build model 50 times
    model = basic_model()
    model.fit(x_train, y_train, validation_split=0.3, epochs=100, verbose=0)
    
    # evaluate 50 times
    y_pred = model.predict(x_test)
    
    # calculate mse 50 times
    model_mse = mean_squared_error(y_test, y_pred)
    mse_list.append(model_mse)


# PART D
### Working with 3 hidden layers

### Mean Squared Error: 107.08879544933001

In [None]:
import keras
from keras.api.models import Sequential
from keras.api.layers import Dense

def new_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# CONCLUSION

### Clearly from Part A, to Part B, to Part C, to Part D, the mean squared error kept DECREASING, creating the most optimal model at Part D