In [1]:
# When should you build a model with a neural network instead of simply using scikit-learn?
# For complex or non-linear problems where a simple logistic regression or linear model cannot capture the relationship between the inputs and output.
# When the dataset has high dimensionality or requires feature engineering and transformations.
# When you need deep learning to learn representations of the data (e.g., using multiple layers, non-linear activations).

In [2]:
# Why it's more complex:
# Neural networks allow you to customize the model architecture by stacking layers, adding activation functions, and choosing different optimization techniques.
# This flexibility requires more lines of code, as you're designing the model from scratch.

In [3]:
from pathlib import Path
from urllib.request import urlretrieve

import pandas as pd

# Source of the concrete compressive-strength dataset and the local file it is saved to.
url = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
out_file = 'concrete_data.csv'

# Download only when no local copy exists yet, so re-running the notebook
# does not hit the network again. Delete the file to force a fresh download.
if not Path(out_file).exists():
    urlretrieve(url, out_file)

df = pd.read_csv(out_file)
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
# check the number of rows and columns; shape is a (rows, columns) tuple
df.shape

(1030, 9)

In [5]:
# count the missing values in each column (all zeros means the data is complete)
df.isna().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [6]:
# Separate the input features from the regression target.
predictors = df.drop(columns='Strength')
target = df['Strength']

n_cols = predictors.shape[1]  # number of predictors (columns); used as the network's input width

# Standardize every predictor to zero mean and unit variance (z-score) so that
# features measured on very different scales contribute comparably during training.
predictors_mean = predictors.mean()
# A constant column has std == 0 and would become NaN after the division;
# dividing by 1 instead leaves such a column at exactly 0.
predictors_std = predictors.std().replace(0, 1)
# NOTE: the mean/std are computed over all rows, including the rows that
# model.fit(..., validation_split=0.3) later holds out for validation.
predictors_norm = (predictors - predictors_mean) / predictors_std
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [7]:
# build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialization and batch shuffling
# are repeatable from one run of the notebook to the next.
set_random_seed(42)

model = Sequential()
# Declare the input explicitly instead of passing input_shape to the first Dense layer;
# Keras warns about the input_shape argument on layers inside a Sequential model.
# (n_cols,): the comma is required to make this a one-element tuple (a one-dimensional input);
# without the comma, (n_cols) would be interpreted as a plain integer rather than a tuple.
model.add(Input(shape=(n_cols,)))
model.add(Dense(50, activation='relu'))  # first hidden layer
# 50: number of neurons in the layer
# rule of thumb for the number of neurons: start with 2n or 3n, where n is the number of
# input features, and adjust it until you get the best validation performance
# relu: the most commonly used activation function
model.add(Dense(50, activation='relu'))  # second hidden layer (the number of hidden layers can be tuned as well)
model.add(Dense(1))  # output layer: a single neuron with no activation, for a regression target

# Last step of building the model: specify the optimizer and the loss function used for training.
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Train the model. Keras holds out the validation set from the data passed to fit(),
# so no separate train_test_split call is made beforehand.
# fit() returns a History object; its .history dict stores the per-epoch losses.
reg_model = model.fit(
    predictors_norm,
    target,
    # 70% of the rows are used for training and 30% for validation;
    # Keras takes the validation rows from the end of the data, before any shuffling.
    validation_split=0.3,
    # The model passes over the entire training portion 100 times;
    # this number can be tuned while watching the validation loss.
    epochs=100,
    # verbose controls how much detail is printed while the model trains:
    #   0 = no output (silent)
    #   1 = a progress bar for each epoch
    #   2 = one simple line per epoch (easier to read)
    verbose=2,
)

Epoch 1/100
23/23 - 1s - 52ms/step - loss: 1619.6010 - val_loss: 1142.6183
Epoch 2/100
23/23 - 0s - 4ms/step - loss: 1480.5070 - val_loss: 1027.0006
Epoch 3/100
23/23 - 0s - 4ms/step - loss: 1251.8744 - val_loss: 840.0289
Epoch 4/100
23/23 - 0s - 6ms/step - loss: 916.4598 - val_loss: 601.3319
Epoch 5/100
23/23 - 0s - 4ms/step - loss: 564.5486 - val_loss: 380.0210
Epoch 6/100
23/23 - 0s - 3ms/step - loss: 326.8695 - val_loss: 249.9112
Epoch 7/100
23/23 - 0s - 3ms/step - loss: 253.3609 - val_loss: 200.9149
Epoch 8/100
23/23 - 0s - 3ms/step - loss: 234.2119 - val_loss: 184.7339
Epoch 9/100
23/23 - 0s - 3ms/step - loss: 219.8742 - val_loss: 179.4827
Epoch 10/100
23/23 - 0s - 4ms/step - loss: 210.1443 - val_loss: 172.9318
Epoch 11/100
23/23 - 0s - 3ms/step - loss: 201.1124 - val_loss: 165.0484
Epoch 12/100
23/23 - 0s - 4ms/step - loss: 193.8352 - val_loss: 161.9014
Epoch 13/100
23/23 - 0s - 3ms/step - loss: 187.5954 - val_loss: 158.7616
Epoch 14/100
23/23 - 0s - 4ms/step - loss: 182.1974 - 

In [9]:
# history['loss']     -> loss on the training portion, one value per epoch
# history['val_loss'] -> loss on the validation portion, one value per epoch
import numpy as np

# Print the mean of both loss curves, then the standard deviation of both,
# each taken across all epochs (so the large early-epoch losses are included).
loss_curves = reg_model.history
for summarize in (np.mean, np.std):
    for curve_name in ('loss', 'val_loss'):
        print(summarize(loss_curves[curve_name]))

153.952935295105
163.60066230773927
254.57753597001363
159.74971668724226
