In [None]:
import pandas as pd
import numpy as np

# **Download, explore & clean the dataset**
Now, let's start by downloading our dataset and exploring it a bit so we know what we are working with.

In [None]:
# Remote CSV file that holds the concrete dataset
data_url = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
# Read the CSV straight from the URL into a DataFrame, then preview the first rows
concrete_data = pd.read_csv(data_url)
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Okay, so we have a dataset with 9 columns: 8 predictor features (Cement, Blast Furnace Slag, Fly Ash, Water, Superplasticizer, Coarse Aggregate, Fine Aggregate, Age) and the target attribute Strength, which we want to predict.

Let's check how many data points we have

In [None]:
concrete_data.shape # (rows, columns): 1030 samples across 9 columns

(1030, 9)

Let's look at the summary statistics of each column and then check the dataset for any missing values

In [None]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [None]:
# Count the missing entries in each column (all zeros means nothing is missing)
concrete_data.isna().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

The data looks clean, so we are ready to build our model.

# **Split the data into predictors and target**

In [None]:
# The column we want the network to predict
target = concrete_data['Strength']
# Every remaining column is used as a model input
predictors = concrete_data.drop(columns=['Strength'])

Quick sanity check of our predictors and target:

In [None]:
# Preview the first five values of the target column
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [None]:
# Preview the first five rows of the predictor columns (Strength has been removed)
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


Finally, the last step is to normalize the data by subtracting the mean and dividing by the standard deviation.


In [None]:
# Standardize each column: subtract its mean, then divide by its standard deviation
predictors_norm = predictors.sub(predictors.mean()).div(predictors.std())
# Number of predictor columns; used later as the network's input width
n_cols = len(predictors_norm.columns)
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


# **Build our model with Keras**

In [None]:
import keras # import keras

In [None]:
# import the rest of the packages
from keras.models import Sequential
from keras.layers import Dense

We define a function that defines our regression model

In [None]:
# define regression model
def regression_model():
    """Build and compile the regression network.

    The network is a Sequential stack of two 50-unit ReLU Dense layers
    followed by a single-unit Dense output layer. The first layer's input
    width is read from the module-level ``n_cols``.

    Returns:
        The compiled Sequential model (Adam optimizer, mean squared error loss).
    """
    # Describe the whole stack up front, then hand it to Sequential in one go
    layer_stack = [
        Dense(50, activation='relu', input_shape=(n_cols,)),
        Dense(50, activation='relu'),
        Dense(1),
    ]
    network = Sequential(layer_stack)

    # Configure training: Adam optimizer minimizing mean squared error
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

The above model has two hidden layers each with 50 hidden nodes.


# **Train and Test the Network**


In [None]:
# Build a fresh, compiled (but still untrained) network
model = regression_model()

Next, we will train and test the model at the same time using the *fit* method. We will leave out 20% of the data for validation and we will train the model for 100 epochs.


In [None]:
# Train for 100 epochs, holding out 20% of the samples for validation;
# verbose=2 prints one summary line per epoch
model.fit(
    x=predictors_norm,
    y=target,
    epochs=100,
    validation_split=0.2,
    verbose=2,
)

Epoch 1/100
26/26 - 1s - loss: 1571.3706 - val_loss: 1144.1097 - 1s/epoch - 54ms/step
Epoch 2/100
26/26 - 0s - loss: 1408.4474 - val_loss: 1009.3365 - 90ms/epoch - 3ms/step
Epoch 3/100
26/26 - 0s - loss: 1123.6533 - val_loss: 795.1569 - 79ms/epoch - 3ms/step
Epoch 4/100
26/26 - 0s - loss: 738.6453 - val_loss: 543.2873 - 98ms/epoch - 4ms/step
Epoch 5/100
26/26 - 0s - loss: 422.3061 - val_loss: 355.2255 - 96ms/epoch - 4ms/step
Epoch 6/100
26/26 - 0s - loss: 280.4122 - val_loss: 267.3870 - 91ms/epoch - 3ms/step
Epoch 7/100
26/26 - 0s - loss: 237.1936 - val_loss: 239.3581 - 91ms/epoch - 4ms/step
Epoch 8/100
26/26 - 0s - loss: 216.7279 - val_loss: 212.7437 - 100ms/epoch - 4ms/step
Epoch 9/100
26/26 - 0s - loss: 203.1832 - val_loss: 206.5611 - 83ms/epoch - 3ms/step
Epoch 10/100
26/26 - 0s - loss: 192.5727 - val_loss: 197.6523 - 95ms/epoch - 4ms/step
Epoch 11/100
26/26 - 0s - loss: 184.1422 - val_loss: 186.6663 - 80ms/epoch - 3ms/step
Epoch 12/100
26/26 - 0s - loss: 177.0787 - val_loss: 181.7

<keras.src.callbacks.History at 0x789ba1b0a620>

And... that's it! This is how we can create a regression model with Keras. We can now make predictions:

In [None]:
# Normalization statistics must be the very ones used to build `predictors_norm`:
# the per-column mean and standard deviation of the raw predictors.
# (The previous hard-coded "means" were the first row of `predictors_norm`,
# not column means, so the model was fed inputs on the wrong scale.)
means = predictors.mean().to_numpy()
std_devs = predictors.std().to_numpy()

# Create an input example in the ORIGINAL (raw) units, in the column order of `predictors`:
# Cement, Blast Furnace Slag, Fly Ash, Water, Superplasticizer,
# Coarse Aggregate, Fine Aggregate, Age
input_example = np.array([300.0, 100.0, 0.0, 180.0, 6.0, 970.0, 780.0, 28.0])

# Normalize the input using the same mean and standard deviation as the training data
normalized_input = (input_example - means) / std_devs

# Reshape the input to match the model's expected input shape: (1 sample, n features)
normalized_input = normalized_input.reshape(1, -1)

# `model` is the Sequential network trained above
predictions = model.predict(normalized_input)

print(predictions)

[[39.816776]]
