# Deep Learning - Basic Analysis of Cement Strength

In [1]:
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

### Import dataset

In [2]:
df = pd.read_csv('https://cocl.us/concrete_data')
df

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,32.77


In [3]:
df.isnull().any()

Cement                False
Blast Furnace Slag    False
Fly Ash               False
Water                 False
Superplasticizer      False
Coarse Aggregate      False
Fine Aggregate        False
Age                   False
Strength              False
dtype: bool

### Split data into training and test

In [4]:
X = df.loc[:, df.columns != 'Strength']
y = df[['Strength']]
print(X.shape)
print(y.shape)

(1030, 8)
(1030, 1)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

### Define model network and construct model

In [6]:
def regression_model(params):
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(params,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

In [7]:
model = regression_model(X_train.shape[1])

### Train model and evaluate performance

In [8]:
mse_list = []
for i in range(50):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    mse_list.append(scores[1])
print('Complete')

Complete


In [9]:
print(np.mean(mse_list))
print(np.std(mse_list))

95.05189239501954
20.28482634559286


Not great

### Try normalization

In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
X_norm = (X - X.mean()) / X.std()
X_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [12]:
scaler = StandardScaler()
X_norm = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
X_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.477915,-0.856888,-0.847144,-0.916764,-0.620448,0.863154,-1.21767,-0.279733
1,2.477915,-0.856888,-0.847144,-0.916764,-0.620448,1.056164,-1.21767,-0.279733
2,0.491425,0.795526,-0.847144,2.175461,-1.039143,-0.526517,-2.240917,3.553066
3,0.491425,0.795526,-0.847144,2.175461,-1.039143,-0.526517,-2.240917,5.057677
4,-0.790459,0.678408,-0.847144,0.488793,-1.039143,0.070527,0.647884,4.978487


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3, random_state=0)

Model does not need to be redefined as number of parameters have not changed

In [14]:
mse_list = []
for i in range(50):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    mse_list.append(scores[1])
print('Complete')

Complete


In [15]:
print(np.mean(mse_list))
print(np.std(mse_list))

46.35354019165039
24.26184657903627


MSE values are much better than before normalization

### Try increasing epochs

In [16]:
mse_list = []
for i in range(50):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    mse_list.append(scores[1])
print('Complete')

Complete


In [18]:
print(np.mean(mse_list))
print(np.std(mse_list))

35.91215553283691
0.549244180698564


MSE values are slightly better, but notably greatly reduced variance

### Try increasing layers

In [20]:
def regression_model(params):
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(params,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

In [21]:
model = regression_model(X_train.shape[1])

In [22]:
mse_list = []
for i in range(50):
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=0)
    scores = model.evaluate(X_test, y_test, verbose=0)
    mse_list.append(scores[1])
print('Complete')

Complete


In [23]:
print(np.mean(mse_list))
print(np.std(mse_list))

35.635666580200194
9.280285257208115


Small improvement with increasing layers, but increased variance