## Build a baseline model

In [39]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

In [40]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [42]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [43]:
n_cols = predictors.shape[1] 
n_cols

8

In [44]:
def regression_model():
    model = Sequential()
    model.add(Dense(10,activation = 'relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [45]:
#Split the data into train and test
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

In [46]:
model = regression_model()
#We will leave 30% of the data for testing/validating the model
model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 1s - loss: 228732.4432 - val_loss: 194052.1321
Epoch 2/50
 - 0s - loss: 174348.3509 - val_loss: 146993.6791
Epoch 3/50
 - 0s - loss: 132704.5093 - val_loss: 112023.5920
Epoch 4/50
 - 0s - loss: 101820.8957 - val_loss: 86642.5805
Epoch 5/50
 - 0s - loss: 79486.0604 - val_loss: 68783.5356
Epoch 6/50
 - 0s - loss: 63785.2545 - val_loss: 55981.2504
Epoch 7/50
 - 0s - loss: 52199.9226 - val_loss: 46267.0419
Epoch 8/50
 - 0s - loss: 43275.4585 - val_loss: 38612.7167
Epoch 9/50
 - 0s - loss: 36251.5486 - val_loss: 32469.6404
Epoch 10/50
 - 0s - loss: 30612.0302 - val_loss: 27479.2232
Epoch 11/50
 - 0s - loss: 26028.1588 - val_loss: 23411.0118
Epoch 12/50
 - 0s - loss: 22263.2108 - val_loss: 20066.8093
Epoch 13/50
 - 0s - loss: 19166.0919 - val_loss: 17229.9675
Epoch 14/50
 - 0s - loss: 16518.8987 - val_loss: 14861.1034
Epoch 15/50
 - 0s - loss: 14301.6049 - val_loss: 12821.3284
Epoch 16/50
 - 0s - loss: 12396.7861 - val_loss: 11078.3

<keras.callbacks.History at 0x7f079dfe9dd8>

In [47]:
#Testing
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



1580.7913936874243

In [48]:
from sklearn.metrics import mean_squared_error

In [49]:
mse = mean_squared_error(y_test, y_pred)
standard_deviation = np.std(mse)
print(mean, standard_deviation)

95.72059631796559 0.0


In [51]:
total_mean_squared_errors = 50
mean_squared_errors = []
for i in range(0, total_mean_squared_errors):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=i)
    model.fit(X_train, y_train, epochs=50, verbose=0)
    MSE = model.evaluate(X_test, y_test, verbose=0)
    print(MSE)
    y_red = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)



print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

1426.1301222125303
1413.1900737478509
1285.6760222302285
1262.3520808050162
1153.8307532646895
1062.118849473478
1201.7432399120146
954.846392881523
954.6859695780239
869.925111838529
829.1490644436439
710.8386637369791
710.4904055302197
716.2587422491277
673.0179427557392
552.0299583854799
513.4922967312019
542.0222835602498
500.5449981195641
488.72305979312046
456.07096709711266
448.38659707473704
415.0862922545004
388.288591749074
321.58557168411204
326.20409850086594
361.16546265438535
299.8148002747965
351.01572395991354
287.0335517562323
299.954539277407
267.4262282979527
245.4713610800339
291.5341069983819
282.9048122171445
286.8036181008546
287.40519669912396
258.1386909855222
288.1498588882989
265.12130668171017
260.3650111720014
249.7412354305724
271.32108156272125
275.35073368603355
272.28764901269216
274.5533093202461
286.4371301842353
278.9901219834016
263.43150216013095
273.7856963815041
Mean: 1666.6970998123063
Standard Deviation: 54.35568647261169
