## Cement Strength

## Data Preparation 

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv(r"concrete_data.csv")
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


In [4]:
X = df[df.columns[:-1]]
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [5]:
Y = df[df.columns[-1]]
Y.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# Part A

## Model Building

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error


In [7]:
#A. Build a baseline model 
def Model():
    model = Sequential(
        [
            Dense(10, activation='relu', input_shape=(8,)),
            Dense(1)
        ]
    )
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [8]:

def Train(model, X,Y, num_epochs, repeitition):
    # 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.
    mse=[]
    for i in range(repeitition):
        # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
        Xtrain, Xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3, random_state=None, shuffle=True)

        #2. Train the model on the training data using 50 epochs
        model.fit(x=Xtrain, y=ytrain,validation_data=(Xtest, ytest), epochs=num_epochs, verbose=2)
        yhat = model.predict(x=Xtest)

        #3. Evaluate the model on the test data and compute the mean squared error 
        mse.append(mean_squared_error(ytest, yhat))
    return mse


In [9]:
mseA = Train(Model(), X,Y, 50, 50)

Epoch 1/50
23/23 - 14s - loss: 90340.0078 - val_loss: 51142.4062
Epoch 2/50
23/23 - 0s - loss: 30605.9336 - val_loss: 16053.1875
Epoch 3/50
23/23 - 0s - loss: 9200.4795 - val_loss: 5001.9702
Epoch 4/50
23/23 - 0s - loss: 3189.2549 - val_loss: 2235.8899
Epoch 5/50
23/23 - 0s - loss: 1859.3578 - val_loss: 1774.5911
Epoch 6/50
23/23 - 0s - loss: 1659.8477 - val_loss: 1651.9280
Epoch 7/50
23/23 - 0s - loss: 1567.9879 - val_loss: 1560.3580
Epoch 8/50
23/23 - 0s - loss: 1476.2129 - val_loss: 1466.1188
Epoch 9/50
23/23 - 0s - loss: 1384.0630 - val_loss: 1373.1736
Epoch 10/50
23/23 - 0s - loss: 1293.5936 - val_loss: 1282.6235
Epoch 11/50
23/23 - 0s - loss: 1206.2363 - val_loss: 1193.6061
Epoch 12/50
23/23 - 0s - loss: 1123.1754 - val_loss: 1112.4038
Epoch 13/50
23/23 - 0s - loss: 1046.4912 - val_loss: 1032.1642
Epoch 14/50
23/23 - 0s - loss: 971.7897 - val_loss: 964.6052
Epoch 15/50
23/23 - 0s - loss: 910.1774 - val_loss: 897.7755
Epoch 16/50
23/23 - 0s - loss: 851.2601 - val_loss: 839.8058
Ep

In [44]:
# mse

## 5. Report the mean and the standard deviation of the mean squared errors

In [10]:
print('Mean of mean squared error : ',np.mean(mseA))
print('Standard Deviation of mean squared error : ',np.std(mseA))

Mean of mean squared error :  57.644654561407954
Standard Deviation of mean squared error :  46.499770434296465


# B. Normalize the data

In [6]:
X = df[df.columns[:-1]]
X = (X - X.mean())/X.std()
X

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.551340
3,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
...,...,...,...,...,...,...,...,...
1025,-0.045623,0.487998,0.564271,-0.092126,0.451190,-1.322363,-0.065861,-0.279597
1026,0.392628,-0.856472,0.959602,0.675872,0.702285,-1.993711,0.496651,-0.279597
1027,-1.269472,0.759210,0.850222,0.521336,-0.017520,-1.035561,0.080068,-0.279597
1028,-1.168042,1.307430,-0.846733,-0.279443,0.852942,0.214537,0.191074,-0.279597


In [7]:
Y = df[df.columns[-1]]
Y

0       79.99
1       61.89
2       40.27
3       41.05
4       44.30
        ...  
1025    44.28
1026    31.18
1027    23.70
1028    32.77
1029    32.40
Name: Strength, Length: 1030, dtype: float64

In [49]:
mseB = Train(Model(), X, Y, 50, 50)

Epoch 1/50
23/23 - 15s - loss: 1551.2439 - val_loss: 1534.8044
Epoch 2/50
23/23 - 0s - loss: 1537.1727 - val_loss: 1521.4260
Epoch 3/50
23/23 - 0s - loss: 1522.9814 - val_loss: 1507.9891
Epoch 4/50
23/23 - 0s - loss: 1508.6047 - val_loss: 1494.2241
Epoch 5/50
23/23 - 0s - loss: 1493.8297 - val_loss: 1479.8726
Epoch 6/50
23/23 - 0s - loss: 1478.4071 - val_loss: 1464.8383
Epoch 7/50
23/23 - 0s - loss: 1462.2872 - val_loss: 1449.0741
Epoch 8/50
23/23 - 0s - loss: 1445.2645 - val_loss: 1432.6704
Epoch 9/50
23/23 - 0s - loss: 1427.3400 - val_loss: 1415.4419
Epoch 10/50
23/23 - 0s - loss: 1408.5762 - val_loss: 1396.7203
Epoch 11/50
23/23 - 0s - loss: 1388.3060 - val_loss: 1377.3683
Epoch 12/50
23/23 - 0s - loss: 1366.9607 - val_loss: 1356.4081
Epoch 13/50
23/23 - 0s - loss: 1344.2389 - val_loss: 1334.1398
Epoch 14/50
23/23 - 0s - loss: 1320.2767 - val_loss: 1310.6666
Epoch 15/50
23/23 - 0s - loss: 1294.9388 - val_loss: 1286.2081
Epoch 16/50
23/23 - 0s - loss: 1268.5590 - val_loss: 1260.2941


In [51]:
print('Mean of mean squared error : ',np.mean(mseB))
print('Standard Deviation of mean squared error : ',np.std(mseB))

Mean of mean squared error :  48.45075405918751
Standard Deviation of mean squared error :  53.409659593806325


# C. Increate the number of epochs (5 marks)
## Repeat Part B but use 100 epochs this time for training

In [8]:
mseC = Train(Model(), X, Y, 100, 50)

Epoch 1/100
23/23 - 12s - loss: 1508.6895 - val_loss: 1671.4807
Epoch 2/100
23/23 - 0s - loss: 1494.0970 - val_loss: 1656.4680
Epoch 3/100
23/23 - 0s - loss: 1479.2241 - val_loss: 1641.6396
Epoch 4/100
23/23 - 0s - loss: 1464.3057 - val_loss: 1626.4211
Epoch 5/100
23/23 - 0s - loss: 1448.8046 - val_loss: 1610.6152
Epoch 6/100
23/23 - 0s - loss: 1432.6304 - val_loss: 1594.0925
Epoch 7/100
23/23 - 0s - loss: 1415.7792 - val_loss: 1576.4036
Epoch 8/100
23/23 - 0s - loss: 1397.8807 - val_loss: 1557.8606
Epoch 9/100
23/23 - 0s - loss: 1379.0454 - val_loss: 1537.8762
Epoch 10/100
23/23 - 0s - loss: 1358.8877 - val_loss: 1517.0032
Epoch 11/100
23/23 - 0s - loss: 1337.6842 - val_loss: 1494.3777
Epoch 12/100
23/23 - 0s - loss: 1314.9823 - val_loss: 1470.6342
Epoch 13/100
23/23 - 0s - loss: 1290.6866 - val_loss: 1444.8838
Epoch 14/100
23/23 - 0s - loss: 1265.1886 - val_loss: 1416.7488
Epoch 15/100
23/23 - 0s - loss: 1237.9822 - val_loss: 1387.3474
Epoch 16/100
23/23 - 0s - loss: 1209.1688 - val_

In [9]:
print('Mean of mean squared error : ',np.mean(mseC))
print('Standard Deviation of mean squared error : ',np.std(mseC))

Mean of mean squared error :  41.20931083839643
Standard Deviation of mean squared error :  18.46047766253536


# D. Increase the number of hidden layers (5 marks)

## Repeat part B but use a neural network with the following instead:

## - Three hidden layers, each of 10 nodes and ReLU activation function

In [None]:
def Model3Hidden():
    model = Sequential(
        [
            Dense(10, activation='relu', input_shape=(8,)),
            Dense(10, activation='relu'),
            Dense(10, activation='relu'),
            Dense(1)
        ]
    )
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
mseD = Train(Model(), X, Y, 50, 50)