## Cement Strength

## Data Preparation 

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv(r"concrete_data.csv")
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


In [4]:
X = df[df.columns[:-1]]
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [5]:
Y = df[df.columns[-1]]
Y.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# Part A

## Model Building

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error


In [7]:
#A. Build a baseline model 
def Model():
    model = Sequential(
        [
            Dense(10, activation='relu', input_shape=(8,)),
            Dense(1)
        ]
    )
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [8]:

def Train(model, X,Y, num_epochs, repeitition):
    # 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.
    mse=[]
    for i in range(repeitition):
        # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
        Xtrain, Xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3, random_state=None, shuffle=True)

        #2. Train the model on the training data using 50 epochs
        model.fit(x=Xtrain, y=ytrain,validation_data=(Xtest, ytest), epochs=num_epochs, verbose=2)
        yhat = model.predict(x=Xtest)

        #3. Evaluate the model on the test data and compute the mean squared error 
        mse.append(mean_squared_error(ytest, yhat))
    return mse


In [9]:
mseA = Train(Model(), X,Y, 50, 50)

Epoch 1/50
23/23 - 11s - loss: 7458.0522 - val_loss: 3240.7637
Epoch 2/50
23/23 - 0s - loss: 3253.5354 - val_loss: 2956.2812
Epoch 3/50
23/23 - 0s - loss: 2735.5308 - val_loss: 2613.4001
Epoch 4/50
23/23 - 0s - loss: 2319.6790 - val_loss: 2143.9846
Epoch 5/50
23/23 - 0s - loss: 1986.7427 - val_loss: 1837.4036
Epoch 6/50
23/23 - 0s - loss: 1741.0839 - val_loss: 1593.1726
Epoch 7/50
23/23 - 0s - loss: 1537.5001 - val_loss: 1380.6960
Epoch 8/50
23/23 - 0s - loss: 1363.8853 - val_loss: 1287.7559
Epoch 9/50
23/23 - 0s - loss: 1242.3788 - val_loss: 1099.3206
Epoch 10/50
23/23 - 0s - loss: 1115.0005 - val_loss: 999.5062
Epoch 11/50
23/23 - 0s - loss: 1000.8863 - val_loss: 892.8224
Epoch 12/50
23/23 - 0s - loss: 906.8937 - val_loss: 797.5647
Epoch 13/50
23/23 - 0s - loss: 815.2520 - val_loss: 756.1564
Epoch 14/50
23/23 - 0s - loss: 733.3043 - val_loss: 649.9251
Epoch 15/50
23/23 - 0s - loss: 668.0966 - val_loss: 588.9756
Epoch 16/50
23/23 - 0s - loss: 595.5778 - val_loss: 531.9191
Epoch 17/50


In [10]:
# mse

## 5. Report the mean and the standard deviation of the mean squared errors

In [11]:
print('Mean of mean squared error : ',np.mean(mseA))
print('Standard Deviation of mean squared error : ',np.std(mseA))

Mean of mean squared error :  94.89013340622519
Standard Deviation of mean squared error :  31.29601576093814
