# Concrete Compressive Strength Prediction A

## 1. Import important libraries and packages

In [17]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

### Import data

In [2]:
# Load the concrete dataset from a CSV file located in the working directory.
concrete_data = pd.read_csv('concrete_data.csv')
# Preview the first rows as a quick sanity check of the columns and values.
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


## Evaluate the imported data

In [3]:
# Show the (rows, columns) dimensions of the loaded frame.
concrete_data.shape 

(1030, 9)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [5]:
# Count the missing values in each column.
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [6]:
# Feature matrix: every column except the target ('Strength') and 'Age'.
concrete_data_columns = concrete_data.columns
is_not_target = concrete_data_columns != 'Strength'
predictors_columns = concrete_data_columns[is_not_target]  # still contains 'Age' at this point
feature_names = predictors_columns[predictors_columns != 'Age']
predictors = concrete_data[feature_names]

# Regression target: the 'Strength' column.
target = concrete_data['Strength']

In [7]:
# Preview the predictor columns to confirm 'Strength' and 'Age' were removed.
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5


In [8]:
# Preview the first values of the regression target.
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# A. Build a baseline model

## 1. Randomly split the data into training and test sets by holding out 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.

In [9]:
# Hold out 30% of the rows for testing; the fixed seed makes this split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    predictors,
    target,
    test_size=0.3,
    random_state=85,
)

In [10]:
# Number of predictor columns; used as the input width of the network.
column_nums = len(X_train.columns)

Use the Keras library to build a neural network with the following:

- One hidden layer of 10 nodes, and a ReLU activation function

- Use the adam optimizer and the mean squared error as the loss function.


In [11]:
def model_A(n_inputs=None, hidden_units=10):
    """Build and compile the baseline regression network.

    The network has one hidden Dense layer with ReLU activation followed by
    a single-unit output layer, compiled with the adam optimizer and mean
    squared error loss.

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features. When omitted, the notebook-level
        ``column_nums`` variable is used, which keeps existing ``model_A()``
        calls working unchanged.
    hidden_units : int, default 10
        Number of nodes in the hidden layer.

    Returns
    -------
    The compiled, untrained Sequential model.
    """
    if n_inputs is None:
        # Backward-compatible fallback to the notebook global; pass n_inputs
        # explicitly to avoid depending on hidden kernel state.
        n_inputs = column_nums
    model = Sequential()
    model.add(Dense(hidden_units, activation='relu', input_shape=(n_inputs,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

## 2. Train the model on the training data using 50 epochs.

In [12]:
# Build a fresh baseline network and train it for 50 epochs on the training split.
model = model_A()
model.fit(X_train,y_train,epochs=50,verbose=0) # verbose=0 silences the per-epoch training log

<keras.callbacks.callbacks.History at 0x1d0948f6748>

## 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

In [13]:
# Predict on the held-out test split and score the predictions.
y_pred = model.predict(X_test)
# scikit-learn's signature is mean_squared_error(y_true, y_pred): ground truth goes first.
mse = mean_squared_error(y_test, y_pred)
print("The Mean Squared Error between the predicted and actual strength is "+str(mse))

The Mean Squared Error between the predicted and actual strength is 699.1613243839363


## 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [14]:
# Repeat split -> train -> evaluate and collect one test-set MSE per repetition.
mselist = []
n_repeats = 50
for i in range(n_repeats):
    # Use a different (but reproducible) seed each repetition. A constant
    # random_state would produce the identical split every time, so the
    # spread of the MSEs would reflect only weight initialisation.
    X_train, X_test, y_train, y_test = train_test_split(
        predictors, target, test_size=0.3, random_state=85 + i
    )
    # A fresh model per repetition so no weights carry over between runs.
    model = model_A()
    print("Started training: "+str(i+1))
    model.fit(X_train,y_train,epochs=50,verbose=0)
    y_pred = model.predict(X_test)
    # mean_squared_error expects (y_true, y_pred).
    mse = mean_squared_error(y_test, y_pred)
    print("Finished training: "+str(i+1)+"\n The MSE is: "+str(mse))
    mselist.append(mse)
print("Done")
print(mselist)

Started training: 1
Finished training: 1
 The MSE is: 192.3738387822652
Started training: 2
Finished training: 2
 The MSE is: 282.8972747068329
Started training: 3
Finished training: 3
 The MSE is: 156.01798998664114
Started training: 4
Finished training: 4
 The MSE is: 929.8183156248818
Started training: 5
Finished training: 5
 The MSE is: 159.9187713579155
Started training: 6
Finished training: 6
 The MSE is: 157.44098872258922
Started training: 7
Finished training: 7
 The MSE is: 243.89767930639286
Started training: 8
Finished training: 8
 The MSE is: 151.72620408763083
Started training: 9
Finished training: 9
 The MSE is: 153.3925284252704
Started training: 10
Finished training: 10
 The MSE is: 178.6513923169541
Started training: 11
Finished training: 11
 The MSE is: 157.27950238629347
Started training: 12
Finished training: 12
 The MSE is: 742.4473495459673
Started training: 13
Finished training: 13
 The MSE is: 176.28258499421065
Started training: 14
Finished training: 14
 The MS

In [15]:
# Summarise the repeated-run errors: average and spread (NumPy's default, population std).
mse_values = np.asarray(mselist)
mean_of_mse = mse_values.mean()
std_of_mse = mse_values.std()

In [16]:
# Report the aggregate statistics; sep="" keeps the label and value adjacent.
print("The mean of the 50 mean squared errors is: ", mean_of_mse, sep="")
print("The standard deviation of the 50 mean squared errors is: ", std_of_mse, sep="")

The mean of the 50 mean squared errors is: 346.45256374676217
The standard deviation of the 50 mean squared errors is: 290.08208497911727
