#  Regression_Exo

In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np



## Data Preparation

### collect data and null-check

In [2]:
data = pd.read_csv('concrete_data.csv')
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
data.shape

(1030, 9)

In [4]:
# check missing values
data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


### Split data into predictors and target

In [6]:
data_columns = data.columns

predictors = data[data_columns[data_columns != 'Strength']] # all columns except Strength as predictor
target = data['Strength'] # Strength column as target 

In [7]:
n_cols = predictors.shape[1]

## A  Build baseline Model

Build regression model

In [8]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [9]:
def regression_model_D():
    # create model
    model = Sequential()
    model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

Train the model

In [10]:
def collectRegrassionEvaluation(predictors_data, nbr_epochs, model_choice= 0):
    mse_list = np.array([])
    for i in range(50):
        # 1 Randomly split the data into a training and test sets by holding 30% 
        X_train, X_test, y_train, y_test = train_test_split(predictors_data, target, test_size=0.3)
        # 2 Train the model on the training data using 50 epochs.
        model =  regression_model() if (model_choice == 0) else regression_model_D()
        # fit the model
        model.fit(X_train, y_train, validation_split=0.3, epochs=nbr_epochs, verbose=2)
        # 3 Evaluate the model 
        mse = model.evaluate(X_test, y_test)
        mse_list = np.append(mse_list, mse)
    return mse_list
    

    
    

In [None]:
mse_list = collectRegrassionEvaluation(predictors, 50)

mean and the standard deviation of the mean squared errors.

In [12]:
std_mse_list = np.std( mse_list)
mean_mse_list = np.mean(mse_list)

In [13]:
print('Standard deviation: {} \n mean: {}'.format(std_mse_list, mean_mse_list)) 

Standard deviation: 207.0954618334072 
 mean: 114.40502899169923


## B With Normalize the data 

normalise data

In [14]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [None]:
mse_list = collectRegrassionEvaluation(predictors_norm, 50)


In [16]:
std_mse_list = np.std( mse_list)
mean_mse_list = np.mean(mse_list)

In [17]:
print('Standard deviation: {} \n mean: {}'.format(std_mse_list, mean_mse_list)) 

Standard deviation: 10.780085569945596 
 mean: 142.33671829223633


Compare to Step A, mean by B ist bigger and standard deviation is smaller.

## C Increase the number of epochs

In [None]:
mse_list = collectRegrassionEvaluation(predictors_norm, 100)

In [20]:
std_mse_list = np.std( mse_list)
mean_mse_list = np.mean(mse_list)
print('Standard deviation: {} \n mean: {}'.format(std_mse_list, mean_mse_list)) 

Standard deviation: 13.22893949113685 
 mean: 88.07152618408203


Compare to Step B, mean of MSE by C ist very smaller.

## D Increase the number of hidden layers

In [None]:
# def regression_model_D():
#     # create model
#     model = Sequential()
#     model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
#     model.add(Dense(10, activation='relu'))
#     model.add(Dense(10, activation='relu'))
#     model.add(Dense(10, activation='relu'))
#     model.add(Dense(1))
    
#     # compile model
#     model.compile(optimizer='adam', loss='mean_squared_error')
#     return model

In [None]:
mse_list = collectRegrassionEvaluation(predictors_norm, 50, 1)

In [22]:
std_mse_list = np.std( mse_list)
mean_mse_list = np.mean(mse_list)
print('Standard deviation: {} \n mean: {}'.format(std_mse_list, mean_mse_list)) 

Standard deviation: 22.936867304072514 
 mean: 98.04634857177734


Compare to Step B, mean of MSE by D ist very smaller