In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import os

import keras
from keras.models import Sequential
from keras.layers import Dense

In [6]:
# Directory and file name of the input CSV; joined into a path when the data is loaded.
data_dir, input_filename = 'Data', 'concrete_data.csv'

In [7]:
# Read the CSV at <data_dir>/<input_filename> into a DataFrame.
concrete_data = pd.read_csv(os.path.join(data_dir, input_filename))

In [8]:
# Print the column names, non-null counts and dtypes of the loaded frame.
concrete_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


In [13]:
def prepare_data(data, target_col, test_size=0.3, random_state=None):
    """Split a DataFrame into train/test features and targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both the feature columns and the target column.
    target_col : str
        Name of the column to predict. Every other column is used as a feature.
    test_size : float, default 0.3
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. Pass an integer to obtain the same
        split on every call; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    The result of ``train_test_split(X, y, ...)``, which callers unpack as
    ``X_train, X_test, y_train, y_test``.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``data``.
    """
    y = data[target_col].to_numpy()
    # Index.difference returns the remaining labels sorted, so the feature
    # columns come out in sorted order rather than in the frame's own order.
    feature_cols = data.columns.difference([target_col])
    X = data[feature_cols].copy()
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

def train_model(X, y, epochs=50, num_layers=1):
    """Build, compile and fit a small fully connected regression network.

    The network has ``num_layers`` hidden ``Dense`` layers of 10 ReLU units
    (at least one is always created) followed by a single-unit output layer.
    It is compiled with the Adam optimizer and mean-squared-error loss, then
    fitted silently on ``X``/``y`` with ``validation_split=0.3``.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Training features; ``X.shape[1]`` sets the input width.
    y : array-like
        Training targets.
    epochs : int, default 50
        Number of epochs passed to ``fit``.
    num_layers : int, default 1
        Number of hidden layers.

    Returns
    -------
    The fitted ``Sequential`` model. The object returned by ``fit`` is not kept.
    """
    feature_count = X.shape[1]

    # Only the first hidden layer declares the input shape.
    hidden_stack = [Dense(10, activation='relu', input_shape=(feature_count,))]
    hidden_stack += [Dense(10, activation='relu') for _ in range(num_layers - 1)]

    network = Sequential()
    for stage in hidden_stack + [Dense(1)]:
        network.add(stage)

    network.compile(optimizer='adam', loss='mean_squared_error')
    network.fit(X, y, epochs=epochs, validation_split=0.3, verbose=0)
    return network
    
def evaluate(data, iterations=50, epochs=50, num_layers=1, target_col='Strength'):
    """Repeat split / train / score and summarise the test-set MSE.

    Each iteration draws a fresh train/test split with ``prepare_data``,
    fits a new network with ``train_model``, predicts on the test split and
    records the mean squared error. The per-iteration MSE is printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the features and the target column.
    iterations : int, default 50
        Number of independent split/train/score rounds.
    epochs : int, default 50
        Training epochs per round, forwarded to ``train_model``.
    num_layers : int, default 1
        Number of hidden layers, forwarded to ``train_model``.
    target_col : str, default 'Strength'
        Name of the target column, forwarded to ``prepare_data``.

    Returns
    -------
    tuple
        ``(mean, standard deviation)`` of the recorded MSE values, as
        computed by ``np.mean`` and ``np.std``.
    """
    mse_list = []
    for _itr in range(iterations):
        X_train, X_test, y_train, y_test = prepare_data(data, target_col)
        model = train_model(X_train, y_train, epochs, num_layers)
        y_pred = model.predict(X_test, verbose=0)
        mse = mean_squared_error(y_test, y_pred)
        mse_list.append(mse)
        print(f"Iteration {_itr}: {mse}")
        # A new model is built every round; release Keras' global state so
        # memory does not keep growing across iterations.
        keras.backend.clear_session()
    return np.mean(mse_list), np.std(mse_list)

In [14]:
# Run the repeated train/score loop with evaluate's default iterations and
# num_layers; the cell displays the returned (mean, std) tuple of the MSE.
evaluate(concrete_data, epochs=50)

Iteration 0: 280.79297161919715
Iteration 1: 171.00784604484525
Iteration 2: 1021.8426159557142
Iteration 3: 198.66335717424408
Iteration 4: 1353.98138567672
Iteration 5: 764.1236098037368
Iteration 6: 208.03551670765415
Iteration 7: 724.9768912054285
Iteration 8: 320.3248300630025
Iteration 9: 1103.1582573297808
Iteration 10: 237.71947031375205
Iteration 11: 962.6365042393579
Iteration 12: 117.72941476384054
Iteration 13: 162.55993986258818
Iteration 14: 255.41230426103718
Iteration 15: 1346.1045695840446
Iteration 16: 134.26770707391708
Iteration 17: 1023.3455850019726
Iteration 18: 118.20772855455293
Iteration 19: 1615.5210349456015
Iteration 20: 3640.4527395153123
Iteration 21: 109.80436959685896
Iteration 22: 105.71011828849903
Iteration 23: 135.8646502053765
Iteration 24: 134.03689064486943
Iteration 25: 137.26339240628232
Iteration 26: 759.2579108717401
Iteration 27: 229.17775893177722
Iteration 28: 1331.0134879653085
Iteration 29: 505.6737244104162
Iteration 30: 126.84498496197

(727.765693148059, 1124.3040020031433)