In [97]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score, mean_absolute_percentage_error, mean_squared_log_error
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd

# Load the raw concrete-mix table. The path is relative, so it resolves
# against the kernel's current working directory.
concrete_csv_path = 'data/Concrete_Data.csv'
df_raw = pd.read_csv(concrete_csv_path)

# Bare last expression so the cell renders the column labels.
df_raw.columns



Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

In [98]:
# Features are every column except the last; the last column is the target.
X, y = df_raw.iloc[:, :-1], df_raw.iloc[:, -1]

# Hold out 20% of the rows for evaluation. Rows are shuffled before the
# split, and the fixed seed makes the partition repeatable across runs.
split_options = {'test_size': 0.2, 'shuffle': True, 'random_state': 50}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Multiple Linear Regression

In [99]:
# fit() returns the estimator itself, so construction and training are
# chained. The bare name on the last line keeps the estimator repr as the
# cell's output.
mlr_model = LinearRegression().fit(X_train, y_train)
mlr_model

LinearRegression()

In [100]:
# Score the fitted linear model on the held-out split.
mlr_y_predicted = mlr_model.predict(X_test)

# (report label, scorer) pairs in the order they are printed. Every scorer is
# called as scorer(y_true, y_pred).
mlr_scorers = [
    ('Mean Absolute Error', mean_absolute_error),
    ('Mean Absolute Percentage Error', mean_absolute_percentage_error),
    ('Mean Squared Error', mean_squared_error),
    # MSLE takes logarithms, and scikit-learn raises ValueError when any value
    # is negative. A linear model is unbounded below, so its predictions are
    # floored at 0 for this metric only. When no prediction is negative the
    # clip is a no-op and the score is unchanged.
    ('Mean Squared Log Error',
     lambda actual, predicted: mean_squared_log_error(
         actual, np.clip(predicted, 0, None))),
    # RMSE is the square root of the MSE.
    ('Root Mean Squared Error',
     lambda actual, predicted: np.sqrt(mean_squared_error(actual, predicted))),
    ('Explained Variance Score', explained_variance_score),
    ('R Squared Score', r2_score),
]

# Compute and print one metric at a time so the report appears in order.
mlr_scores = []
for mlr_label, mlr_scorer in mlr_scorers:
    mlr_score = mlr_scorer(y_test, mlr_y_predicted)
    print(f'{mlr_label} {mlr_score}')
    mlr_scores.append(mlr_score)

# Bind each score to its own name; the comparison table cell reads these.
(mlr_metric_mae, mlr_metric_mape, mlr_metric_mse, mlr_metric_msle,
 mlr_metric_rmse, mlr_metric_evs, mlr_metric_r2) = mlr_scores


Mean Absolute Error 7.9100408151230575
Mean Absolute Percentage Error 0.3338653542629698
Mean Squared Error 98.115811061572
Mean Squared Log Error 0.12016603228350434
Root Mean Squared Error 9.905342551450303
Explained Variance Score 0.6360416556439641
R Squared Score 0.6359646484072214


In [101]:
# Fitted parameters of the linear model: the intercept on the first line,
# followed by the coefficient array.
print(mlr_model.intercept_, mlr_model.coef_, sep='\n')

-8.588336120681141
[ 0.11435239  0.09812924  0.07706773 -0.16950172  0.31739452  0.01450578
  0.01343895  0.11158763]


# Regression Tree (Decision Tree Regression)

In [102]:
# Regression tree limited to depth 10 with at least 3 training samples per
# leaf, split on squared error.
# scikit-learn permutes the candidate features at every split, so equally good
# splits can be chosen differently from run to run unless random_state is
# fixed. The seed matches the one used for the train/test split.
# NOTE: scores recorded before the seed was fixed may differ slightly.
regression_model = DecisionTreeRegressor(criterion="squared_error",
                                         min_samples_leaf=3,
                                         max_depth=10,
                                         random_state=50)
regression_model.fit(X_train, y_train)

DecisionTreeRegressor(max_depth=10, min_samples_leaf=3)

In [103]:
# Score the fitted regression tree on the held-out split.
rt_y_predicted = regression_model.predict(X_test)

# (report label, scorer) pairs in the order they are printed. Every scorer is
# called as scorer(y_true, y_pred).
rt_scorers = [
    ('Mean Absolute Error', mean_absolute_error),
    ('Mean Absolute Percentage Error', mean_absolute_percentage_error),
    ('Mean Squared Error', mean_squared_error),
    ('Mean Squared Log Error', mean_squared_log_error),
    # RMSE is the square root of the MSE.
    ('Root Mean Squared Error',
     lambda actual, predicted: np.sqrt(mean_squared_error(actual, predicted))),
    ('Explained Variance Score', explained_variance_score),
    ('R Squared Score', r2_score),
]

# Compute and print one metric at a time so the report appears in order.
rt_scores = []
for rt_label, rt_scorer in rt_scorers:
    rt_score = rt_scorer(y_test, rt_y_predicted)
    print(f'{rt_label} {rt_score}')
    rt_scores.append(rt_score)

# Bind each score to its own name; the comparison table cell reads these.
(rt_metric_mae, rt_metric_mape, rt_metric_mse, rt_metric_msle,
 rt_metric_rmse, rt_metric_evs, rt_metric_r2) = rt_scores


Mean Absolute Error 5.144296760581774
Mean Absolute Percentage Error 0.1887846259289491
Mean Squared Error 46.72217407859316
Mean Squared Log Error 0.05444350502881591
Root Mean Squared Error 6.83536202981182
Explained Variance Score 0.8272173269313471
R Squared Score 0.8266484995246478


In [104]:
# Row labels for the comparison table, in the same order as the score lists.
metric_labels = [
    'Mean Absolute Error',
    'Mean Absolute Percentage Error',
    'Mean Squared Error',
    'Mean Squared Log Error',
    'Root Mean Squared Error',
    'Explained Variance Score',
    'R Squared Score',
]

# One column of held-out scores per model.
results = {
    'Multiple Linear Regression': [
        mlr_metric_mae, mlr_metric_mape, mlr_metric_mse, mlr_metric_msle,
        mlr_metric_rmse, mlr_metric_evs, mlr_metric_r2,
    ],
    'Regression Trees': [
        rt_metric_mae, rt_metric_mape, rt_metric_mse, rt_metric_msle,
        rt_metric_rmse, rt_metric_evs, rt_metric_r2,
    ],
}

# Side-by-side table: metrics as rows, models as columns.
results_df = pd.DataFrame(results, index=metric_labels)

print(results_df)

                                Multiple Linear Regression  Regression Trees
Mean Absolute Error                               7.910041          5.144297
Mean Absolute Percentage Error                    0.333865          0.188785
Mean Squared Error                               98.115811         46.722174
Mean Squared Log Error                            0.120166          0.054444
Root Mean Squared Error                           9.905343          6.835362
Explained Variance Score                          0.636042          0.827217
R Squared Score                                   0.635965          0.826648
