In [2]:
import pandas as pd

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

import numpy as np

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a water/cement ratio column.

    The input is indexed positionally (``X[:, i]``), so it must be a 2-D
    NumPy-style array rather than a DataFrame. With the default indices the
    ratio is column 3 divided by column 0, which corresponds to ``water`` and
    ``cement`` in this notebook's ``features`` ordering.

    Parameters
    ----------
    water_ix : int, default=3
        Column position of the numerator (water).
    cement_ix : int, default=0
        Column position of the denominator (cement).
    """

    def __init__(self, water_ix=3, cement_ix=0):
        # Stored under the same names as the arguments so that the
        # BaseEstimator parameter introspection (get_params / clone) works.
        self.water_ix = water_ix
        self.cement_ix = cement_ix

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` with one extra trailing column holding water / cement.

        No guard is applied to the division: rows whose cement value is 0
        follow NumPy's division-by-zero semantics.
        """
        water_cement_ratio = X[:, self.water_ix] / X[:, self.cement_ix]
        return np.c_[X, water_cement_ratio]
    

# Feature engineering only: used for the models that are trained on unscaled inputs.
pipeline = Pipeline(steps=[
    ('attribs_adder', CombinedAttributesAdder()),
])

# Feature engineering followed by min-max scaling: used for every other model.
s_pipeline = Pipeline(steps=[
    ('attribs_adder', CombinedAttributesAdder()),
    ('min_max_scaler', MinMaxScaler()),
])

#### Load training and testing data

In [4]:
# Read the training and testing splits from CSV files in the working directory.
df_train, df_test = (
    pd.read_csv('concrete_data_train.csv'),
    pd.read_csv('concrete_data_test.csv'),
)

#### Separate data into features (train and test) and labels (train and test)

In [5]:
# Input columns. Their order matters: the pipelines receive `.values` and
# address columns by position.
features = ['cement', 'BFS', 'FA', 'water', 'SPZ', 'coarse', 'fine', 'age']

# `strength` is the regression target in both splits.
X_train, y_train = df_train[features], df_train['strength']
X_test, y_test = df_test[features], df_test['strength']

In [35]:
# Function to save the predictions
def save_prediction(y_true, y_pred, filename: str) -> None:
    """Write actual and predicted values side by side to ``<filename>.csv``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Model predictions, one per entry of ``y_true``.
    filename : str
        Output path WITHOUT the ``.csv`` extension. It may contain directory
        components; missing directories are created.

    Raises
    ------
    ValueError
        Raised by pandas when the two inputs do not have the same length.
    """
    from pathlib import Path

    # Flatten both inputs to plain 1-D arrays so rows are paired by position.
    # Passing pandas objects straight into the DataFrame constructor would
    # align them on their index labels instead, which can misalign rows or
    # introduce NaN rows when the indices differ.
    values = {'Actual': np.asarray(y_true).ravel(),
              'Predicted': np.asarray(y_pred).ravel(),
             }
    df = pd.DataFrame(values)

    out_path = Path(f'{filename}.csv')
    # to_csv does not create directories; make sure the target folder exists.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_path, index=False)
    print("saving completed")

#### Import machine learning algorithms

In [7]:
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn_rvm import EMRVR

In [21]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

#### Model Performance on test dataset

In [24]:
model = XGBRegressor(n_estimators=500)
# model = RandomForestRegressor(n_estimators=500)
# model = EMRVR(kernel='rbf', gamma='scale') 
# model = SVR(kernel='rbf', C=100, epsilon=0.1)

# Do not change any code below unless you know what you are doing

# The two tree ensembles are trained on unscaled features (`pipeline`); every
# other model goes through `s_pipeline`, which additionally applies MinMaxScaler.
# isinstance (rather than an exact type comparison) also routes subclasses.
if isinstance(model, (RandomForestRegressor, XGBRegressor)):
    print("No Data Normalization")
    active_pipeline = pipeline
    label_width = 10    # only the first 10 characters of the model repr are printed
else:
    print("Data Normalization")
    active_pipeline = s_pipeline
    label_width = None  # the full model repr is printed

# The preprocessing is fitted on the training features and reused for the test set.
model.fit(active_pipeline.fit_transform(X_train.values), y_train)
label = str(model)[:label_width]
print(f"{label} --> Training Done")
predictions = model.predict(active_pipeline.transform(X_test.values))

mse = mean_squared_error(y_test, predictions)
print(f'{label} --> R2={r2_score(y_test, predictions):.2f}')
print(f'{label} --> MAE={mean_absolute_error(y_test, predictions):.2f}')
print(f'{label} --> MSE={mse:.2f}')
# RMSE is derived from the MSE: the `squared=False` keyword of
# mean_squared_error is deprecated and rejected by recent scikit-learn releases.
print(f'{label} --> RMSE={np.sqrt(mse):.2f}')

print("Done")

No Data Normalization
XGBRegress --> Training Done
XGBRegress --> R2=0.92
XGBRegress --> MAE=2.80
XGBRegress --> MSE=22.06
XGBRegress --> RMSE=4.70
Done


#### Model performance on training dataset

In [46]:
# model = XGBRegressor(n_estimators=500)
# model = RandomForestRegressor(n_estimators=500)
# model = EMRVR(kernel='rbf', gamma='scale') 
model = SVR(kernel='rbf', C=100, epsilon=0.1)

# Do not change any code below unless you know what you are doing

# The two tree ensembles are trained on unscaled features (`pipeline`); every
# other model goes through `s_pipeline`, which additionally applies MinMaxScaler.
# isinstance (rather than an exact type comparison) also routes subclasses.
if isinstance(model, (RandomForestRegressor, XGBRegressor)):
    print("No Data Normalization")
    active_pipeline = pipeline
    label_width = 10    # only the first 10 characters of the model repr are printed
else:
    print("Data Normalization")
    active_pipeline = s_pipeline
    label_width = None  # the full model repr is printed

# Fit on the training features, then predict on those same training features
# to report the in-sample scores.
model.fit(active_pipeline.fit_transform(X_train.values), y_train)
label = str(model)[:label_width]
print(f"{label} --> Training Done")
predictions = model.predict(active_pipeline.transform(X_train.values))

mse = mean_squared_error(y_train, predictions)
print(f'{label} --> R2={r2_score(y_train, predictions):.2f}')
print(f'{label} --> MAE={mean_absolute_error(y_train, predictions):.2f}')
print(f'{label} --> MSE={mse:.2f}')
# RMSE is derived from the MSE: the `squared=False` keyword of
# mean_squared_error is deprecated and rejected by recent scikit-learn releases.
print(f'{label} --> RMSE={np.sqrt(mse):.2f}')

print("Done")

Data Normalization
SVR(C=100) --> Training Done
SVR(C=100) --> R2=0.90
SVR(C=100) --> MAE=3.43
SVR(C=100) --> MSE=26.56
SVR(C=100) --> RMSE=5.15
Done


In [47]:
# save_prediction(y_test, predictions, '_predictions_on_test_data')
# `predictions` holds whatever the most recently executed evaluation cell
# produced, so run the training-set evaluation cell right before this one.
save_prediction(
    y_true=y_train,
    y_pred=predictions,
    filename='PREDICTIONS/svm_predictions_on_train_data',
)

saving completed
