# Import

In [3]:
# Imports
import numpy as np
import pandas as pd

import tensorflow as tf
# from tensorflow.keras.utils import split_dataset
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Normalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# from tensorflow.keras.losses import CategoricalCrossentropy
# from tensorflow.keras.optimizers import Adam
import xgboost as xgb

from sklearn.svm import SVR
# from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.linear_model import HuberRegressor
from skrvm import RVR



In [10]:
# Pd_Display
def pdisplay(df):
    display(pd.DataFrame(df))

# Import Data

In [4]:
data_path = "../data/"
eps_total_effective_path =  data_path + "eps_total_effective/"

## Train `eps_total_effective`

In [5]:
# load csv file
X = pd.read_csv(eps_total_effective_path + "X.csv")
y_eps = pd.read_csv(eps_total_effective_path + "y_eps_total_effective.csv")

In [6]:
# Show X
X.head()

Unnamed: 0,0.42408,0.02653,0.53492,0.4036,0.29359,0.30995,0.03648,0.43068,0.2183,0.09941,...,0.0.94,0.0.95,0.0.96,0.0.97,0.0.98,0.0.99,0.0.100,0.0.101,0.0.102,37.21017
0,0.46773,0.03565,0.46849,0.30076,0.2213,0.4546,-0.20362,0.38685,0.29387,0.01156,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.50477
1,0.39218,0.01064,0.49112,0.2798,0.1686,0.35325,0.03549,0.43068,0.17308,0.16276,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.26854
2,0.45832,0.00521,0.62223,0.43571,0.10061,0.31507,0.05092,0.5,0.3642,0.00619,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.21506
3,0.63705,0.00245,0.32378,0.20314,0.02169,0.0,0.59293,0.46534,0.23639,0.0005,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.18492
4,0.39218,0.01773,0.53827,0.47093,0.1686,0.15436,0.05573,0.43068,0.18903,0.00206,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.61617


In [7]:
# Show y_eps
y_eps.head()

Unnamed: 0,12.911986587385947
0,9.129604
1,13.638275
2,8.520153
3,7.111387
4,9.719387


- Information of **X** data

In [15]:
# Data size
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1227 entries, 0 to 1226
Columns: 197 entries, 0.42408 to 37.21017
dtypes: float64(197)
memory usage: 1.8 MB


- Information of **y_eps** data

In [16]:
y_eps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1227 entries, 0 to 1226
Data columns (total 1 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   12.911986587385947  1227 non-null   float64
dtypes: float64(1)
memory usage: 9.7 KB


- converting to **numpy array**

In [17]:
X_np = X.to_numpy()
X_np.shape

(1227, 197)

In [18]:
y_eps_np = y_eps.to_numpy()
y_eps_np.shape

(1227, 1)

# Train & Test Data

- Train (80%)
- Test (20%)

In [19]:
# Split Data Set into train (80%) and test (20%)
x_train , x_test , y_eps_train , y_eps_test = train_test_split(
    X_np , y_eps_np , 
    train_size=0.8 , 
    random_state=118
    )

In [20]:
print(f"X_train Size : {x_train.shape}")
print(f"X_test  Size : {x_test.shape}")
print(f"Y_train Size : {y_eps_train.shape}")
print(f"Y_test  Size : {y_eps_test.shape}")

X_train Size : (981, 197)
X_test  Size : (246, 197)
Y_train Size : (981, 1)
Y_test  Size : (246, 1)


## Normalizing **X** data (`X_np`)

In [21]:
# Define Normalization Function
norm_func = Normalization( axis = -1 )
norm_func.adapt(X_np)

- Custom Function to calculate error. ( **RMSE** , **MARE** , **R2** )

In [29]:
# Error Function
def M_error(Model, x_test , y_test , norm_func = None ):
    if norm_func is None:
        norm_func = lambda x : x    # noqa: E731
    
    Model.RMSE = root_mean_squared_error( y_test , Model.predict( norm_func(x_test) ) )
    Model.MARE = mean_absolute_percentage_error( y_test , Model.predict( norm_func(x_test) ) ) * 100 # in percentage (%)
    Model.R2 = r2_score( y_test , Model.predict( norm_func(x_test) ) )
    
    return Model

# Display Error of model
def error_disp(model):
    print('Model Error:')
    print(' RMSE =', model.RMSE)
    print(' MARE =', model.MARE)
    print(' R2   =', model.R2)



# Define Neural Network

In [23]:
def NN_Model_func(x , y , norm_func = None , Epochs = 200):
    if norm_func is None:
        norm_func = lambda x : x  # noqa: E731
    
    # Model Structure 
    NN_model = Sequential(
        [
            Dense( units = 15 , activation = 'sigmoid' , name = 'L1'),
            Dense( units = 1 , activation = 'linear' , name = 'L2')
        ]
    )
    
    # Model Compilation
    NN_model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Model Training
    NN_model.fit( 
             x = norm_func(x),
             y = y,
             epochs = Epochs
             )
    
    return NN_model

## Training the Neural Network Model

In [24]:
# Train NN Model and test them
NN_model = NN_Model_func(x_train , y_eps_train , norm_func = norm_func , Epochs = 200)
NN_model.summary()

Epoch 1/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 272.5566
Epoch 2/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 259.7606
Epoch 3/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 240.5562
Epoch 4/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 209.5329
Epoch 5/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 222.4416
Epoch 6/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 220.2792
Epoch 7/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 222.4894
Epoch 8/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 202.7256
Epoch 9/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 217.8925
Epoch 10/200
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/s

## Calculating Error of the Model

In [25]:
# Error Calculation
NN_model = M_error(NN_model , x_test , y_eps_test , norm_func = norm_func)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step  
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [30]:
error_disp(NN_model)

Model Error:
 RMSE = 4.653062330342252
 MARE = 25.934679699944475
 R2   = 0.6770650490479062


# Define Random Forest

In [31]:
def RF_Model_func(x , y , norm_func = None , Epochs = 200):
    if norm_func is None:
        norm_func = lambda x : x    # noqa: E731
    
    # Initialize the Random Forest Regressor with maximum 500 trees
    RF_model =  RandomForestRegressor(n_estimators = 500, random_state=118)
    
    # Train the model
    RF_model.fit(norm_func(x), y )
    
    return RF_model

## Training the Random Forest Model

In [32]:
RF_model = RF_Model_func(x_train , y_eps_train , norm_func = norm_func , Epochs = 200)
RF_model

  return fit_method(estimator, *args, **kwargs)


## Calculating Error of the Model

In [33]:
# Error Calculation
RF_model = M_error(RF_model , x_test , y_eps_test , norm_func = norm_func)

In [34]:
error_disp(RF_model)

Model Error:
 RMSE = 4.232145694012576
 MARE = 23.380425181000696
 R2   = 0.732847937937128


# Define XGBoost

In [35]:
def XGB_Model_func(x , y , norm_func = None , n_estimators = 400):
    if norm_func is None:
        norm_func = lambda x : x    # noqa: E731
    
    # Define the XGBoost model with a linear objective function
    xgb_model = xgb.XGBRegressor(
        objective='reg:linear',  # Linear objective function
        # max_depth=6,
        # learning_rate=0.1,
        # n_estimators=n_estimators,
        # n_jobs=-1
    )
    
    # Train the model on the training data
    xgb_model.fit(norm_func(x_train), y_eps_train)
    
    return xgb_model

## Training the XGBoost Model

In [36]:
XGB_model = XGB_Model_func(x_train , y_eps_train , norm_func = norm_func )
XGB_model



## Calculating Error of the Model

In [37]:
# Error Calculation
XGB_model = M_error(XGB_model , x_test , y_eps_test , norm_func = norm_func)

In [38]:
error_disp(XGB_model)

Model Error:
 RMSE = 4.810284483269455
 MARE = 23.331190794440136
 R2   = 0.6548730816385939


# Define SVM Model

In [39]:
def SVM_Model_func(x_train , y_eps_train , norm_func = None ):
    if norm_func is None:
        norm_func = lambda x : x    # noqa: E731
    
    # Define the SVM model with RBF kernel, C=1, and epsilon=0.1
    svm_model = SVR(
        kernel='rbf',  # Radial basis function kernel
        C=1,  # Regularization parameter
        epsilon=0.1,  # Epsilon value for the RBF kernel
        # gamma='auto'  # Gamma value for the RBF kernel
    )
    
    # Train the model on the training data
    svm_model.fit(norm_func(x_train), y_eps_train )
    
    return svm_model

## Training the SVM Model

In [40]:
SVM_model = SVM_Model_func(x_train , y_eps_train , norm_func = norm_func )
SVM_model

  y = column_or_1d(y, warn=True)


## Calculating Error of the Model

In [41]:
# Error Calculation
SVM_model = M_error(SVM_model , x_test , y_eps_test , norm_func = norm_func)

In [42]:
error_disp(SVM_model)

Model Error:
 RMSE = 6.509828014005609
 MARE = 26.576768428658454
 R2   = 0.36791375404594084


# Define RVM Model

In [44]:
def RVM_Model_func(x_train , y_eps_train , norm_func = None ):
    if norm_func is None:
        norm_func = lambda x : x    # noqa: E731
    
    # Define the RVM model with RBF kernel, C=1, and epsilon=0.1
    rvm_model = RVR(kernel='rbf')
    
    # Train the model on the training data
    rvm_model.fit(norm_func(x_train), y_eps_train )
    
    return rvm_model

## Training the RVM Model

In [45]:
RVM_model = RVM_Model_func(x_train , y_eps_train , norm_func = norm_func )
RVM_model

  y = column_or_1d(y, warn=True)


## Calculating Error of the Model

In [46]:
# Error Calculation
RVM_model = M_error(RVM_model , x_test , y_eps_test , norm_func = norm_func)

In [47]:
error_disp(RVM_model)

Model Error:
 RMSE = 4.964423069147642
 MARE = 26.36885284053187
 R2   = 0.6324005265630701


# Define Huber Regression Algorithm

In [48]:
def Huber_Model_func(x_train , y_eps_train , norm_func = None ):
    if norm_func is None:
        norm_func = lambda x : x    # noqa: E731
    
    # Define the Huber Regressor Algorithm
    huber_model = HuberRegressor()
    
    # Train the model on the training data
    huber_model.fit(norm_func(x_train), y_eps_train )
    
    return huber_model

## Training the Model with Huber Regression Algorithm

In [49]:
Huber_model = Huber_Model_func(x_train , y_eps_train , norm_func = norm_func )
Huber_model

  y = column_or_1d(y, warn=True)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


## Calculating Error of the Model

In [51]:
# Error Calculation
Huber_model = M_error(Huber_model , x_test , y_eps_test , norm_func = norm_func)

In [52]:
error_disp(Huber_model)

Model Error:
 RMSE = 4.632832982599544
 MARE = 21.258204438434664
 R2   = 0.6798668867418873


# Error Table

In [53]:
# Panda table for MARE , RMSE, R2
# import pandas as pd
Models = ['NN' , 'RF' , 'XGB' , 'SVM' , 'RVM' , 'Huber']
MARE = np.array([NN_model.MARE, RF_model.MARE, XGB_model.MARE, SVM_model.MARE, RVM_model.MARE, Huber_model.MARE])
RMSE = np.array([NN_model.RMSE, RF_model.RMSE, XGB_model.RMSE, SVM_model.RMSE, RVM_model.RMSE, Huber_model.RMSE])
R2 = np.array([NN_model.R2, RF_model.R2, XGB_model.R2, SVM_model.R2, RVM_model.R2, Huber_model.R2])
table = pd.DataFrame({ 'Models' : Models , 'MARE' : MARE , 'RMSE' : RMSE , 'R2' : R2 })
table

Unnamed: 0,Models,MARE,RMSE,R2
0,NN,25.93468,4.653062,0.677065
1,RF,23.380425,4.232146,0.732848
2,XGB,23.331191,4.810284,0.654873
3,SVM,26.576768,6.509828,0.367914
4,RVM,26.368853,4.964423,0.632401
5,Huber,21.258204,4.632833,0.679867
