In [1]:
import numpy as np

In [2]:
def get_distance(x1, x2):
    sum = 0
    for i in range(len(x1)):
        sum += (x1[i] - x2[i]) ** 2
    return np.sqrt(sum)


def kmeans(X, k, max_iters):
    centroids = X[np.random.choice(range(len(X)), k, replace=True)]
    converged = False
    current_iter = 0

    while (not converged) and (current_iter < max_iters):
        cluster_list = [[] for i in range(len(centroids))]
        for x in X:  # Go through each data point
            distances_list = []
            for c in centroids:
                distances_list.append(get_distance(c, x))
            cluster_list[int(np.argmin(distances_list))].append(x)
        cluster_list = list((filter(None, cluster_list)))
        prev_centroids = centroids.copy()
        centroids = []
        
        for j in range(len(cluster_list)):
            centroids.append(np.mean(cluster_list[j], axis=0))
            
        pattern = np.abs(np.sum(prev_centroids) - np.sum(centroids))
        converged = (pattern == 0)
        current_iter += 1

    return np.array(centroids), [np.std(x) for x in cluster_list]

In [3]:
class RBF:

    def __init__(self, X, y, tX, ty, num_of_classes,
                 k, epochs, std_from_clusters=True):
        self.X = X
        self.y = y
        self.tX = tX
        self.ty = ty
#         self.number_of_classes = num_of_classes
        self.k = k
        self.std_from_clusters = std_from_clusters
#         self.initial_epoch = 0
#         self.epochs = epochs

    def rbf(self, x, c, s):
        distance = get_distance(x, c)
        return 1 / np.exp(-distance / s ** 2)

    def rbf_list(self, X, centroids, std_list):
        RBF_list = []
        for x in X:
            RBF_list.append([self.rbf(x, c, s) for (c, s) in zip(centroids, std_list)])
        return np.array(RBF_list)
    
    def fit(self):
        self.centroids, self.std_list = kmeans(self.X, self.k, max_iters=1000)
        if not self.std_from_clusters:
            dMax = np.max([get_distance(c1, c2) for c1 in self.centroids for c2 in self.centroids])
            self.std_list = np.repeat(dMax / np.sqrt(2 * self.k), self.k)
#         print(self.std_list, len(self.std_list))
        RBF_X = self.rbf_list(self.X, self.centroids, self.std_list)
#         print(RBF_X)
        self.w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ self.y 
#         print(self.w)
        RBF_list_tst = self.rbf_list(self.tX, self.centroids, self.std_list)
        self.pred_ty = RBF_list_tst @ self.w

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from scipy.linalg import pinv2

In [5]:
X_ = pd.read_csv(r'C:\Users\Saravanan\Desktop\maybe\paper work\syncrel\x_2.csv', header = None).values[:, :]
Y_ = pd.read_csv(r'C:\Users\Saravanan\Desktop\maybe\paper work\syncrel\y_2.csv', header = None).values[:, :]

scaler_x = MinMaxScaler()
scaler_y1 = MinMaxScaler()
scaler_y2 = MinMaxScaler()
scaler_y3 = MinMaxScaler()

X = scaler_x.fit_transform(X_)
Y = np.zeros(shape=Y_.shape)
Y[:, 0] = scaler_y1.fit_transform(np.expand_dims(Y_[:, 0], -1))[:, 0]
Y[:, 1] = scaler_y2.fit_transform(np.expand_dims(Y_[:, 1], -1))[:, 0]
Y[:, 2] = scaler_y3.fit_transform(np.expand_dims(Y_[:, 2], -1))[:, 0]

x_train = X[:120, :]
x_test = X#[120:, :]
y_train = Y[:120, :]
y_test = Y#[120:, :]

In [6]:
# y_train[:, 0]

In [7]:
RBF_CLASSIFIER_list = []
n_clusters = [20, 25, 25] # no of clusters, parameter to tune
for i in range(Y.shape[1]):
    if i == 3:
        std_from_clusters = True
    else:
        std_from_clusters = False
    RBF_CLASSIFIER = RBF(x_train, np.expand_dims(y_train[:, i], -1), x_test, np.expand_dims(y_test[:, i], -1),
                         num_of_classes=10, k=n_clusters[i], epochs=100, std_from_clusters=False)
    RBF_CLASSIFIER.fit()
    RBF_CLASSIFIER_list.append(RBF_CLASSIFIER)

In [8]:
pred1 = scaler_y1.inverse_transform(np.expand_dims(RBF_CLASSIFIER_list[0].pred_ty[:, 0], -1))
pred2 = scaler_y2.inverse_transform(np.expand_dims(RBF_CLASSIFIER_list[1].pred_ty[:, 0], -1))
pred3 = scaler_y3.inverse_transform(np.expand_dims(RBF_CLASSIFIER_list[2].pred_ty[:, 0], -1))

pred = np.concatenate([pred1, pred2, pred3], axis=1)
actual = Y_.copy()#[120:, :].copy()

# result = np.concatenate([actual, pred], axis=1)
# df = pd.DataFrame(data=result, index=list(range(result.shape[0])), columns=list(range(result.shape[1])))

result = pd.DataFrame(np.concatenate([actual, pred, scaler_x.inverse_transform(X)], axis=1))

print(result)
for i in range(pred.shape[1]):
    mse = np.mean((actual[:, i] - pred[:, i]) ** 2)
    print("Mean squared error:", mse)

            0          1         2         3          4         5         6  \
0    0.186646  91.275503  7.320161  0.153170  90.872095  7.219592  3.025501   
1    0.158204  91.042365  7.482942  0.141279  90.838859  7.477795  3.025501   
2    0.211328  91.027887  7.586356  0.133013  90.791067  7.636525  3.025501   
3    0.088008  90.827035  7.762458  0.127618  90.735296  7.739076  3.025501   
4    0.103785  90.869638  7.833571  0.124612  90.675805  7.813244  3.025501   
..        ...        ...       ...       ...        ...       ...       ...   
126  0.111707  90.667962  8.103729  0.139235  90.520599  8.110865  3.053684   
127  0.126629  90.389588  8.281747  0.142193  90.436180  8.221774  3.053684   
128  0.143911  90.398711  8.354857  0.147601  90.349996  8.369812  3.053684   
129  0.178287  90.096464  8.515829  0.155963  90.259279  8.576018  3.053684   
130  0.181692  90.204390  8.585448  0.168073  90.162896  8.870343  3.053684   

            7        8         9        10        1

In [43]:
# result.to_csv("rbfnn_result_3.csv")
# # rbfnn_result_1 - n_clusters = [15, 25, 15]
# # rbfnn_result_2 - n_clusters = [15, 25, 20]
# # rbfnn_result_3 - n_clusters = [20, 25, 25]

In [None]:
# n_clusters = [15, 25, 15]
#            0          1         2         3          4         5
# 0   0.186407  91.275761  7.321435  0.158461  91.196125  7.218367
# 1   0.157900  91.042594  7.484215  0.155860  91.001530  7.422894
# 2   0.211904  91.028190  7.587714  0.153521  90.857307  7.593293
# 3   0.088861  90.827742  7.765061  0.151441  90.747872  7.739331
# 4   0.104405  90.865288  7.836106  0.149780  90.661762  7.869005
# 5   0.149177  90.623488  8.021199  0.148611  90.589626  7.989132
# 6   0.111707  90.667962  8.103729  0.147887  90.522482  8.105046
# 7   0.126629  90.389588  8.281747  0.147511  90.449944  8.220184
# 8   0.143911  90.398711  8.354857  0.147395  90.357951  8.337280
# 9   0.178287  90.096464  8.515829  0.147431  90.225513  8.458655
# 10  0.181692  90.204390  8.585448  0.147478  90.019652  8.586513
# Mean squared error: 0.001277198506377448
# Mean squared error: 0.014857053281845601
# Mean squared error: 0.002232528651335694

# n_clusters = [10, 25, 15]
#            0          1         2         3          4         5
# 0   0.186407  91.275761  7.321435  0.154332  91.240509  7.309939
# 1   0.157900  91.042594  7.484215  0.151817  91.070069  7.487201
# 2   0.211904  91.028190  7.587714  0.149865  90.923636  7.632071
# 3   0.088861  90.827742  7.765061  0.148528  90.800882  7.755338
# 4   0.104405  90.865288  7.836106  0.147841  90.699322  7.865317
# 5   0.149177  90.623488  8.021199  0.147728  90.614950  7.968808
# 6   0.111707  90.667962  8.103729  0.148133  90.541985  8.071681
# 7   0.126629  90.389588  8.281747  0.149049  90.471724  8.179313
# 8   0.143911  90.398711  8.354857  0.150514  90.390198  8.297083
# 9   0.178287  90.096464  8.515829  0.152602  90.273960  8.430376
# 10  0.181692  90.204390  8.585448  0.155314  90.082744  8.584846
# Mean squared error: 0.0012356609341042314
# Mean squared error: 0.01002356373874888
# Mean squared error: 0.0025419482595847695
    
# n_clusters = [10, 25, 20]
#            0          1         2         3          4         5
# 0   0.186407  91.275761  7.321435  0.195805  91.079732  7.347002
# 1   0.157900  91.042594  7.484215  0.167686  90.952075  7.508094
# 2   0.211904  91.028190  7.587714  0.147040  90.842231  7.647960
# 3   0.088861  90.827742  7.765061  0.132930  90.751710  7.773981
# 4   0.104405  90.865288  7.836106  0.124649  90.678430  7.890820
# 5   0.149177  90.623488  8.021199  0.121790  90.617995  8.001317
# 6   0.111707  90.667962  8.103729  0.124242  90.563721  8.107961
# 7   0.126629  90.389588  8.281747  0.132166  90.505688  8.212155
# 8   0.143911  90.398711  8.354857  0.146030  90.428644  8.311696
# 9   0.178287  90.096464  8.515829  0.166545  90.308064  8.399298
# 10  0.181692  90.204390  8.585448  0.194709  90.103411  8.462726
# Mean squared error: 0.0007266390368745536
# Mean squared error: 0.018376469430209447
# Mean squared error: 0.003971438807189822

# _______________________________________________________________________________
# n_clusters = [25, 10, 25]
# [[ 6.32143489  0.90482649 18.64073389  6.49199851  0.903966   16.74632011]
#  [ 6.48421538  0.90228284 15.79001894  6.70836733  0.90195857 14.26839949]
#  [ 6.58771387  0.90212571 21.19043583  6.84177544  0.90051045 12.70691895]
#  [ 6.76506092  0.89993901  8.88612263  6.93260335  0.89954589 11.76965323]
#  [ 6.83610577  0.90034859 10.44046554  7.00474243  0.89885789 11.26536823]
#  [ 7.02119884  0.89771078 14.91772976  7.07254895  0.89811624 11.06053437]
#  [ 7.10372884  0.89819594 11.17074525  7.14519621  0.89705964 11.11355727]
#  [ 7.28174719  0.89515915 12.66288722  7.22937959  0.89597376 11.49054761]
#  [ 7.35485686  0.89525866 14.39108248  7.32978373  0.89505026 12.33103989]
#  [ 7.51582938  0.89196143 17.82873233  7.44864887  0.89440842 13.86644659]
#  [ 7.58544814  0.8931388  18.16916605  7.5849007   0.894038   16.47402441]]
# Mean squared error: 0.019330276226511355
# Mean squared error: 1.3445757571735118e-06
# Mean squared error: 11.449272457983664

# n_clusters = [25, 15, 25]
# [[ 6.32143489  0.90482649 18.64073389  6.5549642   0.90413914 20.07182796]
#  [ 6.48421538  0.90228284 15.79001894  6.68499857  0.90175813 17.08043336]
#  [ 6.58771387  0.90212571 21.19043583  6.78666926  0.90002798 14.99781244]
#  [ 6.76506092  0.89993901  8.88612263  6.87305915  0.89876092 13.60848905]
#  [ 6.83610577  0.90034859 10.44046554  6.95410607  0.89780734 12.76531571]
#  [ 7.02119884  0.89771078 14.91772976  7.03847049  0.89704121 12.37791394]
#  [ 7.10372884  0.89819594 11.17074525  7.13507691  0.89635511 12.40279412]
#  [ 7.28174719  0.89515915 12.66288722  7.25496095  0.89567235 12.83772147]
#  [ 7.35485686  0.89525866 14.39108248  7.41392688  0.89491891 13.72048536]
#  [ 7.51582938  0.89196143 17.82873233  7.63606146  0.89399757 15.12996829]
#  [ 7.58544814  0.8931388  18.16916605  7.95983955  0.8927681  17.18673469]]
# Mean squared error: 0.029103020137389714
# Mean squared error: 1.9539281649196403e-06
# Mean squared error: 7.860439268170398

In [17]:
actual = pd.read_csv("rbfnn_result_3.csv").values[1:, 0:3].astype(np.float32)
pred = pd.read_csv("rbfnn_result_3.csv").values[1:, 3:6].astype(np.float32)

metrics_df = pd.DataFrame(columns=['metric', 'tripple', 'efficiency', 'tav'])

from sklearn.metrics import r2_score
# R^2
print("R2 tripple", r2_score(actual[:, 0], pred[:, 0]))
print("R2 efficiency", r2_score(actual[:, 1], pred[:, 1]))
print("R2 tav", r2_score(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'R2',
            'tripple': r2_score(actual[:, 0], pred[:, 0]),
            'efficiency': r2_score(actual[:, 1], pred[:, 1]),
            'tav': r2_score(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

def relative_squared_error(y_true, y_pred):
    return np.sum((y_true - y_pred)**2) / np.sum((y_pred - np.mean(y_true))**2)

print("RSE tripple", relative_squared_error(actual[:, 0], pred[:, 0]))
print("RSE efficiency", relative_squared_error(actual[:, 1], pred[:, 1]))
print("RSE tav", relative_squared_error(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'RSE',
            'tripple': relative_squared_error(actual[:, 0], pred[:, 0]),
            'efficiency': relative_squared_error(actual[:, 1], pred[:, 1]),
            'tav': relative_squared_error(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

from sklearn.metrics import mean_absolute_error
print("MAE tripple", mean_absolute_error(actual[:, 0], pred[:, 0]))
print("MAE efficiency", mean_absolute_error(actual[:, 1], pred[:, 1]))
print("MAE tav", mean_absolute_error(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'MAE',
            'tripple': mean_absolute_error(actual[:, 0], pred[:, 0]),
            'efficiency': mean_absolute_error(actual[:, 1], pred[:, 1]),
            'tav': mean_absolute_error(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

def relative_abs_error(y_true, y_pred):
    return np.sum(np.abs(y_true - y_pred)) / np.sum(np.abs(y_pred - np.mean(y_true)))

print("RAE tripple", relative_abs_error(actual[:, 0], pred[:, 0]))
print("RAE efficiency", relative_abs_error(actual[:, 1], pred[:, 1]))
print("RAE tav", relative_abs_error(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'RAE',
            'tripple': relative_abs_error(actual[:, 0], pred[:, 0]),
            'efficiency': relative_abs_error(actual[:, 1], pred[:, 1]),
            'tav': relative_abs_error(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

from sklearn.metrics import mean_squared_error
print("MSE tripple", mean_squared_error(actual[:, 0], pred[:, 0]))
print("MSE efficiency", mean_squared_error(actual[:, 1], pred[:, 1]))
print("MSE tav", mean_squared_error(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'MSE',
            'tripple': mean_squared_error(actual[:, 0], pred[:, 0]),
            'efficiency': mean_squared_error(actual[:, 1], pred[:, 1]),
            'tav': mean_squared_error(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

import math
print("RMSE tripple", math.sqrt(mean_squared_error(actual[:, 0], pred[:, 0])))
print("RMSE efficiency", math.sqrt(mean_squared_error(actual[:, 1], pred[:, 1])))
print("RMSE tav", math.sqrt(mean_squared_error(actual[:, 2], pred[:, 2])))
metrics_df = metrics_df.append({'metric': 'RMSE',
            'tripple': math.sqrt(mean_squared_error(actual[:, 0], pred[:, 0])),
            'efficiency': math.sqrt(mean_squared_error(actual[:, 1], pred[:, 1])),
            'tav': math.sqrt(mean_squared_error(actual[:, 2], pred[:, 2]))}, ignore_index=True)
print()

def nrmse_mean(y_true, y_pred):
    return math.sqrt(mean_squared_error(y_true, y_pred)) / np.mean(y_true)

print("NRMSE-mean tripple", nrmse_mean(actual[:, 0], pred[:, 0]))
print("NRMSE-mean efficiency", nrmse_mean(actual[:, 1], pred[:, 1]))
print("NRMSE-mean tav", nrmse_mean(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'NRMSE-mean',
            'tripple': nrmse_mean(actual[:, 0], pred[:, 0]),
            'efficiency': nrmse_mean(actual[:, 1], pred[:, 1]),
            'tav': nrmse_mean(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

def nrmse_min_max(y_true, y_pred):
    return math.sqrt(mean_squared_error(y_true, y_pred)) / (np.max(y_true) - np.min(y_true))

print("NRMSE-min_max tripple", nrmse_min_max(actual[:, 0], pred[:, 0]))
print("NRMSE-min_max efficiency", nrmse_min_max(actual[:, 1], pred[:, 1]))
print("NRMSE-min_max tav", nrmse_min_max(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'NRMSE-min_max',
            'tripple': nrmse_min_max(actual[:, 0], pred[:, 0]),
            'efficiency': nrmse_min_max(actual[:, 1], pred[:, 1]),
            'tav': nrmse_min_max(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

def nrmse_sd(y_true, y_pred):
    return math.sqrt(mean_squared_error(y_true, y_pred)) / np.std(y_true)

print("NRMSE-sd tripple", nrmse_sd(actual[:, 0], pred[:, 0]))
print("NRMSE-sd efficiency", nrmse_sd(actual[:, 1], pred[:, 1]))
print("NRMSE-sd tav", nrmse_sd(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'NRMSE-sd',
            'tripple': nrmse_sd(actual[:, 0], pred[:, 0]),
            'efficiency': nrmse_sd(actual[:, 1], pred[:, 1]),
            'tav': nrmse_sd(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

def nrmse_iq(y_true, y_pred):
    return math.sqrt(mean_squared_error(y_true, y_pred)) / (np.percentile(y_true, 75, interpolation = 'midpoint') - np.percentile(y_true, 25, interpolation = 'midpoint'))

print("NRMSE-iq tripple", nrmse_iq(actual[:, 0], pred[:, 0]))
print("NRMSE-iq efficiency", nrmse_iq(actual[:, 1], pred[:, 1]))
print("NRMSE-iq tav", nrmse_iq(actual[:, 2], pred[:, 2]))
metrics_df = metrics_df.append({'metric': 'NRMSE-iq',
            'tripple': nrmse_iq(actual[:, 0], pred[:, 0]),
            'efficiency': nrmse_iq(actual[:, 1], pred[:, 1]),
            'tav': nrmse_iq(actual[:, 2], pred[:, 2])}, ignore_index=True)
print()

print(metrics_df)
metrics_df.to_csv("rbfnn_metrics.csv")

R2 tripple 0.4280362858309096
R2 efficiency 0.8071536844297009
R2 tav 0.9390295172816733

RSE tripple 1.2905767
RSE efficiency 0.19569312
RSE tav 0.062077273

MAE tripple 0.02559235
MAE efficiency 0.13194513
MAE tav 0.07494376

RAE tripple 1.1688336
RAE efficiency 0.43312323
RAE tav 0.21486735

MSE tripple 0.0010051113
MSE efficiency 0.027032722
MSE tav 0.011299399

RMSE tripple 0.03170348974801138
RMSE efficiency 0.16441630633149232
RMSE tav 0.10629862994279378

NRMSE-mean tripple 0.20205434572528436
NRMSE-mean efficiency 0.0018112874704940814
NRMSE-mean tav 0.013047889024056169

NRMSE-min_max tripple 0.1612514507748165
NRMSE-min_max efficiency 0.11479573693578668
NRMSE-min_max tav 0.06033562435175796

NRMSE-sd tripple 0.7562828035461899
NRMSE-sd efficiency 0.4391427385330579
NRMSE-sd tav 0.24692202620813294

NRMSE-iq tripple 0.503894201807218
NRMSE-iq efficiency 0.2782758059654758
NRMSE-iq tav 0.15406986734908204

          metric   tripple  efficiency       tav
0             R2  0.4