<a href="https://colab.research.google.com/github/soumilhooda/MLDLNNtoCV/blob/main/Q4_NNFL_Assignment2_SoumilHooda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

CONST_GAUSS=0
CONST_LINEAR=1
CONST_QUAD=2

def get_dist(x1, x2):
    sum = 0
    for i in range(len(x1)):
        sum += (x1[i] - x2[i]) ** 2
    return np.sqrt(sum)

def kmeans(X, k, max_itrs):
  
    centroids = X[np.random.choice(range(len(X)), k, replace=False)]

    converged = False
    
    current_itr = 0

    while (not converged) and (current_itr < max_itrs):

        cluster_list = [[] for i in range(len(centroids))]

        for x in X:  # Go through each Dataset point
            dists_list = []
            for c in centroids:
                dists_list.append(get_dist(c, x))
            cluster_list[int(np.argmin(dists_list))].append(x)

        cluster_list = list((filter(None, cluster_list)))

        prev_centroids = centroids.copy()

        centroids = []

        for j in range(len(cluster_list)):
            centroids.append(np.mean(cluster_list[j], axis=0))

        pattern = np.abs(np.sum(prev_centroids) - np.sum(centroids))

        print('K-MEANS: ', int(pattern))

        converged = (pattern == 0)

        current_itr += 1

    return np.array(centroids), [np.std(x) for x in cluster_list]

In [2]:
class RBF:

    def __init__(self, X, y, tX, ty, num_of_classes,
                 k, mode=CONST_GAUSS, std_from_clusters=True):
        self.X = X
        self.y = y

        self.tX = tX
        self.ty = ty

        self.number_of_classes = num_of_classes
        self.k = k
        self.mode=mode
        self.std_from_clusters = std_from_clusters

    def convert_to_one_hot(self, x, num_of_classes):
        arr = np.zeros((len(x), num_of_classes))
        for i in range(len(x)):
            c = int(x[i])
            arr[i][c] = 1
        return arr

    def rbf(self, x, c, s):
       dist = get_dist(x, c)
       if self.mode == CONST_GAUSS:
          return 1 / np.exp(-dist / s ** 2)
       elif self.mode == CONST_LINEAR:
          return dist
       elif self.mode == CONST_QUAD:
          return np.sqrt(dist + (s ** 2))

    def rbf_list(self, X, centroids, std_list):
        RBF_list = []
        for x in X:
            RBF_list.append([self.rbf(x, c, s) for (c, s) in zip(centroids, std_list)])
        return np.array(RBF_list)

    def fit(self):

      self.centroids, self.std_list = kmeans(self.X, self.k, max_itrs=1000)

      if not self.std_from_clusters:
          dMax = np.max([get_dist(c1, c2) for c1 in self.centroids for c2 in self.centroids])
          self.std_list = np.repeat(dMax / np.sqrt(2 * self.k), self.k)

      RBF_X = self.rbf_list(self.X, self.centroids, self.std_list)

      self.w = np.linalg.pinv(RBF_X.T @ RBF_X) @ RBF_X.T @ self.convert_to_one_hot(self.y, self.number_of_classes)

      RBF_list_tst = self.rbf_list(self.tX, self.centroids, self.std_list)

      self.pred_ty = RBF_list_tst @ self.w

      self.pred_ty = np.array([np.argmax(x) for x in self.pred_ty])

      diff = self.pred_ty - self.ty

      if self.mode == CONST_GAUSS:
        print('Gaussian Accuracy: ', len(np.where(diff == 0)[0]) / len(diff))
      elif self.mode == CONST_LINEAR:
        print('Linear Accuracy: ', len(np.where(diff == 0)[0]) / len(diff))
      elif self.mode == CONST_QUAD:
        print('Multi Quadratic Accuracy: ', len(np.where(diff == 0)[0]) / len(diff))    

In [3]:
from google.colab import files
uploaded=files.upload()

Saving data5.xlsx to data5.xlsx


In [4]:
df = pd.read_excel('data5.xlsx')

Dataset = df.to_numpy()

#print(Dataset)
np.random.shuffle(Dataset)
train_y = Dataset[22:209, -1]-1
train_x = Dataset[22:209, 0:5]

test_y = Dataset[0:21, -1]-1
test_x = Dataset[0:21, 0:5]

#===== Gaussian
RBF_GAUSS = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_GAUSS, std_from_clusters=False)

RBF_GAUSS.fit()

#==== Linear
RBF_LINEAR = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_LINEAR, std_from_clusters=False)

RBF_LINEAR.fit()

#===== Multi Quadratic
RBF_QUAD = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_QUAD, std_from_clusters=False)

RBF_QUAD.fit()

K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Gaussian Accuracy:  0.9047619047619048
K-MEANS:  1
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Linear Accuracy:  0.9047619047619048
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Multi Quadratic Accuracy:  0.9047619047619048


In [5]:
fold1 = Dataset[0:41, :]
fold2 = Dataset[42:83, :]
fold3 = Dataset[84:125, :]
fold4 = Dataset[126:167, :]
fold5 = Dataset[168:209, :]
#print(fold2)

In [6]:
train_Dataset=np.vstack((fold2, fold3, fold4, fold5))
train_y = train_Dataset[:,-1]-1
train_x = train_Dataset[:,0:5]

test_y = fold1[:, -1]-1
test_x = fold1[:, 0:5]

#===== Gaussian
RBF_GAUSS = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_GAUSS, std_from_clusters=False)

RBF_GAUSS.fit()

#==== Linear
RBF_LINEAR = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_LINEAR, std_from_clusters=False)

RBF_LINEAR.fit()

#===== Multi Quadratic
RBF_QUAD = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_QUAD, std_from_clusters=False)

RBF_QUAD.fit()

K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Gaussian Accuracy:  0.8292682926829268
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Linear Accuracy:  0.8292682926829268
K-MEANS:  1
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Multi Quadratic Accuracy:  0.8292682926829268


In [7]:
train_Dataset=np.vstack((fold1, fold3, fold4, fold5))
train_y = train_Dataset[:,-1]-1
train_x = train_Dataset[:,0:5]

test_y = fold2[:, -1]-1
test_x = fold2[:, 0:5]

#===== Gaussian
RBF_GAUSS = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_GAUSS, std_from_clusters=False)

RBF_GAUSS.fit()

#==== Linear
RBF_LINEAR = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_LINEAR, std_from_clusters=False)

RBF_LINEAR.fit()

#===== Multi Quadratic
RBF_QUAD = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_QUAD, std_from_clusters=False)

RBF_QUAD.fit()

K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Gaussian Accuracy:  0.7560975609756098
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Linear Accuracy:  0.8536585365853658
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Multi Quadratic Accuracy:  0.8292682926829268


In [8]:
train_Dataset=np.vstack((fold1, fold2, fold4, fold5))
train_y = train_Dataset[:,-1]-1
train_x = train_Dataset[:,0:5]

test_y = fold3[:, -1]-1
test_x = fold3[:, 0:5]

#===== Gaussian
RBF_GAUSS = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_GAUSS, std_from_clusters=False)

RBF_GAUSS.fit()

#==== Linear
RBF_LINEAR = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_LINEAR, std_from_clusters=False)

RBF_LINEAR.fit()

#===== Multi Quadratic
RBF_QUAD = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_QUAD, std_from_clusters=False)

RBF_QUAD.fit()

K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Gaussian Accuracy:  0.6585365853658537
K-MEANS:  1
K-MEANS:  0
K-MEANS:  0
Linear Accuracy:  0.8048780487804879
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Multi Quadratic Accuracy:  0.8292682926829268


In [9]:
train_Dataset=np.vstack((fold1, fold2, fold3, fold5))
train_y = train_Dataset[:,-1]-1
train_x = train_Dataset[:,0:5]

test_y = fold4[:, -1]-1
test_x = fold4[:, 0:5]

#===== Gaussian
RBF_GAUSS = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_GAUSS, std_from_clusters=False)

RBF_GAUSS.fit()

#==== Linear
RBF_LINEAR = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_LINEAR, std_from_clusters=False)

RBF_LINEAR.fit()

#===== Multi Quadratic
RBF_QUAD = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_QUAD, std_from_clusters=False)

RBF_QUAD.fit()

K-MEANS:  1
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Gaussian Accuracy:  0.8292682926829268
K-MEANS:  1
K-MEANS:  0
K-MEANS:  0
Linear Accuracy:  0.7804878048780488
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Multi Quadratic Accuracy:  0.7560975609756098


In [10]:
train_Dataset=np.vstack((fold1, fold2, fold3, fold4))
train_y = train_Dataset[:,-1]-1
train_x = train_Dataset[:,0:5]

test_y = fold5[:, -1]-1
test_x = fold5[:, 0:5]

#===== Gaussian
RBF_GAUSS = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_GAUSS, std_from_clusters=False)

RBF_GAUSS.fit()

#==== Linear
RBF_LINEAR = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_LINEAR, std_from_clusters=False)

RBF_LINEAR.fit()

#===== Multi Quadratic
RBF_QUAD = RBF(train_x, train_y, test_x, test_y, num_of_classes=3,
                     k=100, mode=CONST_QUAD, std_from_clusters=False)

RBF_QUAD.fit()

K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Gaussian Accuracy:  0.926829268292683
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Linear Accuracy:  0.9024390243902439
K-MEANS:  0
K-MEANS:  0
K-MEANS:  0
Multi Quadratic Accuracy:  0.8536585365853658
