In [6]:
##RBF Implementation
from scipy.io import loadmat
import pandas as pd
import numpy as np
import math
import random
from scipy.spatial import distance

#Load the MATLAB .mat files with loadmat (scipy.io); each call returns a dict
#holding the file's header fields plus the stored variables.
data_test = loadmat('data_test.mat')
data_train = loadmat('data_train.mat')
label_train = loadmat('label_train.mat')

#Get the data and the label arrays out of the loaded dicts by variable name
db_data_train = data_train["data_train"]
db_label_train = label_train["label_train"]
db_data_test = data_test["data_test"]

#Number of RBF centers. Test with different values to get different results
num_centers = 200

#Initialize the weight matrix to zero (it is replaced once the weights are solved for)
db_weight = np.zeros([num_centers,1])

#Draw the center indices at random, without replacement, from every training row.
#The bounds come from the data itself instead of hard-coded index limits (which can
#leave trailing rows out), and the feature count is no longer fixed to a literal.
#NOTE(review): no random seed is set in this cell, so the centers differ between runs.
index_seed = random.sample(range(len(db_data_train)), num_centers)

#The centers are the sampled training rows, shape (num_centers, n_features)
centers = np.asarray(db_data_train[index_seed], dtype=float)

#Largest euclidean distance between any two centers, needed to calculate sigma.
#pdist evaluates each unordered pair once; with fewer than two centers there is
#no pair to measure, so d_max stays 0.
d_max = 0
if num_centers > 1:
    d_max = float(distance.pdist(centers).max())

#print(db_data_train)
#print(db_data_test)
#print(db_label_train)
#print(db_weight)

#For Only 2 Centers. Each Center for each Class: the mean of that class's training rows.
#Boolean row masks replace the per-sample loop, and the feature count is taken from
#the data instead of being hard-coded.
num_class_1 = int(np.count_nonzero(db_label_train == 1))
num_class_2 = int(np.count_nonzero(db_label_train == -1))

#Per-class feature sums, kept as (1, n_features) row vectors
db_class1 = db_data_train[np.ravel(db_label_train == 1)].sum(axis=0, keepdims=True, dtype=float)
db_class2 = db_data_train[np.ravel(db_label_train == -1)].sum(axis=0, keepdims=True, dtype=float)

db_avg1 = db_class1/num_class_1
db_avg2 = db_class2/num_class_2

#Calculate Sigma based on Centers. sigma = dmax/sqrt(2m). m is number of centers. dmax is max distance between centers
ligma = d_max/math.sqrt(2*num_centers)

#Calculate phi (F=phi*w)
def fi(x,c,sigma):
    """Return the Gaussian RBF design matrix for samples x and centers c.

    Entry [i, j] is exp(-||x[i] - c[j]||^2 / (2 * sigma^2)).

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input samples, one per row.
    c : array-like, shape (n_centers, n_features)
        RBF centers, one per row.
    sigma : float
        Width of the Gaussian; must be non-zero.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_centers)
    """
    samples = np.asarray(x, dtype=float)
    centres = np.asarray(c, dtype=float)
    # Broadcasting (n, 1, d) against (1, m, d) yields every sample-center
    # difference at once, replacing the Python-level double loop.
    diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
    sq_dist = np.sum(diff * diff, axis=-1)
    return np.exp(-sq_dist/(2*sigma*sigma))

#Design matrix of the training set: one row per sample, one column per center
db_fi = fi(x=db_data_train, c=centers, sigma=ligma)

#Solve for the output weights with numpy's pseudo inverse of the design matrix
db_weight = np.linalg.pinv(db_fi) @ db_label_train

#print(db_weight)

#Evaluation of accuracy on the train data set
def acc(x,lablez, centers, sigma, w):
    """Return the fraction of rows of x whose thresholded network output equals its label.

    The network output is fi(x, centers, sigma) multiplied by w. The decision
    threshold is the mean of the distinct values in lablez; outputs above it
    are mapped to 1 and all others to -1 before being compared with lablez.
    """
    scores = np.matmul(fi(x, centers, sigma), w)
    cutoff = np.average(np.unique(lablez))
    # Hard decision per sample
    decided = [1 if score > cutoff else -1 for score in scores]
    # Count the samples whose decision matches the given label
    hits = sum(1 for guess, truth in zip(decided, lablez) if guess == truth)
    return hits/len(x)

#Report how well the fitted network reproduces the training labels
print("Accuracy:", acc(db_data_train, db_label_train, centers, ligma, db_weight))


#Prediction on the test data set to get the class labels
prediction = fi(db_data_test, centers, ligma) @ db_weight
#Hard decision at zero, written back in place: positive outputs become 1, all others -1
prediction[...] = np.where(prediction > 0, 1, -1)
print("Class labels: \n", prediction.astype(int))

Accuracy: 0.9363636363636364
Class labels: 
 [[ 1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [ 1]]


In [5]:
#SVM

from scipy.io import loadmat

##Load the MATLAB .mat files (train/test data and train/test labels)
data_test = loadmat('data_test.mat')
data_train = loadmat('data_train.mat')
label_train = loadmat('label_train.mat')
label_test = loadmat('label_test.mat')

## Get the data and the label arrays out of the loaded dicts by variable name
db_data_train = data_train["data_train"]
db_label_train = label_train["label_train"]
db_data_test = data_test["data_test"]
db_label_test = label_test["label_test"]

##Train and Test variables
##(y_test is assigned here but not used by the active code in this cell)

X_train = db_data_train
y_train = db_label_train
X_test = db_data_test
y_test = db_label_test

##Import SVM library from sklearn

from sklearn.svm import SVC
svclassifier = SVC(kernel='rbf', gamma='auto') #kernel='rbf' selects the Gaussian kernel of the SVC class.
##Hyperparameter gamma can be changed between 'scale' and 'auto'

##The labels are flattened to 1-D with ravel() before being passed to fit
svclassifier.fit(X_train, y_train.ravel())

from sklearn.metrics import classification_report, confusion_matrix

##Predictions on the training set itself, so the report below measures training performance
y_val = svclassifier.predict(X_train)
##Evaluation of accuracy and classification report
print("Confusion Matrix & Classification Report : \n", confusion_matrix(y_train, y_val))
print(classification_report(y_train, y_val))

##Prediction of the class labels for the test data
y_pred = svclassifier.predict(X_test)
print("Predicted labels:",y_pred)

#print(confusion_matrix(y_test, y_pred))
#print(classification_report(y_test, y_pred))
#The output of the Kernel SVM with Gaussian kernel looks like this:


Confusion Matrix & Classification Report : 
 [[100  16]
 [  2 212]]
              precision    recall  f1-score   support

          -1       0.98      0.86      0.92       116
           1       0.93      0.99      0.96       214

    accuracy                           0.95       330
   macro avg       0.96      0.93      0.94       330
weighted avg       0.95      0.95      0.94       330

Predicted labels: [ 1 -1  1 -1  1 -1  1 -1  1 -1  1 -1  1  1  1 -1  1 -1  1 -1  1]


In [None]:
from scipy.io import loadmat
#import pandas as pd
import numpy as np
import math
import random

#Load the .mat files and extract the stored arrays by variable name
data_test = loadmat('data_test.mat')
data_train = loadmat('data_train.mat')
label_train = loadmat('label_train.mat')
db_data_train = data_train["data_train"]
db_label_train = label_train["label_train"]
db_data_test = data_test["data_test"]

#Number of RBF centers
num_centers = 200
#Zero placeholder for the weights (replaced once the weights are solved for)
db_weight = np.zeros([num_centers,1])

#Draw the center indices at random, without replacement, from every training row.
#The bounds come from the data itself instead of hard-coded index limits (which can
#leave trailing rows out), and the feature count is no longer fixed to a literal.
index_seed = random.sample(range(len(db_data_train)), num_centers)
print(index_seed)

#The centers are the sampled training rows, shape (num_centers, n_features)
centers = np.asarray(db_data_train[index_seed], dtype=float)
#print(db_data_train)
#print(db_data_test)
#print(db_label_train)
#print(db_weight)

#Per-class mean of the training rows (label 1 and label -1).
#Boolean row masks replace the per-sample loop, and the feature count is taken from
#the data instead of being hard-coded.
num_class_1 = int(np.count_nonzero(db_label_train == 1))
num_class_2 = int(np.count_nonzero(db_label_train == -1))

#Per-class feature sums, kept as (1, n_features) row vectors
db_class1 = db_data_train[np.ravel(db_label_train == 1)].sum(axis=0, keepdims=True, dtype=float)
db_class2 = db_data_train[np.ravel(db_label_train == -1)].sum(axis=0, keepdims=True, dtype=float)

db_avg1 = db_class1/num_class_1
db_avg2 = db_class2/num_class_2



#Fixed Gaussian width (sigma) passed to fi below
ligma = 0.707
#print(num_class_1)
#Module-level 330 x num_centers matrix of zeros
db_phi = np.zeros([330,num_centers])

def fi(x,c,sigma):
    """Return the Gaussian RBF design matrix for samples x and centers c.

    Entry [i, j] is exp(-||x[i] - c[j]||^2 / (2 * sigma^2)).

    A fresh array is returned on every call, so the result's shape follows x
    and an earlier result is never overwritten by a later call.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input samples, one per row.
    c : array-like, shape (n_centers, n_features)
        RBF centers, one per row.
    sigma : float
        Width of the Gaussian; must be non-zero.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_centers)
    """
    samples = np.asarray(x, dtype=float)
    centres = np.asarray(c, dtype=float)
    # Broadcasting (n, 1, d) against (1, m, d) yields every sample-center
    # difference at once, replacing the Python-level double loop.
    diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
    sq_dist = np.sum(diff * diff, axis=-1)
    return np.exp(-sq_dist/(2*sigma*sigma))

#Design matrix of the training set, then the output weights via the pseudo inverse
db_fi = fi(x=db_data_train, c=centers, sigma=ligma)
db_weight = np.linalg.pinv(db_fi) @ db_label_train

print(db_weight)

[311, 189, 95, 299, 100, 312, 314, 202, 96, 206, 234, 197, 104, 306, 183, 250, 102, 43, 56, 278, 284, 272, 283, 247, 27, 246, 10, 112, 61, 218, 15, 72, 146, 124, 168, 219, 193, 98, 210, 232, 130, 174, 175, 71, 276, 169, 109, 265, 40, 108, 273, 154, 274, 228, 235, 328, 239, 305, 226, 48, 293, 180, 269, 153, 4, 222, 308, 190, 116, 144, 51, 319, 1, 288, 114, 78, 303, 178, 18, 238, 67, 156, 231, 258, 17, 164, 117, 223, 264, 110, 267, 177, 242, 152, 125, 181, 266, 89, 315, 233, 259, 199, 318, 92, 179, 5, 187, 198, 286, 326, 34, 295, 211, 161, 74, 60, 149, 281, 325, 236, 268, 207, 302, 30, 290, 142, 62, 23, 88, 162, 0, 248, 41, 217, 150, 97, 8, 111, 59, 309, 323, 241, 261, 126, 194, 320, 243, 84, 131, 141, 208, 36, 64, 301, 21, 73, 212, 16, 24, 245, 46, 279, 289, 57, 118, 230, 122, 244, 249, 105, 158, 121, 113, 170, 55, 6, 237, 132, 173, 69, 277, 275, 32, 253, 77, 282, 79, 83, 133, 106, 159, 160, 54, 75, 94, 31, 35, 151, 38, 143]
[[ 3.12844952e+00]
 [-1.00000002e+00]
 [-2.34679362e+00]
 [-2.

In [4]:
# Inspect the raw dict returned by loadmat: MAT-file header fields plus the 'data_test' array.
data_test

{'__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Sun Sep 12 22:22:01 2021',
 '__version__': '1.0',
 'data_test': array([[ 1.0000e+00,  9.9539e-01, -5.8890e-02,  8.5243e-01,  2.3060e-02,
          8.3398e-01, -3.7708e-01,  1.0000e+00,  3.7600e-02,  8.5243e-01,
         -1.7755e-01,  5.9755e-01, -4.4945e-01,  6.0536e-01, -3.8223e-01,
          8.4356e-01, -3.8542e-01,  5.8212e-01, -3.2192e-01,  5.6971e-01,
         -2.9674e-01,  3.6946e-01, -4.7357e-01,  5.6811e-01, -5.1171e-01,
          4.1078e-01, -4.6168e-01,  2.1266e-01, -3.4090e-01,  4.2267e-01,
         -5.4487e-01,  1.8641e-01, -4.5300e-01],
        [ 1.0000e+00,  1.0000e+00, -1.8829e-01,  9.3035e-01, -3.6156e-01,
         -1.0868e-01, -9.3597e-01,  1.0000e+00, -4.5490e-02,  5.0874e-01,
         -6.7743e-01,  3.4432e-01, -6.9707e-01, -5.1685e-01, -9.7515e-01,
          5.4990e-02, -6.2237e-01,  3.3109e-01, -1.0000e+00, -1.3151e-01,
         -4.5300e-01, -1.8056e-01, -3.5734e-01, -2.0332e-01

In [13]:
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 22 19:54:17 2021

@author: Shiva
"""

from scipy.io import loadmat
#import pandas as pd
import numpy as np
import math
import random
from scipy.spatial import distance

#Load the .mat files and extract the stored arrays by variable name
data_test = loadmat('data_test.mat')
data_train = loadmat('data_train.mat')
label_train = loadmat('label_train.mat')
db_data_train = data_train["data_train"]
db_label_train = label_train["label_train"]
db_data_test = data_test["data_test"]

#Number of RBF centers
num_centers = 200
#Zero placeholder for the weights (replaced once the weights are solved for)
db_weight = np.zeros([num_centers,1])

#Draw the center indices at random, without replacement, from every training row.
#The bounds come from the data itself instead of hard-coded index limits (which can
#leave trailing rows out), and the feature count is no longer fixed to a literal.
index_seed = random.sample(range(len(db_data_train)), num_centers)
print(index_seed)

#The centers are the sampled training rows, shape (num_centers, n_features)
centers = np.asarray(db_data_train[index_seed], dtype=float)

#Largest euclidean distance between any two centers (d_max), used further down
#for sigma = dmax/sqrt(2m), m being the number of centers.
#pdist evaluates each unordered pair once; with fewer than two centers there is
#no pair to measure, so d_max stays 0.
d_max = 0
if num_centers > 1:
    d_max = float(distance.pdist(centers).max())

#print(db_data_train)
#print(db_data_test)
#print(db_label_train)
#print(db_weight)

#Per-class mean of the training rows (label 1 and label -1).
#Boolean row masks replace the per-sample loop, and the feature count is taken from
#the data instead of being hard-coded.
num_class_1 = int(np.count_nonzero(db_label_train == 1))
num_class_2 = int(np.count_nonzero(db_label_train == -1))

#Per-class feature sums, kept as (1, n_features) row vectors
db_class1 = db_data_train[np.ravel(db_label_train == 1)].sum(axis=0, keepdims=True, dtype=float)
db_class2 = db_data_train[np.ravel(db_label_train == -1)].sum(axis=0, keepdims=True, dtype=float)

db_avg1 = db_class1/num_class_1
db_avg2 = db_class2/num_class_2



#ligma = 0.707  (fixed-width alternative, left disabled)
#print(num_class_1)
#Width heuristic: sigma = d_max/sqrt(2*num_centers), d_max being the largest distance between centers
ligma = d_max/math.sqrt(2*num_centers)
print(d_max)
print(ligma)

#Module-level 330 x num_centers matrix of zeros
db_phi = np.zeros([330,num_centers])

def fi(x,c,sigma):
    """Return the Gaussian RBF design matrix for samples x and centers c.

    Entry [i, j] is exp(-||x[i] - c[j]||^2 / (2 * sigma^2)).

    A fresh array is returned on every call, so the result's shape follows x
    and an earlier result is never overwritten by a later call.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input samples, one per row.
    c : array-like, shape (n_centers, n_features)
        RBF centers, one per row.
    sigma : float
        Width of the Gaussian; must be non-zero.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_centers)
    """
    samples = np.asarray(x, dtype=float)
    centres = np.asarray(c, dtype=float)
    # Broadcasting (n, 1, d) against (1, m, d) yields every sample-center
    # difference at once, replacing the Python-level double loop.
    diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
    sq_dist = np.sum(diff * diff, axis=-1)
    return np.exp(-sq_dist/(2*sigma*sigma))

#Design matrix of the training set, then the output weights via the pseudo inverse
db_fi = fi(x=db_data_train, c=centers, sigma=ligma)
db_weight = np.linalg.pinv(db_fi) @ db_label_train

print(db_weight)

def acc(x,lablez, centers, sigma, w):
    """Return the fraction of rows of x whose thresholded network output equals its label.

    The network output is fi(x, centers, sigma) multiplied by w. The decision
    threshold is the mean of the distinct values in lablez; outputs above it
    are mapped to 1 and all others to -1 before being compared with lablez.
    """
    scores = np.matmul(fi(x, centers, sigma), w)
    cutoff = np.average(np.unique(lablez))
    # Hard decision per sample
    decided = [1 if score > cutoff else -1 for score in scores]
    # Count the samples whose decision matches the given label
    hits = sum(1 for guess, truth in zip(decided, lablez) if guess == truth)
    return hits/len(x)

#Print the fraction of training samples whose thresholded output matches its label
print(acc(db_data_train,db_label_train, centers, ligma, db_weight))

[144, 96, 73, 222, 241, 308, 16, 51, 42, 249, 254, 102, 123, 304, 118, 160, 139, 29, 150, 1, 55, 301, 172, 79, 121, 207, 277, 48, 318, 38, 111, 14, 76, 229, 293, 312, 44, 146, 106, 290, 267, 93, 95, 9, 183, 120, 109, 101, 270, 281, 193, 127, 177, 266, 22, 246, 175, 27, 212, 0, 159, 92, 105, 33, 35, 208, 61, 218, 132, 45, 82, 119, 158, 223, 258, 305, 325, 328, 36, 224, 283, 248, 271, 5, 84, 206, 138, 126, 97, 227, 237, 40, 100, 98, 149, 252, 279, 178, 214, 161, 311, 188, 168, 324, 53, 19, 143, 179, 282, 47, 192, 189, 303, 225, 236, 217, 41, 299, 156, 43, 200, 287, 260, 253, 52, 104, 62, 245, 323, 130, 197, 235, 170, 60, 70, 256, 87, 259, 54, 272, 152, 309, 297, 58, 198, 57, 80, 315, 11, 110, 203, 182, 210, 268, 134, 276, 181, 28, 228, 125, 59, 112, 164, 190, 122, 85, 280, 13, 24, 114, 196, 20, 103, 326, 285, 261, 269, 108, 7, 69, 145, 3, 166, 184, 83, 199, 86, 135, 155, 291, 49, 215, 77, 220, 12, 274, 265, 25, 169, 116]
8.717797887081348
0.4358898943540674
[[ 0.7316032 ]
 [ 0.99001725]


In [1]:
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 22 22:39:00 2021

@author: Shiva
"""

#from enum import auto
#import numpy as np
#from numpy.core.numeric import correlate
###import the scipy for loading .mat matlab data
from scipy.io import loadmat
### Load the .mat files (each loadmat call returns a dict) and take out the data and label arrays by variable name
data_test = loadmat('data_test.mat')
data_train = loadmat('data_train.mat')
label_train = loadmat('label_train.mat')
label_test = loadmat('label_test.mat')
db_data_train = data_train["data_train"]
db_label_train = label_train["label_train"]
db_data_test = data_test["data_test"]
db_label_test = label_test["label_test"]

#### printing the type and size of the variables
#tbd


X_train = db_data_train
y_train = db_label_train
#X_test = db_data_test
#y_test = db_label_test

#The evaluation set is deliberately the training set here, so the report below
#measures training performance, not generalization.
X_test = db_data_train
y_test = db_label_train

from sklearn.svm import SVC
#To use Gaussian kernel, you have to specify 'rbf' as value for the Kernel parameter of the SVC class.
svclassifier = SVC(kernel='rbf', gamma='auto')
#fit expects a 1-D label vector; passing the column-shaped array as loaded triggers
#sklearn's column_or_1d conversion warning, so the labels are flattened first.
svclassifier.fit(X_train, y_train.ravel())

#Prediction and Evaluation
y_pred = svclassifier.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
#The output of the Kernel SVM with Gaussian kernel looks like this:

[[100  16]
 [  2 212]]
              precision    recall  f1-score   support

          -1       0.98      0.86      0.92       116
           1       0.93      0.99      0.96       214

    accuracy                           0.95       330
   macro avg       0.96      0.93      0.94       330
weighted avg       0.95      0.95      0.94       330



  y = column_or_1d(y, warn=True)


In [2]:
# Display the raw array of labels predicted by the SVM.
y_pred

array([-1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1, -1,  1, -1,
        1,  1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,
       -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1, -1,  1, -1,  1, -1,
        1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1, -1,  1, -1,  1,  1, -1,  1, -1,  1, -1,  1, -1,  1,
       -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1, -1,  1,  1,  1, -1,
        1, -1,  1,  1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,
       -1,  1,  1, -1,  1,  1, -1,  1,  1,  1, -1,  1, -1,  1, -1,  1, -1,
        1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,
        1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
        1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1,
       -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,  1, -1,
        1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1,  1,  1, -1,  1, -1,  1,
       -1,  1, -1,  1, -1

In [None]:
### With pred