In [None]:
import pandas as pd
import numpy as np
import random

## **Support Vector Machines**

**(a) Load IRIS dataset. Create a linearly separable dataset with two features of petal
length and petal width and two classes ‘versicolor’ and ‘virginica’.**

In [None]:
# Read the raw UCI Iris table. It is parsed with header=None, so the column
# labels are supplied here instead of being taken from the first row.
irisDataSet=pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
    names=['Sepal-length(cm)','Sepal-width(cm)','Petal-length(cm)','Petal-width(cm)','Class'],
)
# Preview rows 50-149, the slice the SVM section works on.
irisDataSet[50:150]

Unnamed: 0,Sepal-length(cm),Sepal-width(cm),Petal-length(cm),Petal-width(cm),Class
50,7.0,3.2,4.7,1.4,Iris-versicolor
51,6.4,3.2,4.5,1.5,Iris-versicolor
52,6.9,3.1,4.9,1.5,Iris-versicolor
53,5.5,2.3,4.0,1.3,Iris-versicolor
54,6.5,2.8,4.6,1.5,Iris-versicolor
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [None]:
# Feature matrix: rows 50-149, every column except the trailing 'Class' label.
irisData=irisDataSet.to_numpy()[50:150,:-1]
# Label vector for the same rows: 'Iris-versicolor' -> -1, any other class -> +1.
target=np.array([
    -1 if label=='Iris-versicolor' else 1
    for label in irisDataSet['Class'][50:150]
])

**(b) Perform a classification using SVM**

In [None]:
class SVM(object):
  """Linear SVM trained by per-sample sub-gradient steps on the regularised hinge loss.

  The decision value for a sample ``x`` is ``w.x - b``; ``predict`` returns its
  sign. ``accuracy`` compares labels against that sign, so labels need to be
  -1/+1 for a prediction to ever count as correct.

  Parameters
  ----------
  nInputs : number of features (length of the weight vector).
  dataSet : iterable of training samples, read by ``fit``.
  target  : iterable of training labels, paired with ``dataSet`` by ``fit``.
  lmbda   : weight of the L2 penalty on ``w``.
  nIters  : number of full passes over the training data.
  lr      : step size.
  """

  def __init__(self,nInputs,dataSet,target,lmbda=0.001,nIters=400,lr=0.01):
    # Training data is kept on the instance; fit() takes no arguments.
    self.dataSet,self.target=dataSet,target
    self.nInputs=nInputs
    self.lmbda,self.nIters,self.lr=lmbda,nIters,lr

    # Model parameters start at zero.
    self.w=np.zeros(self.nInputs)
    self.b=0

  def fit(self):
    """Run ``nIters`` passes over the stored data, updating ``w`` and ``b`` in place."""
    for _epoch in range(self.nIters):
      for sample,label in zip(self.dataSet,self.target):
        margin=label*(np.dot(sample,self.w)-self.b)
        shrink=2*self.lmbda*self.w
        if margin>=1:
          # Margin satisfied: only the regulariser pulls on w.
          self.w-=self.lr*shrink
          continue
        # Margin violated: add the hinge term for w and move the bias.
        self.w=self.w-self.lr*(shrink-sample*label)
        self.b-=self.lr*label

  def predict(self, X):
    """Return ``sign(X.w - b)`` (0 for a sample lying exactly on the boundary)."""
    return np.sign(np.dot(X,self.w)-self.b)

  def accuracy(self,dataSet,target):
    """Percentage (0-100) of samples whose predicted sign equals the label."""
    hits=0.0
    for sample,label in zip(dataSet,target):
      hits+=1 if label==self.predict(sample) else 0
    return (hits/len(target))*100


In [None]:
# Fit on the whole irisData/target set (4 feature columns) and score on those
# same rows, so the value displayed below is a training accuracy.
svm=SVM(4,irisData,target)
svm.fit()
svm.accuracy(irisData,target)

93.0

**(c) Perform 5-fold cross-validation and report the class-wise and average accuracies.**

In [None]:


def kFoldSplit(dataset, folds):
  """Randomly partition the rows of ``dataset`` into ``folds`` equally sized folds.

  Rows are drawn without replacement using the ``random`` module, so seed it
  with ``random.seed(...)`` for a reproducible split. Every fold holds
  ``len(dataset) // folds`` rows; when the length is not a multiple of
  ``folds`` the leftover rows are not placed in any fold.

  Returns a numpy array whose first axis indexes the folds.
  """
  remaining = list(dataset)
  fold_size = int(len(dataset) / folds)
  dataset_split = []
  for _ in range(folds):
    fold = []
    while len(fold) < fold_size:
      # Qualified call: only `import random` is in scope in this notebook;
      # the bare name `randrange` raised NameError on a fresh kernel.
      index = random.randrange(len(remaining))
      fold.append(remaining.pop(index))
    dataset_split.append(fold)
  return np.array(dataset_split)


def kFoldCrossValidation(dataSet,target,folds):
  """Run k-fold cross-validation of ``SVM`` and return the per-fold test accuracies.

  Parameters
  ----------
  dataSet : 2-D array of samples (one row per sample).
  target  : 1-D array of labels aligned with ``dataSet``.
  folds   : number of folds; must be at least 2 so a training set exists.

  Returns
  -------
  list of accuracies (percentages), one per held-out fold.

  Raises
  ------
  ValueError if ``folds`` is smaller than 2.
  """
  if folds < 2:
    raise ValueError("folds must be at least 2")

  # Append the labels as a last column so samples and labels are shuffled together.
  dataSetAndTarget=np.column_stack((dataSet,target))

  kFoldData=kFoldSplit(dataSetAndTarget,folds)
  acc=[]

  for i in range(len(kFoldData)):
    testData=kFoldData[i]
    # Every fold except the held-out one forms the training set.
    # np.vstack replaces np.row_stack, which is deprecated since NumPy 2.0.
    trainData=np.vstack([kFoldData[k] for k in range(len(kFoldData)) if k!=i])

    # Slice the label column as 1-D (`-1`, not `-1:`): column-vector labels
    # turned the SVM bias into a 1-element array during fitting.
    trainX,trainY=trainData[:,:-1],trainData[:,-1]
    testX,testY=testData[:,:-1],testData[:,-1]

    # Feature count is taken from the data instead of being fixed at 4.
    svm=SVM(trainX.shape[1],trainX,trainY)
    svm.fit()
    acc.append(svm.accuracy(testX,testY))

  return acc

In [None]:
# Average of the per-fold accuracies returned by kFoldCrossValidation with 5 folds.
print("Avg accuracy after 5-fold cross validation using SVM : ",np.mean(kFoldCrossValidation(irisData,target,5)))

Avg accuracy after 5-fold cross validation using SVM :  93.0


In [None]:
# NOTE(review): the model here is fitted on the full irisData/target set and
# scored on slices of that same data, so these class-wise numbers are training
# accuracies rather than cross-validated ones.
svm=SVM(4,irisData,target)
svm.fit()
# Rows 0-49 vs rows 50-99 of irisData; assumes the source file is ordered by
# class so each slice holds a single class (verify against the loaded table).
class1Acc=svm.accuracy(irisData[0:50],target[0:50])
class2Acc=svm.accuracy(irisData[50:100],target[50:100])

print("Class wise Accuracy (Class 1) : ",class1Acc)
print("Class wise Accuracy (Class 2) : ",class2Acc)

Class wise Accuracy (Class 1) :  86.0
Class wise Accuracy (Class 2) :  100.0


## **2. Perform classification over IRIS dataset using radial basis function neural network (no use of any library functions). For the identification of the initial cluster centers, use the k-means algorithm (reuse your previous assignment’s code). Report the individual class-wise accuracy, average accuracy and overall accuracy.**

In [None]:
class Cluster(object):
  """Plain record for one cluster: a list of member ids plus the current centroid."""

  def __init__(self,nodeIds,centroidNode):
    # Both values are stored as given (no copy is made).
    self.nodeIds,self.centroidNode=nodeIds,centroidNode

In [None]:
class KMeans(object):
  """Sequential (online-update) k-means over the rows of ``dataSet``.

  Each cluster is a ``Cluster`` record holding member row indices and a
  centroid. Centroids are seeded from ``k`` distinct, randomly chosen rows and
  are moved towards every row as soon as that row is assigned, rather than
  being recomputed once at the end of a pass.

  Attributes
  ----------
  clusters   : list of ``Cluster`` records, one per centroid.
  meanSqDevs : value of ``AvgSqDev()`` recorded after every pass of ``fit``.
  """

  def __init__(self,dataSet,k):
    """Store the data and seed ``k`` clusters from randomly chosen rows.

    numpy raises ValueError when ``k`` exceeds the number of rows.
    """
    self.k=k
    self.dataSet=dataSet
    # Sample without replacement: drawing seeds with replacement could pick the
    # same row twice and start two clusters on an identical centroid.
    pnt=np.random.choice(len(self.dataSet),size=self.k,replace=False)
    self.clusters=[Cluster([],self.dataSet[i]) for i in pnt]
    self.meanSqDevs=[]

  def Clustering(self):
    """One pass: clear memberships, then assign every row to its nearest centroid."""
    for cluster in self.clusters:
      cluster.nodeIds=[]
    for i,node in enumerate(self.dataSet):
      minIndx,minD=self.MinDistanceCluster(node)
      nearest=self.clusters[minIndx]
      nearest.nodeIds.append(i)
      # The centroid moves immediately, so later rows in this pass see it.
      nearest.centroidNode=self.UpdateCentroid(nearest,node)

  def fit(self,nIters=20):
    """Run ``nIters`` clustering passes (default 20), logging AvgSqDev after each."""
    for _ in range(nIters):
      self.Clustering()
      self.meanSqDevs.append(self.AvgSqDev())

  def AvgSqDev(self):
    """Mean over clusters of the summed squared distance from members to their centroid.

    A cluster without members contributes 0.0.
    """
    nodes=self.dataSet
    sqDevs=[]
    for cluster in self.clusters:
      s=0.0
      for nodeId in cluster.nodeIds:
        s+=sum((nodes[nodeId]-cluster.centroidNode)**2)
      sqDevs.append(s)
    return np.mean(sqDevs)

  def UpdateCentroid(self,cluster,node):
    """Return the running mean of ``cluster`` after absorbing ``node``.

    Called after ``node``'s index has been appended, so ``len(cluster.nodeIds)``
    already counts it. The centroid carried over from the previous pass is
    therefore weighted as one extra sample next to the rows assigned so far.
    """
    weighted=len(cluster.nodeIds)*np.array(cluster.centroidNode)+np.array(node)
    return weighted/(len(cluster.nodeIds)+1)

  def MinDistanceCluster(self,node):
    """Return ``(index, distance)`` of the centroid closest to ``node``.

    Ties go to the later cluster (``<=``). Raises ValueError when no centroid
    yields a comparable distance (no clusters, or every distance is NaN).
    """
    minD=float('inf')
    minIndx=None
    for i,cluster in enumerate(self.clusters):
      # np.sqrt rather than math.sqrt: `math` is not imported in this notebook.
      # The builtin sum keeps this working for object-dtype rows as well.
      d=np.sqrt(sum((cluster.centroidNode-node)**2))
      if d <= minD:
        minD=d
        minIndx=i
    if minIndx is None:
      raise ValueError("no cluster with a valid distance to the given node")
    return int(minIndx),minD

In [None]:
# Cluster every row of the table on all columns except the trailing label
# column, using k=3 clusters.
_irisData=irisDataSet.to_numpy()[:,:-1]
kmeans=KMeans(_irisData,3)
kmeans.fit()

In [None]:
# Gather the fitted centroid of every cluster into `centres`, printing each one.
centres=[]
for cluster in kmeans.clusters:
  print(cluster.centroidNode)
  centres.append(cluster.centroidNode)

[5.518518518518518 2.6222222222222222 3.951851851851852 1.2185185185185188]
[6.5369863013698595 2.9643835616438365 5.258904109589041
 1.8452054794520547]
[5.005999999999999 3.418 1.4640000000000006 0.24399999999999994]


In [None]:
class RBFN(object):
  """Radial-basis-function network: K Gaussian hidden units feeding one linear output.

  Hidden-unit centres come from ``KMeans`` run on the training data inside
  ``fit``; the output weights and bias are then trained per sample with a
  delta-rule update on the residual ``target - output``.

  Parameters
  ----------
  dataSet : training samples, read by ``fit``.
  target  : numeric training targets paired with ``dataSet``.
  K       : number of hidden units / cluster centres.
  lr      : step size for the weight and bias updates.
  sigma   : multiplier on the squared distance inside the kernel exponent.
  nIters  : number of passes over the training data.
  """

  def __init__(self,dataSet,target,K = 3,lr = 0.01,sigma=1.0,nIters=100):
    self.dataSet,self.target=dataSet,target
    self.K,self.lr=K,lr
    self.nIters,self.sigma=nIters,sigma

    # Random start in [0, 0.5]: the bias is drawn first, then the K weights.
    self.bias=random.uniform(0.0, 0.5)
    self.weights=[random.uniform(0.0, 0.5) for _ in range(self.K)]

    # Filled in by fit().
    self.centres=[]

  def fit (self):
    """Locate centres with KMeans, then train the weights and bias for ``nIters`` passes."""
    km=KMeans(self.dataSet,self.K)
    km.fit()
    self.centres=[cluster.centroidNode for cluster in km.clusters]

    for _epoch in range(self.nIters):
      for sample,label in zip(self.dataSet,self.target):
        # Forward pass with the current parameters.
        out=0.0
        for idx,centre in enumerate(self.centres):
          out+=self.kernelFunc(centre,sample)*self.weights[idx]
        out+=self.bias

        residual=(label-out)

        # Each weight moves by lr * residual * its unit's activation.
        for j in range(self.K):
          step=residual*self.kernelFunc(self.centres[j],sample)
          self.weights[j]+=self.lr*step
        self.bias+=self.lr*residual

  def predict(self, X):
    """Return a list with the raw (unthresholded) network output for each row of ``X``."""
    outputs=[]
    for sample in X:
      total=0.0
      for j,centre in enumerate(self.centres):
        total+=self.kernelFunc(centre,sample)*self.weights[j]
      outputs.append(total+self.bias)
    return outputs

  def accuracy(self,X,Y):
    """Percentage (0-100) of rows whose thresholded output equals the label in ``Y``.

    Outputs are bucketed as: <= -0.5 -> -1, (-0.5, 0.5] -> 0, otherwise 1.
    """
    def _bucket(val):
      if val <= -0.5:
        return -1
      if val <= 0.5:
        return 0
      return 1

    labels=[_bucket(val) for val in self.predict(X)]

    hits=0.0
    for guess,truth in zip(labels,(Y[i] for i in range(len(labels)))):
      if guess==truth:
        hits+=1

    return (hits/len(labels))*100

  def kernelFunc(self, center, data_point):
    """Gaussian kernel ``exp(-sigma * ||center - data_point||^2)``."""
    dist=np.linalg.norm(center-data_point)
    return np.exp(-self.sigma*dist**2)

In [None]:
# All rows, every column except the trailing class label.
_iris=irisDataSet.to_numpy()[:,:-1]
# Numeric targets for the network: setosa -> -1, virginica -> +1, any other label -> 0.
_target=irisDataSet['Class'].to_numpy()
_target=np.select(
    [_target=="Iris-setosa",_target=="Iris-virginica"],
    [-1,1],
    default=0,
)
rbfn=RBFN(_iris,_target)
rbfn.fit()

In [None]:
# Accuracy on three consecutive 50-row slices; assumes the table is ordered by
# class so that each slice holds exactly one class (verify against the data).
# The network was fitted on these same rows, so these are training accuracies.
class1Acc=rbfn.accuracy(_iris[0:50],_target[0:50])
class2Acc=rbfn.accuracy(_iris[50:100],_target[50:100])
class3Acc=rbfn.accuracy(_iris[100:150],_target[100:150])

In [None]:
# Report the per-slice accuracies computed in the previous cell.
print("Class Wise Accuracy :- ")
print("Class 1 : ",class1Acc)
print("Class 2 : ",class2Acc)
print("Class 3 : ",class3Acc)

Class Wise Accuracy :- 
Class 1 :  88.0
Class 2 :  94.0
Class 3 :  74.0


In [None]:
# Accuracy over every row the network was fitted on (training accuracy).
overallAcc=rbfn.accuracy(_iris,_target)
print("Overall Accuracy : ",overallAcc )

Overall Accuracy :  85.33333333333334
