In [1]:

import pandas as pd
import os,time
import numpy as np
from tqdm import tqdm_notebook as tqdm


import scipy
from scipy import sparse
from prettytable import PrettyTable
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn import datasets
from sklearn.datasets import make_classification
from sklearn.datasets import make_blobs
from sklearn.datasets import make_gaussian_quantiles
from sklearn import svm

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

### Utility functions for accuracy calculation etc.

In [3]:

def accuracy_score(predicted, real):
    """Return the fraction of positions at which ``predicted`` equals ``real``.

    Both arguments must be indexable by integer position and have the same
    length (checked with an ``assert``). An empty input raises
    ``ZeroDivisionError`` because the count is divided by the length.
    """
    total = len(predicted)
    assert(total == len(real))
    # Count position-wise agreements; elements are compared one pair at a time.
    hits = sum(1 for pos in range(total) if real[pos] == predicted[pos])
    return hits/total

def timing(f):
    """Decorator that prints how long each call to ``f`` took.

    The wrapped function is called with the original arguments and its
    return value is passed through unchanged. After the call returns, a line
    of the form ``'<name> took <seconds> seconds'`` is printed. If ``f``
    raises, the exception propagates and nothing is printed.
    """
    import functools

    # functools.wraps keeps f's __name__/__doc__ on the wrapper, so decorated
    # functions still identify themselves correctly in help() and tracebacks.
    @functools.wraps(f)
    def timer(*args, **kwargs):
        # perf_counter is a monotonic clock intended for measuring intervals;
        # time.time() can jump if the system clock is adjusted.
        start = time.perf_counter()
        ret = f(*args, **kwargs)
        finish = time.perf_counter()
        print('%s took %0.3f seconds' % (f.__name__, (finish-start)))
        return ret
    return timer

    

### Simultaneous Multiclass SVM Algorithm

In [4]:
class SMCSVM:
    """Multiclass SVM that trains all classes simultaneously through one QP.

    For ``N`` training samples with ``d`` features and ``K`` classes, ``fit``
    creates ``m = N*(K-1)`` dual variables: one for every pair
    (sample ``i``, wrong class ``p``). The dual problem handed to
    ``cvxopt.solvers.qp`` is::

        minimise   1/2 a' P a - 1' a
        subject to 0 <= a <= C

    where ``P`` is the kernel (Gram) matrix over those ``m`` rows.

    Class labels must be the integers ``0 .. K-1``; the index arithmetic in
    ``fit`` and ``getKernelizedProduct`` uses the label value directly as a
    block/column index.

    Supported ``kernel`` values: ``'linear'``, ``'polynomial'`` and ``'rbf'``.
    The ``'rbf'`` kernel implemented here is ``exp(-||a - b|| / (2*sigma**2))``
    (note: the distance is *not* squared).
    """

    def __init__(self,C=10,kernel='rbf',degree=1,sigma=1.0):
        """Store the hyperparameters; nothing is computed until ``fit``.

        Parameters
        ----------
        C : upper bound on every dual variable (box constraint).
        kernel : ``'linear'``, ``'polynomial'`` or ``'rbf'``.
        degree : exponent used by the polynomial kernel (and applied to the
            ``1 + K`` term in the rbf prediction path).
        sigma : width parameter of the rbf kernel.
        """
        self.kernel = kernel
        self.C=C
        self.degree = degree
        self.sigma=sigma
        # The attributes below are populated by fit().
        self.alphas2=None          # dual solution returned by the QP solver
        self.fullXmatrix=None      # signed, block-structured feature matrix
        self.IntermXmatrix=None    # signed sample-index matrix (rbf only)
        self.train_X_data = None
        self.num_classes = None

    def fit(self, X_data, Y_label):
        """Solve the dual QP for the given training data.

        Parameters
        ----------
        X_data : 2-D numpy array, one row per sample.
        Y_label : 1-D array of integer labels in ``0 .. K-1``.

        Raises
        ------
        ValueError
            If the distinct labels are not exactly ``0, 1, ..., K-1``.

        Requires the ``cvxopt`` package (imported here, at call time).
        """
        # Use the hyperparameters passed to the constructor. Reading
        # same-named module-level globals instead would silently ignore the
        # constructor arguments (or fail with NameError when they are unset).
        kernel = self.kernel
        degree = self.degree
        sigma = self.sigma
        C = self.C

        classes = np.unique(Y_label)
        num_classes = len(classes)
        # Labels are used directly as block indices below, so any other
        # encoding would overrun the preallocated matrices.
        if not np.array_equal(classes, np.arange(num_classes)):
            raise ValueError(
                'SMCSVM expects integer class labels 0..K-1, got %r' % (classes,))

        self.train_X_data = X_data
        X = X_data
        trainLabelVector = Y_label
        self.num_classes = num_classes

        # Each class owns a block of (d + 1) columns: d features plus a bias.
        blockWidth = X_data.shape[1]+1
        fullXmatrix = np.zeros(((X_data.shape[0]*(num_classes-1)),num_classes*blockWidth))

        count5 = 0  # index of the row currently being filled

        print('\n Calculating fullXmatrix')
        for i in tqdm(range(0,X.shape[0])):
            for p in range(0,num_classes):
                if(trainLabelVector[i]!=p):
                    # Row for (sample i, wrong class p): +[x_i, 1] in the
                    # true-class block and -[x_i, 1] in block p.
                    for j in range(0,num_classes):
                        if(j==trainLabelVector[i]):
                            fullXmatrix[count5,(j*blockWidth):((j+1)*blockWidth)-1] = X[i,:]
                            fullXmatrix[count5,((j+1)*blockWidth)-1] = 1.0
                        elif(j==p):
                            fullXmatrix[count5,(j*blockWidth):((j+1)*blockWidth)-1] = -1*X[i,:]
                            fullXmatrix[count5,((j+1)*blockWidth)-1] = -1.0
                    count5 += 1

        m = fullXmatrix.shape[0]
        M1 = sparse.csr_matrix(fullXmatrix)
        IntermXmatrix = np.zeros(((X_data.shape[0]*(num_classes-1)),num_classes))
        if(kernel=='polynomial'):
            # .toarray() yields an ndarray, so ** is element-wise. With
            # .todense() the result is an np.matrix, for which ** means
            # matrix power and would not match the element-wise power
            # used in predict().
            H = (1+M1.dot(M1.T).toarray())**degree
        elif(kernel=='linear'):
            H = M1.dot(M1.T).toarray()
        else:
            H,IntermXmatrix = self.getKernelizedProduct(X_data,Y_label,kernel,sigma)

        from cvxopt import matrix as cvxopt_matrix
        from cvxopt import solvers as cvxopt_solvers
        P = cvxopt_matrix(H)
        q = cvxopt_matrix(-1*np.ones((m, 1)))
        # G a <= h encodes  -a <= 0  and  a <= C.
        G = cvxopt_matrix(np.vstack((np.eye(m)*-1,np.eye(m))))
        h = cvxopt_matrix(np.hstack((np.zeros(m), np.ones(m) * C)))
        # NOTE(review): the recorded solver output in this notebook looks like
        # CVXOPT's built-in solver, so the 'solver' entry here may have no
        # effect - confirm against the cvxopt.solvers.qp documentation.
        opts = {'maxiters' : 30,'solver':'mosek'}
        sol = cvxopt_solvers.qp(P, q, G, h ,options = opts)
        alphas2 = np.array(sol['x'])
        self.alphas2=alphas2
        self.fullXmatrix=fullXmatrix
        self.IntermXmatrix=IntermXmatrix

    def predict(self,val_X_data):
        """Predict a class index for every row of ``val_X_data``.

        Returns a Python list with one ``np.argmax`` result per row. For a
        ``kernel`` other than ``'linear'``, ``'polynomial'`` or ``'rbf'`` the
        returned list is empty. ``fit`` must have been called first.
        """
        num_classes = self.num_classes
        train_X_data = self.train_X_data
        kernel = self.kernel
        degree = self.degree
        sigma = self.sigma
        alphas2=self.alphas2
        fullXmatrix=self.fullXmatrix
        IntermXmatrix=self.IntermXmatrix

        predictedLabelVector = []
        if(kernel == 'linear'):
            # Primal weights; consecutive chunks belong to classes 0..K-1.
            w = fullXmatrix.T@alphas2
            wForEachLabel = np.array_split(w,num_classes)
            print('\n Calculating prediction values')
            for i in tqdm(range(0,val_X_data.shape[0])):
                # Augment the sample with the constant bias feature.
                sample = np.append(val_X_data[i,:],1.0).reshape(-1,1)
                scoresForEachLabel = []
                for j in range(0,len(wForEachLabel)):
                    score = (wForEachLabel[j].T@sample)
                    scoresForEachLabel.append(score)
                maxScoreLabel = np.argmax(scoresForEachLabel)
                predictedLabelVector.append(maxScoreLabel)
        elif(kernel == 'polynomial'):
            print('\n Calculating prediction values')
            blockWidth = val_X_data.shape[1]+1
            for i in tqdm(range(0,val_X_data.shape[0])):
                # Column j holds [x, 1] placed in the block of class j.
                tempMatrix1 = np.zeros((num_classes*blockWidth,num_classes))
                for j in range(0,num_classes):
                    tempMatrix1[j*blockWidth:(j+1)*blockWidth,j] = np.append(val_X_data[i,:],1.0)
                scoresForEachLabel = (alphas2.T)@((1+(fullXmatrix@tempMatrix1))**degree)
                maxScoreLabel = np.argmax(scoresForEachLabel)
                predictedLabelVector.append(maxScoreLabel)
        elif(kernel == 'rbf'):
            print('\n Calculating prediction values')
            for i in tqdm(range(0,val_X_data.shape[0])):
                scoresForEachLabel = (alphas2.T)@((1+(self.getKernelizedProductforPrediction(train_X_data,val_X_data,IntermXmatrix,i,num_classes,kernel,sigma)))**degree)
                maxScoreLabel = np.argmax(scoresForEachLabel)
                predictedLabelVector.append(maxScoreLabel)
        return predictedLabelVector

    def getKernelizedProductforPrediction(self,X_data,val_X_data,IntermXmatrix,currentValIndex,num_classes,kernel,sigma):
        """Kernel values between every dual row and one validation sample.

        Returns an array of shape ``(N*(K-1), K)``. Entry ``[i, j]`` is
        non-zero only when ``IntermXmatrix[i, j]`` is non-zero; its value
        depends on the sign of that entry (see the two branches below). For
        any ``kernel`` other than ``'rbf'`` the result is all zeros.

        ``IntermXmatrix`` holds signed 1-based training-sample indices as
        produced by ``getKernelizedProduct``; ``currentValIndex`` is the
        0-based row of ``val_X_data`` to evaluate.
        """
        X = X_data
        m = (X_data.shape[0]*(num_classes-1))
        H = np.zeros((m,num_classes))
        # Diagonal matrix carrying the 1-based index of the validation sample,
        # mirroring the signed-index encoding of IntermXmatrix.
        matrix2 = (currentValIndex+1)*np.eye(num_classes)

        if(kernel == 'rbf'):
            for i in (range(0,m)):
                for j in range(0,num_classes):
                    count1 = 0  # non-zero entries of row i seen so far
                    for k in range(0,num_classes):
                        if(count1==2):
                            # Stop scanning once two non-zero entries of
                            # row i have been visited.
                            break
                        else:
                            temp3 = int(IntermXmatrix[i,k])
                            temp4 = int(matrix2[k,j])
                            if(temp3>0):
                                count1+=1
                                if(temp4>0):
                                    H[i,j] += np.exp(-1*(np.linalg.norm(X[temp3-1,:]-val_X_data[temp4-1,:]))/(2*sigma**2))
                            elif(temp3<0):
                                count1+=1
                                if(temp4>0):
                                    H[i,j] += np.exp(-1*(np.linalg.norm(np.append(val_X_data[temp4-1,:],1.0)-np.append(-1*X[-temp3-1,:],-1.0)))/(2*sigma**2))
        return H

    def getKernelizedProduct(self,X_data,Y_label,kernel,sigma):
        """Build the training Gram matrix for the rbf kernel.

        Returns ``(H, IntermXmatrix)``:

        * ``IntermXmatrix`` has one row per (sample ``i``, wrong class ``p``)
          pair and ``K`` columns; it stores ``+(i+1)`` in the true-class
          column and ``-(i+1)`` in column ``p`` (1-based so that sample 0 can
          carry a sign).
        * ``H`` is the symmetric ``m x m`` kernel matrix, ``m = N*(K-1)``.
          For any ``kernel`` other than ``'rbf'`` it is all zeros.
        """
        X = X_data
        trainLabelVector = Y_label
        num_classes = len(np.unique(Y_label))
        IntermXmatrix = np.zeros(((X_data.shape[0]*(num_classes-1)),num_classes))
        count6 = 0  # index of the row currently being filled
        print('\n Calculating Kernalized Product')
        for i in tqdm(range(0,X.shape[0])):
            for p in range(0,num_classes):
                if(trainLabelVector[i]!=p):
                    for j in range(0,num_classes):
                        if(j==trainLabelVector[i]):
                            IntermXmatrix[count6,j] = i+1
                        elif(j==p):
                            IntermXmatrix[count6,j] = -(i+1)
                    count6 += 1

        matrix2 = IntermXmatrix.T
        m = (X_data.shape[0]*(num_classes-1))
        H = np.zeros((m,m))

        if(kernel == 'rbf'):
            print('\n Calculating Kernalized Product')
            # Only the lower triangle (j <= i) is computed here.
            for i in tqdm(range(0,m)):
                for j in range(0,i+1):
                    count1 = 0  # non-zero entries of row i seen so far
                    for k in range(0,IntermXmatrix.shape[1]):
                        if(count1==2):
                            break
                        else:
                            temp3 = int(IntermXmatrix[i,k])
                            temp4 = int(matrix2[k,j])
                            if(temp3>0):
                                count1+=1
                                if(temp4>0):
                                    H[i,j] += np.exp(-1*(np.linalg.norm(X[temp3-1,:]-X[temp4-1,:]))/(2*sigma**2))
                                elif(temp4<0):
                                    H[i,j] += np.exp(-1*(np.linalg.norm(np.append(X[temp3-1,:],1.0)-np.append(-1*X[-temp4-1,:],-1.0)))/(2*sigma**2))
                            elif(temp3<0):
                                count1+=1
                                if(temp4>0):
                                    H[i,j] += np.exp(-1*(np.linalg.norm(np.append(X[temp4-1,:],1.0)-np.append(-1*X[-temp3-1,:],-1.0)))/(2*sigma**2))
                                elif(temp4<0):
                                    H[i,j] += np.exp(-1*(np.linalg.norm(-1*X[-temp3-1,:]+X[-temp4-1,:]))/(2*sigma**2))

        # Mirror the strict lower triangle into the (still zero) upper
        # triangle so that H is a full symmetric matrix. A loop testing
        # `j > i` while iterating j over range(0, i+1) can never do this.
        H = H + np.tril(H, -1).T

        return H,IntermXmatrix

### Plotting Function

In [5]:
def plot_decision_regions(classifier,train_X_data,train_Y_data,resolution=1):
    """Draw the classifier's decision regions over the first two features.

    A grid covering the range of columns 0 and 1 of ``train_X_data`` (padded
    by 1 on each side, step ``resolution``) is passed to
    ``classifier.predict``; the predictions are drawn as filled contours and
    the training points are scattered on top, one marker/colour per class.
    At most five classes are supported (five markers and colours are defined).
    """
    marker_cycle = ('s', 'x', 'o', '^', 'v')
    color_cycle = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    class_values = np.unique(train_Y_data)
    cmap = ListedColormap(color_cycle[:len(class_values)])

    # Grid over the padded bounding box of the two plotted features.
    f0_lo, f0_hi = train_X_data[:, 0].min() - 1, train_X_data[:, 0].max() + 1
    f1_lo, f1_hi = train_X_data[:, 1].min() - 1, train_X_data[:, 1].max() + 1
    grid_f0, grid_f1 = np.meshgrid(
        np.arange(f0_lo, f0_hi, resolution),
        np.arange(f1_lo, f1_hi, resolution),
    )

    # One prediction per grid node, reshaped back onto the grid.
    grid_points = np.array([grid_f0.ravel(), grid_f1.ravel()]).T
    region_labels = np.array(classifier.predict(grid_points)).reshape(grid_f0.shape)
    plt.contourf(grid_f0, grid_f1, region_labels, alpha=0.1, cmap=cmap)
    plt.xlim(grid_f0.min(), grid_f0.max())
    plt.ylim(grid_f1.min(), grid_f1.max())

    # Overlay the samples, class by class.
    for idx, cl in enumerate(class_values):
        members = train_Y_data == cl
        plt.scatter(
            x=train_X_data[members, 0],
            y=train_X_data[members, 1],
            alpha=0.8,
            c=cmap(idx),
            marker=marker_cycle[idx],
            label=cl,
        )
    plt.legend(loc='upper left')
    plt.show()

### Cross validation with Bias variance Estimation Plot

In [6]:
@timing
def performCrossValidation(X1,Y1,num_folds,classifier):
    """Run ``num_folds``-fold cross validation and return ``(mean_error, mean_accuracy)``.

    ``X1`` and ``Y1`` are indexed with the integer index arrays produced by
    ``KFold.split``. The same ``classifier`` object is re-fitted on every
    fold, so after the call it holds the model of the last fold. The folds
    are taken in order (``KFold`` is created without shuffling).
    """
    print('-------------Performing %d-fold cross validation------------------ '%(num_folds))
    from sklearn.model_selection import KFold
    splitter = KFold(n_splits=num_folds)
    splitter.get_n_splits(X1)  # return value is not used

    fold_errors = []
    fold_accuracies = []
    for train_rows, test_rows in splitter.split(X1):
        classifier.fit(X1[train_rows], Y1[train_rows])
        fold_predictions = classifier.predict(X1[test_rows])

        fold_accuracy = accuracy_score(fold_predictions, Y1[test_rows])
        fold_errors.append(1- fold_accuracy)
        fold_accuracies.append(fold_accuracy)

    return np.mean(fold_errors), np.mean(fold_accuracies)

In [7]:
# Summary table: one row per dataset. The last three columns receive the
# mean cross-validation error of each method (rows are added in later cells).
pt = PrettyTable()
columns = ['dataset', '# points', '# attributess', '#classes', '1-v-r', '1-v-1', 'SMCSVM']
pt.field_names = columns

### Testing the Algorithm on Iris Dataset

In [8]:
dataset = datasets.load_iris()
# Start this dataset's table row: name, #points, #attributes, #classes.
results = ['IRIS', *dataset.data.shape, len(dataset.target_names)]
# Fixed 80/20 hold-out split used for the confusion matrices below.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.2, random_state=42
)

In [9]:
# Hyperparameters for the SMCSVM run below.
C = 10          # upper bound on the dual variables
kernel = 'rbf'
sigma = 0.8     # rbf width
degree = 1

In [10]:
# Train SMCSVM on the hold-out split and tabulate predicted (rows) vs
# actual (columns) labels.
clf = SMCSVM(C=C, kernel=kernel, degree=degree, sigma=sigma)
clf.fit(X_train, y_train)
predictedLabels = clf.predict(X_test)
conMatrix = pd.crosstab(np.ravel(predictedLabels), np.ravel(y_test))
conMatrix


 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=120), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=120), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=240), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=240), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.8383e+02 -3.3531e+03  7e+03  2e-01  3e-15
 1:  1.3197e+02 -2.7660e+02  5e+02  4e-03  3e-15
 2:  1.7634e+01 -3.6319e+01  5e+01  2e-16  3e-15
 3: -3.7038e-01 -7.8933e+00  8e+00  2e-16  1e-15
 4: -2.0448e+00 -3.2767e+00  1e+00  2e-16  4e-16
 5: -2.2518e+00 -2.5468e+00  3e-01  2e-16  2e-16
 6: -2.3103e+00 -2.4006e+00  9e-02  2e-16  2e-16
 7: -2.3333e+00 -2.3588e+00  3e-02  2e-16  2e-16
 8: -2.3406e+00 -2.3452e+00  5e-03  2e-16  2e-16
 9: -2.3423e+00 -2.3427e+00  3e-04  2e-16  2e-16
10: -2.3424e+00 -2.3425e+00  3e-05  2e-16  2e-16
11: -2.3425e+00 -2.3425e+00  7e-07  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))




col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10,0,0
1,0,9,0
2,0,0,11


In [11]:
# Cross-validate SMCSVM on the full dataset (not only the training split).
num_folds = 4
mean_error_SMCSVM, mean_accuracy_SMCSVM = performCrossValidation(
    dataset.data, dataset.target, num_folds, clf
)

-------------Performing 4-fold cross validation------------------ 

 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=112), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=112), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=224), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=224), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.7725e+02 -3.2150e+03  7e+03  2e-01  3e-15
 1:  1.2296e+02 -2.6228e+02  4e+02  4e-03  3e-15
 2:  1.6401e+01 -3.4242e+01  5e+01  2e-16  3e-15
 3: -4.5145e-01 -7.5074e+00  7e+00  2e-16  1e-15
 4: -2.0053e+00 -3.1713e+00  1e+00  2e-16  4e-16
 5: -2.1832e+00 -2.4797e+00  3e-01  2e-16  3e-16
 6: -2.2328e+00 -2.3360e+00  1e-01  2e-16  3e-16
 7: -2.2567e+00 -2.2872e+00  3e-02  2e-16  2e-16
 8: -2.2645e+00 -2.2713e+00  7e-03  2e-16  3e-16
 9: -2.2667e+00 -2.2672e+00  6e-04  2e-16  3e-16
10: -2.2669e+00 -2.2669e+00  2e-05  2e-16  2e-16
11: -2.2669e+00 -2.2669e+00  7e-07  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=38), HTML(value='')))



 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=112), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=112), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=224), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=224), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.7911e+02 -3.1474e+03  6e+03  2e-01  3e-15
 1:  1.2525e+02 -2.6303e+02  4e+02  4e-03  3e-15
 2:  1.6615e+01 -3.4654e+01  5e+01  2e-16  3e-15
 3: -4.4103e-01 -7.5792e+00  7e+00  2e-16  1e-15
 4: -2.0144e+00 -3.2042e+00  1e+00  2e-16  5e-16
 5: -2.2105e+00 -2.4946e+00  3e-01  2e-16  3e-16
 6: -2.2600e+00 -2.3590e+00  1e-01  2e-16  2e-16
 7: -2.2831e+00 -2.3139e+00  3e-02  2e-16  3e-16
 8: -2.2920e+00 -2.2971e+00  5e-03  2e-16  3e-16
 9: -2.2938e+00 -2.2941e+00  3e-04  2e-16  3e-16
10: -2.2939e+00 -2.2939e+00  1e-05  2e-16  3e-16
11: -2.2939e+00 -2.2939e+00  4e-07  2e-16  3e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=38), HTML(value='')))



 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=113), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=113), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=226), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=226), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.7954e+02 -3.2067e+03  6e+03  2e-01  3e-15
 1:  1.2563e+02 -2.5439e+02  4e+02  4e-03  3e-15
 2:  1.6174e+01 -3.4044e+01  5e+01  2e-16  3e-15
 3: -4.9193e-01 -7.4777e+00  7e+00  2e-16  1e-15
 4: -2.0415e+00 -3.0341e+00  1e+00  2e-16  5e-16
 5: -2.2102e+00 -2.4830e+00  3e-01  1e-16  3e-16
 6: -2.2585e+00 -2.3456e+00  9e-02  2e-16  3e-16
 7: -2.2799e+00 -2.3043e+00  2e-02  2e-16  3e-16
 8: -2.2855e+00 -2.2913e+00  6e-03  2e-16  3e-16
 9: -2.2872e+00 -2.2879e+00  7e-04  2e-16  3e-16
10: -2.2874e+00 -2.2874e+00  4e-05  2e-16  3e-16
11: -2.2874e+00 -2.2874e+00  8e-07  2e-16  3e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=37), HTML(value='')))



 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=113), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=113), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=226), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=226), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.7066e+02 -3.2061e+03  7e+03  2e-01  3e-15
 1:  1.1862e+02 -2.6439e+02  4e+02  5e-03  3e-15
 2:  1.6425e+01 -3.4084e+01  5e+01  2e-16  3e-15
 3: -3.9568e-01 -7.4279e+00  7e+00  2e-16  2e-15
 4: -1.9957e+00 -2.9487e+00  1e+00  2e-16  5e-16
 5: -2.1622e+00 -2.4241e+00  3e-01  1e-16  3e-16
 6: -2.2118e+00 -2.2939e+00  8e-02  2e-16  2e-16
 7: -2.2333e+00 -2.2549e+00  2e-02  2e-16  2e-16
 8: -2.2380e+00 -2.2433e+00  5e-03  2e-16  2e-16
 9: -2.2395e+00 -2.2402e+00  6e-04  2e-16  3e-16
10: -2.2397e+00 -2.2398e+00  5e-05  2e-16  3e-16
11: -2.2397e+00 -2.2397e+00  8e-07  2e-16  3e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=37), HTML(value='')))


performCrossValidation took 4.980 seconds


In [12]:
# NOTE: at this point in the notebook no row has been added to `pt` yet
# (the first add_row call comes in a later cell), so only the header prints.
print(pt)

+---------+----------+---------------+----------+-------+-------+--------+
| dataset | # points | # attributess | #classes | 1-v-r | 1-v-1 | SMCSVM |
+---------+----------+---------------+----------+-------+-------+--------+
+---------+----------+---------------+----------+-------+-------+--------+


In [13]:
# Baseline: scikit-learn RBF SVC with a one-vs-one decision function.
# C is passed by keyword because the SVC constructor parameters are
# keyword-only in current scikit-learn releases.
clf = svm.SVC(C=C, gamma=0.78, decision_function_shape='ovo', kernel='rbf')
clf.fit(X_train, y_train)
predictedLabels = clf.predict(X_test)
# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.array(predictedLabels).ravel(),y_test.reshape(-1,1).ravel())
conMatrix

col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10,0,0
1,0,9,0
2,0,0,11


In [14]:
# Cross-validate the SVC baseline on the full dataset with the same fold count.
mean_error_OVO, mean_accuracy_OVO = performCrossValidation(dataset.data,dataset.target,num_folds,clf)

-------------Performing 4-fold cross validation------------------ 
performCrossValidation took 0.009 seconds


In [15]:
# Baseline: linear SVM trained one-vs-rest.
lin_clf = svm.LinearSVC(C=C, multi_class='ovr', max_iter=100000)
lin_clf.fit(X_train, y_train)
predictedLabels = lin_clf.predict(X_test)
# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.ravel(predictedLabels), np.ravel(y_test))
conMatrix

col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10,0,0
1,0,9,0
2,0,0,11


In [16]:
# Cross-validate the LinearSVC baseline on the full dataset.
mean_error_OVR, mean_accuracy_OVR = performCrossValidation(dataset.data,dataset.target,num_folds,lin_clf)

-------------Performing 4-fold cross validation------------------ 
performCrossValidation took 0.604 seconds


In [17]:
# Complete the row with the mean CV errors, in table-column order
# ('1-v-r', '1-v-1', 'SMCSVM'), and add it to the summary table.
results.extend([mean_error_OVR, mean_error_OVO, mean_error_SMCSVM])
pt.add_row(results)

### Testing the Algorithm on Wine Dataset

In [18]:
dataset = datasets.load_wine()
# Start this dataset's table row: name, #points, #attributes, #classes.
results = ['WINE', *dataset.data.shape, len(dataset.target_names)]
# Fixed 80/20 hold-out split used for the confusion matrices below.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.2, random_state=42
)

In [19]:
# Hyperparameters for the SMCSVM run below.
C = 10          # upper bound on the dual variables
kernel = 'rbf'
sigma = 0.8     # rbf width
degree = 1

In [20]:
# Train SMCSVM on the hold-out split and tabulate predicted (rows) vs
# actual (columns) labels.
clf = SMCSVM(C=C, kernel=kernel, degree=degree, sigma=sigma)
clf.fit(X_train, y_train)
predictedLabels = clf.predict(X_test)
conMatrix = pd.crosstab(np.ravel(predictedLabels), np.ravel(y_test))
conMatrix


 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=284), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=284), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.4365e+03 -5.3354e+03  7e+03  3e-17  2e-15
 1:  1.6906e+02 -4.9804e+02  7e+02  2e-16  2e-15
 2: -3.1474e+01 -1.2204e+02  9e+01  2e-16  7e-16
 3: -4.6698e+01 -5.4354e+01  8e+00  2e-16  3e-16
 4: -4.6946e+01 -4.7056e+01  1e-01  2e-16  2e-16
 5: -4.6946e+01 -4.6948e+01  1e-03  2e-16  2e-16
 6: -4.6946e+01 -4.6946e+01  1e-05  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=36), HTML(value='')))




col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,12,3,1
1,0,11,2
2,2,0,5


In [21]:
# Cross-validate SMCSVM on the full dataset (not only the training split).
num_folds = 4
mean_error_SMCSVM, mean_accuracy_SMCSVM = performCrossValidation(
    dataset.data, dataset.target, num_folds, clf
)

-------------Performing 4-fold cross validation------------------ 

 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=133), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=133), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=266), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=266), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.3452e+03 -4.9863e+03  6e+03  3e-17  2e-15
 1:  1.5827e+02 -4.6574e+02  6e+02  1e-16  2e-15
 2: -2.9367e+01 -1.1409e+02  8e+01  2e-16  8e-16
 3: -4.3619e+01 -5.0787e+01  7e+00  2e-16  3e-16
 4: -4.3852e+01 -4.3955e+01  1e-01  2e-16  2e-16
 5: -4.3852e+01 -4.3853e+01  1e-03  2e-16  2e-16
 6: -4.3852e+01 -4.3852e+01  1e-05  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))



 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=133), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=133), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=266), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=266), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.3454e+03 -4.9814e+03  6e+03  3e-17  2e-15
 1:  1.5814e+02 -4.6586e+02  6e+02  2e-16  2e-15
 2: -2.9453e+01 -1.1417e+02  8e+01  2e-16  7e-16
 3: -4.3693e+01 -5.0854e+01  7e+00  2e-16  3e-16
 4: -4.3925e+01 -4.4028e+01  1e-01  2e-16  2e-16
 5: -4.3925e+01 -4.3926e+01  1e-03  2e-16  2e-16
 6: -4.3925e+01 -4.3925e+01  1e-05  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))



 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=134), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=134), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=268), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=268), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.3555e+03 -5.0339e+03  6e+03  3e-17  2e-15
 1:  1.5951e+02 -4.6977e+02  6e+02  2e-16  2e-15
 2: -2.9696e+01 -1.1514e+02  9e+01  2e-16  7e-16
 3: -4.4058e+01 -5.1282e+01  7e+00  2e-16  3e-16
 4: -4.4293e+01 -4.4396e+01  1e-01  2e-16  2e-16
 5: -4.4293e+01 -4.4294e+01  1e-03  2e-16  2e-16
 6: -4.4293e+01 -4.4293e+01  1e-05  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=44), HTML(value='')))



 Calculating fullXmatrix


HBox(children=(IntProgress(value=0, max=134), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=134), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=268), HTML(value='')))



 Calculating Kernalized Product


HBox(children=(IntProgress(value=0, max=268), HTML(value='')))


     pcost       dcost       gap    pres   dres
 0:  1.3554e+03 -5.0014e+03  6e+03  3e-17  2e-15
 1:  1.5916e+02 -4.6891e+02  6e+02  2e-16  2e-15
 2: -2.9611e+01 -1.1486e+02  9e+01  2e-16  7e-16
 3: -4.3943e+01 -5.1152e+01  7e+00  2e-16  3e-16
 4: -4.4178e+01 -4.4281e+01  1e-01  2e-16  2e-16
 5: -4.4178e+01 -4.4179e+01  1e-03  2e-16  2e-16
 6: -4.4178e+01 -4.4178e+01  1e-05  2e-16  2e-16
Optimal solution found.

 Calculating prediction values


HBox(children=(IntProgress(value=0, max=44), HTML(value='')))


performCrossValidation took 7.098 seconds


In [22]:
# Baseline: scikit-learn RBF SVC with a one-vs-one decision function.
# C is passed by keyword because the SVC constructor parameters are
# keyword-only in current scikit-learn releases.
clf = svm.SVC(C=C, gamma=0.001, decision_function_shape='ovo', kernel='rbf')
clf.fit(X_train, y_train)

predictedLabels = clf.predict(X_test)

# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.array(predictedLabels).ravel(),y_test.reshape(-1,1).ravel())
conMatrix

col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,12,0,1
1,0,12,1
2,2,2,6


In [23]:
# Cross-validate the SVC baseline on the full dataset with the same fold count.
mean_error_OVO, mean_accuracy_OVO = performCrossValidation(dataset.data,dataset.target,num_folds,clf)

-------------Performing 4-fold cross validation------------------ 
performCrossValidation took 0.043 seconds


In [24]:
# Baseline: linear SVM trained one-vs-rest.
lin_clf = svm.LinearSVC(C=C, multi_class='ovr', max_iter=100000)
lin_clf.fit(X_train, y_train)
predictedLabelVector = lin_clf.predict(X_test)
# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.ravel(predictedLabelVector), np.ravel(y_test))
conMatrix



col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,13,0,0
1,1,14,0
2,0,0,8


In [25]:
# Cross-validate the LinearSVC baseline on the full dataset.
mean_error_OVR, mean_accuracy_OVR = performCrossValidation(dataset.data,dataset.target,num_folds,lin_clf)

-------------Performing 4-fold cross validation------------------ 




performCrossValidation took 2.223 seconds




In [26]:
# Complete the row with the mean CV errors, in table-column order
# ('1-v-r', '1-v-1', 'SMCSVM'), and add it to the summary table.
results.extend([mean_error_OVR, mean_error_OVO, mean_error_SMCSVM])
pt.add_row(results)

In [27]:
# Append the summary table to a text file next to the notebook.
path = './test_results.txt'
with open(path, 'a+') as thefile:
    # The file is opened in append mode, so end the table with a newline;
    # otherwise the next run's table starts on the same line as this
    # table's bottom border.
    thefile.write(pt.get_string() + '\n')

### testing 

In [30]:
# NOTE(review): the `target` values displayed for this dataset further down
# are real-valued measurements (151., 75., 141., ...) rather than class
# indices - confirm that this dataset is intended for the classifiers used
# in the following cells.
dataset=datasets.load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size = 0.2, random_state = 42)

In [31]:
# Displays the whole loaded dataset object (data and target arrays).
dataset

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990842, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06832974, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286377, -0.02593034],
        ..., 
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04687948,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452837, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00421986,  0.00306441]]),
 'target': array([ 151.,   75.,  141.,  206.,  135.,   97.,  138.,   63.,  110.,
         310.,  101.,   69.,  179.,  185.,  118.,  171.,  166.,  144.,
          97.,  168.,   68.,   49.,   68.,  245.,  184.,  202.,  137.,
          85.,  131.,  283.,  129.,   59.,  341.,   87.,   65.,  102.,
         265.,  276.,  252.,   90.,  100.,   55.,   61.,   92.,  259.,

In [None]:
# Hyperparameters for the SMCSVM run below.
C = 10          # upper bound on the dual variables
kernel = 'rbf'
sigma = 0.8     # rbf width
degree = 1

In [None]:
# NOTE(review): SMCSVM compares each label against range(num_classes) when
# building its matrices, so it presumably needs labels encoded as 0..K-1.
# y_train here comes from load_diabetes - confirm the encoding before running.
clf = SMCSVM(C,kernel,degree,sigma)
clf.fit(X_train,y_train)
predictedLabels = clf.predict(X_test)
# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.array(predictedLabels).ravel(),y_test.reshape(-1,1).ravel())
conMatrix

In [None]:
# Cross-validate SMCSVM on the full dataset; the (mean_error, mean_accuracy)
# tuple is displayed as the cell output rather than stored.
num_folds = 4
performCrossValidation(
    dataset.data, dataset.target, num_folds, clf
)

In [None]:
# Baseline: scikit-learn SVC with its default settings.
clf = svm.SVC()
clf.fit(X_train, y_train)
predictedLabels = clf.predict(X_test)
# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.ravel(predictedLabels), np.ravel(y_test))
conMatrix

In [None]:
# Cross-validate the SVC baseline; the returned (mean_error, mean_accuracy)
# tuple is shown as the cell output.
performCrossValidation(dataset.data,dataset.target,num_folds,clf)

In [None]:
# Baseline: linear SVM trained one-vs-rest.
lin_clf = svm.LinearSVC(C=C, multi_class='ovr', max_iter=100000)
lin_clf.fit(X_train, y_train)
predictedLabelVector = lin_clf.predict(X_test)
# Rows: predicted label, columns: actual label.
conMatrix = pd.crosstab(np.ravel(predictedLabelVector), np.ravel(y_test))
conMatrix

In [None]:
# Cross-validate the LinearSVC baseline; the returned
# (mean_error, mean_accuracy) tuple is shown as the cell output.
performCrossValidation(dataset.data,dataset.target,num_folds,lin_clf)