In [1]:

import numpy
class SVC:
    """Binary linear soft-margin SVM trained by mini-batch sub-gradient descent.

    The objective is ``0.5 * ||W||^2 + C * sum_i max(0, 1 - y_i * (W . x_i + b))``.
    Labels enter the margin as plain multipliers, so they are expected to be
    +1 / -1 (this is not validated).

    Parameters
    ----------
    C : float, optional (default=1.0)
        Weight of the hinge-loss term relative to the ``0.5 * ||W||^2`` term.

    Attributes
    ----------
    W_ : numpy.ndarray of shape (1, n_features)
        Weights found by the last ``fit`` call (``0`` before any fit).
    b_ : float
        Bias found by the last ``fit`` call (``0`` before any fit).
    """

    def __init__(self, C=1.0):
        self.C = C
        self.W_ = 0
        self.b_ = 0

    def hingeLoss(self, X, Y, W, b):
        """Return the regularised hinge loss of ``(W, b)`` on ``(X, Y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        Y : array-like of n_samples labels
        W : array-like holding n_features weights, e.g. shape (1, n_features)
        b : scalar bias

        Returns
        -------
        Scalar ``0.5 * W.W + C * sum(max(0, 1 - Y * (X.W + b)))``.
        """
        X = numpy.asarray(X)
        Y = numpy.ravel(Y)
        w = numpy.ravel(W)

        # One matrix-vector product instead of a Python loop over the samples.
        margins = Y * (numpy.dot(X, w) + b)
        return 0.5 * numpy.dot(w, w) + self.C * numpy.maximum(0, 1 - margins).sum()

    def fit(self, X, Y, batch_size=120, learning_rate=0.001, max_itr=400, random_state=None):
        """Train on ``(X, Y)`` and return ``(W, b, losses)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        Y : array-like of n_samples labels
        batch_size : int
            Number of samples per gradient step.
        learning_rate : float
            Step size of each update.
        max_itr : int
            Number of passes (epochs) over the data.
        random_state : int or None, optional
            Seed for the per-epoch shuffle. ``None`` (the default) keeps using
            NumPy's global random state, so ``numpy.random.seed`` still works.

        Returns
        -------
        W : numpy.ndarray of shape (1, n_features)
        b : scalar bias
        losses : list
            Hinge loss on the full data, recorded at the start of every epoch.
        """
        X = numpy.asarray(X)
        Y = numpy.ravel(Y)
        m, n = X.shape  # m samples, n features

        if random_state is None:
            rng = numpy.random
        else:
            rng = numpy.random.RandomState(random_state)

        W = numpy.zeros((1, n))
        b = 0

        losses = []

        for _ in range(max_itr):
            losses.append(self.hingeLoss(X, Y, W, b))

            # Visit the samples in a fresh random order every epoch.
            ids = numpy.arange(m)
            rng.shuffle(ids)

            for batch_start in range(0, m, batch_size):
                batch = ids[batch_start:batch_start + batch_size]
                Xb = X[batch]
                Yb = Y[batch]

                # Only samples that are not strictly outside the margin
                # contribute to the hinge sub-gradient.
                margins = Yb * (numpy.dot(Xb, W[0]) + b)
                active = ~(margins > 1)

                gradw = self.C * numpy.dot(Yb[active], Xb[active])
                gradb = self.C * Yb[active].sum()

                # (W - gradw) is the sub-gradient of the objective w.r.t. W.
                W = W - learning_rate * (W - gradw)
                b = b + learning_rate * gradb

        self.W_ = W
        self.b_ = b

        return self.W_, self.b_, losses


In [52]:

from sklearn.preprocessing import LabelEncoder
import MySVM
import numpy as np

class OneVsOneSVM:
    '''
    Multi-class classifier made of one binary linear SVM per pair of classes
    (one-vs-one). A sample is assigned to the class that wins the most
    pairwise votes.

    Parameters
    ----------
    C : float, optional (default=1.0)
    Penalty parameter C of the error term, forwarded to MySVM.SVC.

    max_iter : int, optional (default=6000)
    Forwarded to MySVM.SVC.fit as max_itr.

    learning_rate : float, optional (default=0.00001)
    Forwarded to MySVM.SVC.fit.

    Attributes
    ----------
    svm_classifiers : dict
    svm_classifiers[i][j] = (W, b) for encoded classes i < j; class i is the
    +1 side and class j the -1 side of that binary problem.

    le_ : LabelEncoder or None
    Encoder fitted on the training labels (None before fit).

    Callable Functions
    ------------------

    fit(X,Y) : take data as X, target_labels as Y as input and trains the SVM model

    predict(X) : take one sample or a 2-D batch of samples and returns an array of predicted labels

    score(X,Y) : take data as X, target_labels as Y as input and returns the accuracy of the model

    '''
    def __init__(self, C=1.0, max_iter=6000, learning_rate=0.00001):
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.C = C
        self.svm_classifiers = {}
        # Kept on the instance so that several models can coexist; a shared
        # module-level encoder would be overwritten by the most recent fit.
        self.le_ = None

    def generateClasswiseData(self, X, Y):
        '''
        Group the rows of X by label.

        Y must hold the encoded labels 0 .. k-1 (k = number of distinct
        labels). Returns a dict mapping each label to a 2-D array with the
        rows of X that carry it.
        '''
        no_of_classes = len(np.unique(Y))
        no_of_samples = X.shape[0]

        data = {k: [] for k in range(no_of_classes)}

        for i in range(no_of_samples):
            data[Y[i]].append(X[i])

        for k in range(no_of_classes):
            data[k] = np.array(data[k])

        return data

    def getPairData(self, d1, d2):
        '''
        Stack two class-wise sample arrays into one binary training set.

        Rows of d1 come first and are labelled +1, rows of d2 follow and are
        labelled -1. Returns (data, labels) as float arrays.
        '''
        l1 = d1.shape[0]
        l2 = d2.shape[0]
        data = np.zeros((l1 + l2, d1.shape[1]))
        labels = np.zeros(l1 + l2)

        data[:l1] = d1
        data[l1:] = d2

        labels[:l1] = 1
        labels[l1:] = -1

        return data, labels

    def fit(self, X, Y):
        '''
        Train one MySVM.SVC per unordered pair of classes.

        X : 2-D array of samples, Y : labels of any type LabelEncoder accepts.
        A previous fit is replaced completely.
        '''
        encoder = LabelEncoder()
        encoder.fit(Y)
        Y = encoder.transform(Y)
        X = np.asarray(X)

        data = self.generateClasswiseData(X, Y)
        no_of_classes = len(data)

        svc = MySVM.SVC(self.C)
        # Build into a fresh dict so classifiers from an earlier fit on more
        # classes cannot leak into this model.
        classifiers = {}
        for i in range(no_of_classes):
            classifiers[i] = {}
            for j in range(i + 1, no_of_classes):
                x, y = self.getPairData(data[i], data[j])
                wts, b, _ = svc.fit(x, y, learning_rate=self.learning_rate, max_itr=self.max_iter)
                classifiers[i][j] = (wts, b)

        self.svm_classifiers = classifiers
        self.le_ = encoder

    def predict(self, X):
        '''
        Predict labels by majority vote over the pairwise classifiers.

        X may be a single sample (1-D) or a 2-D batch. Returns an array with
        one original (decoded) label per sample, so a single sample yields a
        one-element array. Ties go to the lowest encoded class (np.argmax).

        Raises ValueError when the model has not been fitted.
        '''
        if self.le_ is None or not self.svm_classifiers:
            raise ValueError("OneVsOneSVM is not fitted yet; call fit() first.")

        X = np.atleast_2d(np.asarray(X))
        classes = len(self.svm_classifiers)
        votes = np.zeros((X.shape[0], classes))

        for i in range(classes):
            for j in range(i + 1, classes):
                W, b = self.svm_classifiers[i][j]
                # ravel() lets W be (1, n) or (n,) and b be a scalar or a
                # one-element array.
                positive = (np.dot(X, np.ravel(W)) + np.ravel(b)) >= 0
                votes[:, i] += positive
                votes[:, j] += ~positive

        index = np.argmax(votes, axis=1)
        return self.le_.inverse_transform(index)

    def score(self, X, Y):
        '''
        Return the fraction of samples in X whose predicted label equals Y.

        Raises ValueError if X and Y differ in length and ZeroDivisionError
        if X holds no samples.
        '''
        X = np.asarray(X)
        Y = np.ravel(Y)
        no_of_samples = X.shape[0]
        if Y.shape[0] != no_of_samples:
            raise ValueError("X and Y must contain the same number of samples.")

        predictions = self.predict(X)
        count = int(np.sum(predictions == Y))

        return count / no_of_samples


In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import math

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import wordnet

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import chi2
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [54]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_20newsgroups

# Single seed for every source of randomness in this cell.
SEED = 42

# 1. Load dataset (downloaded and cached by scikit-learn on first use)
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
newsgroups = fetch_20newsgroups(subset='all', categories=categories)

# 2. Preprocess text and vectorize using TF-IDF
# NOTE(review): the vectorizer is fitted on the whole corpus before the split
# below, so the test documents influence the vocabulary and IDF weights. Fit it
# on the training texts only if a strictly held-out estimate is required.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X = vectorizer.fit_transform(newsgroups.data).toarray()

# 3. Encode labels (the dataset already provides integer targets)
y = newsgroups.target

# 4. Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# 5. Initialize and fit the OneVsOneSVM model
# Seed NumPy's global RNG so the mini-batch shuffling during training is
# repeatable -- assumes MySVM.SVC shuffles with numpy.random, as the SVC class
# defined in this notebook does; TODO confirm.
np.random.seed(SEED)
ovo_SVM = OneVsOneSVM(C=1.0, max_iter=6000, learning_rate=0.00001)
ovo_SVM.fit(X_train, y_train)

# 6. Evaluate the model
accuracy = ovo_SVM.score(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 83.51%


In [55]:
# Classify a single free-form sentence. Requires `vectorizer`, `ovo_SVM` and
# `newsgroups` from the training cell to exist in the kernel.

# Sentence to classify
custom_text = "The latest advances in AI have revolutionized the tech industry."

# 1. Turn the sentence into a dense TF-IDF row with the already-fitted vectorizer
custom_text_vectorized = vectorizer.transform([custom_text]).toarray()

# 2. Ask the model for the category; the result is indexed with [0] below
predicted_category = ovo_SVM.predict(custom_text_vectorized)

# 3. Map the numeric label to its newsgroup name
predicted_label = predicted_category[0]
predicted_category_name = newsgroups.target_names[predicted_label]

print(f"Predicted Category: {predicted_category_name}")


Predicted Category: sci.med
