# Binary SVC

In [0]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
from math import ceil

class BinarySVC(BaseEstimator, ClassifierMixin):
    """Linear binary classifier trained with mini-batch SGD on the hinge loss.

    The update rule treats ``y`` as a signed target; ``predict`` returns
    labels encoded as ``1`` / ``-1``.

    Parameters
    ----------
    eta : float
        Learning rate applied to every mini-batch update.
    max_iter : int
        Number of passes (epochs) over the training data.
    C : float
        Accepted and stored, but not used by the current update rule
        (no regularization term is applied).
    shuffle : bool
        If True, the training data is re-shuffled before every epoch.
    random_state : int
        Seed of the generator used for weight initialization and shuffling.
    batch_size : int
        Number of samples per mini-batch.
    """

    def __init__(self, eta=0.1, max_iter=50, C=0.1,
                 shuffle=False, random_state=1, batch_size=32):
        self.eta = eta
        self.max_iter = max_iter
        self.C = C
        self.shuffle = shuffle
        self.random_state = random_state
        self.batch_size = batch_size

    def fit(self, X, y):
        """Fit the model with mini-batch sub-gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Signed targets, one per sample.

        Returns
        -------
        self
            The fitted estimator (``w_`` and ``b_`` are set).
        """
        X = np.asarray(X)
        # Float targets keep the gradient accumulation in floating point even
        # when X or y arrive with narrow integer dtypes.
        y = np.asarray(y, dtype=float)
        self._initialize_weights(X.shape[1])
        n_batches = ceil(X.shape[0] / self.batch_size)

        for k in range(self.max_iter):
            if self.shuffle:
                X, y = self._shuffle(X, y)

            for i in range(n_batches):
                start = self.batch_size * i
                stop = start + self.batch_size
                X_batch = X[start:stop]
                y_batch = y[start:stop]

                # Only samples with a positive hinge loss contribute to the
                # sub-gradient; the others contribute exactly zero.
                hinge = 1 - y_batch * self.net_input(X_batch)
                violated = hinge > 0
                y_violated = y_batch[violated]
                grad_w = -(y_violated @ X_batch[violated])
                grad_b = -y_violated.sum()

                # Average over the batch (the last batch may be shorter).
                self.w_ = self.w_ - self.eta * grad_w / len(y_batch)
                self.b_ = self.b_ - self.eta * grad_b / len(y_batch)
            if k % 10 == 0:
                print(f'Iteration {k + 1} / {self.max_iter}')

        return self

    def _initialize_weights(self, m):
        """Seed the generator and draw ``m`` small random weights; bias is 0."""
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=m)
        self.b_ = 0.0

    def _shuffle(self, X, y):
        """Return X and y reordered by one shared random permutation.

        The permutation comes from the seeded generator, so a fixed
        ``random_state`` gives a reproducible training run.
        """
        r = self.rgen.permutation(X.shape[0])
        return X[r], y[r]

    def net_input(self, X):
        """Return the signed decision value ``X . w_ + b_``."""
        return np.dot(X, self.w_) + self.b_

    def predict(self, X):
        """Return 1 where the decision value is >= 0 and -1 elsewhere."""
        return np.where(self.net_input(X) >= 0.0, 1, -1)

## Test wine quality dataset

Test binary classification on the wine dataset, using the `color` column as the label.

In [2]:
import pandas as pd

# Location of the wine-quality CSV (a local path, despite the variable name).
wine_url = '/content/winequality.csv'

# Column 0 of the file is used as the DataFrame index.
wine = pd.read_csv(
    wine_url,
    index_col=0,
    sep=',',
)
wine.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,color
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


In [0]:
# Features are every column except 'color'; 'color' itself is the target.
X = wine.drop(columns=['color']).values
y = wine['color'].values

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1126,
    test_size=0.3,
)

In [0]:
# Binary classifier with per-epoch shuffling, 50 epochs and a fixed seed.
clf = BinarySVC(
    max_iter=50,
    shuffle=True,
    random_state=1126,
)

In [6]:
# Train on the wine training split. Progress is printed every 10th epoch and,
# as the cell's last expression, the fitted estimator is echoed.
clf.fit(X_train, y_train)

Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50


BinarySVC(C=0.1, batch_size=32, eta=0.1, max_iter=50, random_state=1126,
          shuffle=True)

In [7]:
from sklearn.metrics import accuracy_score

# Predict the held-out samples and report the fraction classified correctly.
y_pred = clf.predict(X_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)

print(score)

0.94


# SVC OvA

In [0]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np

class SVC(BaseEstimator, ClassifierMixin):
    """Multi-class classifier built from one-vs-rest ``BinarySVC`` models.

    One binary model is trained per distinct label; prediction picks the
    label whose model yields the largest decision value. All constructor
    arguments are forwarded unchanged to every ``BinarySVC``.
    """

    def __init__(self, eta=0.1, max_iter=50, C=0.1,
                 shuffle=False, random_state=1, batch_size=32):
        self.eta = eta
        self.max_iter = max_iter
        self.C = C
        self.shuffle = shuffle
        self.random_state = random_state
        self.batch_size = batch_size

    def fit(self, X, y):
        """Train one ``BinarySVC`` per class and return ``self``."""
        # `encoded` holds, for each sample, the index of its label in classes_.
        self.classes_, encoded = np.unique(y, return_inverse=True)
        self.clfs_ = []

        for target, _ in enumerate(self.classes_):
            print(f'Classifier{target} training start')

            # Current class becomes +1, every other class becomes -1.
            one_vs_rest = np.where(encoded == target, 1, -1)

            binary_clf = BinarySVC(
                eta=self.eta,
                max_iter=self.max_iter,
                C=self.C,
                shuffle=self.shuffle,
                random_state=self.random_state,
                batch_size=self.batch_size,
            )
            binary_clf.fit(X, one_vs_rest)
            self.clfs_.append(binary_clf)

        return self

    def predict(self, X):
        """Return, per sample, the label of the highest-scoring binary model."""
        scores = [binary_clf.net_input(X) for binary_clf in self.clfs_]
        winners = np.argmax(scores, axis=0)
        return self.classes_[winners]


## Test MNIST dataset

In [0]:
import struct
import numpy as np

fname_img = "/content/newtrain-images-idx3-ubyte"
fname_lbl = "/content/newtrain-labels-idx1-ubyte"

# Label file: an 8-byte big-endian header (two uint32 fields) followed by the
# label bytes. The "ubyte" files store unsigned bytes, so read them as uint8;
# int8 would turn any value >= 128 negative.
with open(fname_lbl, 'rb') as flbl:
    magic, num = struct.unpack(">II", flbl.read(8))
    y = np.fromfile(flbl, dtype=np.uint8)

# Image file: a 16-byte big-endian header (four uint32 fields) followed by the
# pixel bytes, reshaped to one flattened row per label.
with open(fname_img, 'rb') as fimg:
    magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
    X = np.fromfile(fimg, dtype=np.uint8).reshape(len(y), -1)

In [0]:
from sklearn.model_selection import train_test_split

# Seed shared by the split so the 70/30 partition is repeatable.
random_state = 1126

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=random_state,
    test_size=0.3,
)

In [19]:
from datetime import datetime

# Wall-clock timing of model construction plus training.
start_time = datetime.now()

clf = SVC(
    max_iter=50,
    shuffle=True,
    random_state=1126,
)
clf.fit(X_train, y_train)

end_time = datetime.now()
print('learning time:', end_time - start_time)

Classifier0 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier1 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier2 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier3 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier4 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier5 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier6 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier7 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
Classifier8 training start
Iteration 1 / 50
Iteration 11 / 50
Iteration 

In [20]:
from sklearn.metrics import accuracy_score

# Predict the held-out digits and report the fraction classified correctly.
y_pred = clf.predict(X_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(score)

0.8482083333333333
