# SVM Objective
$\min_{w \in \mathbb{R}^p, b \in \mathbb{R}} F(w,b)$ where $F(w,b) := \frac{1}{n} \sum_{i=1}^{n} \max\left \{ 1 - y_i(< w,x_i >+ b), 0 \right \}{}$

## SGD
$\text{let} \space \lambda = \frac{1}{C}$

$\nabla_{w} \tilde{F}(w_k, b_k) = \frac{1}{|B_k|} \sum_{r \in B_k}
\begin{cases}
 -y_rx_r \qquad \text{if} \space y_i < w_k,x_r> +b_k \leq 1  \\ 
 0 \space \qquad\quad\space\space \text{o.w.}
\end{cases} + \lambda w_k$  

$\nabla_{b} \tilde{F}(w_k, b_k) = \frac{1}{|B_k|} \sum_{r \in B_k}
\begin{cases}
 -y_r \qquad \text{if} \space y_i < w_k,x_r> +b_k \leq 1  \\ 
 0 \space \quad\quad\space\space\space \text{o.w.}
\end{cases}$

### Update weight
$w_{k+1} \leftarrow w_k - \eta \nabla_w \tilde{F}(w_k,b_k)$  
$b_{k+1} \leftarrow b_k - \eta \nabla_b \tilde{F}(w_k,b_k)$


# Load data

In [0]:
random_state = 1126

## Iris dataset

In [0]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

data = load_iris()
X = data.data
y = data.target

X_train, X_test, y_train, y_test =\
    train_test_split(X, y, test_size=0.3, random_state=random_state, stratify=y)

## Wine dataset

In [0]:
from sklearn.datasets import load_wine

data = load_wine()
X = data.data
y = data.target

X_train, X_test, y_train, y_test =\
    train_test_split(X, y, test_size=0.3, random_state=random_state, stratify=y)

## MNIST dataset

In [0]:
import struct
import numpy as np
from sklearn.model_selection import train_test_split

fname_img = "/content/drive/My Drive/2019/2019-2/인공지능/PBL2/data/newtrain-images-idx3-ubyte"
fname_lbl = "/content/drive/My Drive/2019/2019-2/인공지능/PBL2/data/newtrain-labels-idx1-ubyte"

with open(fname_lbl, 'rb') as flbl:
        magic, num = struct.unpack(">II", flbl.read(8))
        y = np.fromfile(flbl, dtype=np.int8)

with open(fname_img, 'rb') as fimg:
        magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        X = np.fromfile(fimg, dtype=np.uint8).reshape(len(y), -1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=random_state)

# SVC class

In [0]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from math import ceil

In [0]:
class SVC(BaseEstimator, ClassifierMixin):
    def __init__(self, eta=0.1, max_iter=50, C=0.1,
                 shuffle=True, randome_state=1, batch_size=32):
        self.eta = eta
        self.max_iter = max_iter
        self.C = C
        self.lambda_ = 1.0 / C
        self.shuffle = shuffle
        self.random_state = random_state
        self.batch_size = batch_size
    
    def fit(self, X, y):
        self.classes_, y = np.unique(y, return_inverse=True)
        num_class, p = len(self.classes_), X.shape[1]
        self._initialize_weights(num_class, p)
        
        r = np.arange(X.shape[0])

        for k in range(self.max_iter):
            if self.shuffle:
                self.rgen.shuffle(r)

            for i in range(ceil(X.shape[0] / self.batch_size)):
                batch_r = r[self.batch_size * i : self.batch_size * (i + 1)]
                sum_w = np.zeros((num_class, p))
                sum_b = np.zeros(num_class)

                for idx in batch_r:
                    xi = X[idx]
                    yi = -1 * np.ones(num_class)
                    yi[y[idx]] = 1

                    conf = yi * (np.dot(self.w_, xi) + self.b_)
                    conf_idx = np.where(conf <= 1)

                    yt = yi.reshape(yi.shape[0], -1)
                    xt = xi.reshape(-1, xi.shape[0])

                    sum_w[conf_idx] -= np.dot(yt, xt)[conf_idx]
                    sum_b[conf_idx] -= yi[conf_idx]

                # Update
                self.w_ = self.w_ - self.eta *\
                            (sum_w / len(batch_r) + self.lambda_ * self.w_)
                self.b_ = self.b_ - self.eta * sum_b / len(batch_r)
    
            if k % 10 == 0:
                print(f'Iteration {k + 1} / {self.max_iter}')

        return self

    def _initialize_weights(self, n_class, p):
        """
        Initialize weights to small random numbers.
        """
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=(n_class, p))
        self.b_ = np.zeros(n_class)

    def predict(self, X):
        dist = np.dot(X, self.w_.T) + self.b_
        pred = np.argmax(dist, axis=1)

        return self.classes_[pred]


In [119]:
from sklearn.metrics import accuracy_score
from datetime import datetime

# classifier class
clf = SVC(max_iter=50, eta=0.001, C=10, randome_state=random_state)

start_time = datetime.now()
clf.fit(X_train, y_train)
end_time = datetime.now()

y_pred = clf.predict(X_test)

score = accuracy_score(y_test, y_pred)
print('learning time:', end_time - start_time)
print('accuracy:', score)

Iteration 1 / 50
Iteration 11 / 50
Iteration 21 / 50
Iteration 31 / 50
Iteration 41 / 50
learning time: 0:01:45.708174
accuracy: 0.8659166666666667
