In [5]:
from sklearn.datasets import make_classification
import pandas as pd 
import numpy as np 

In [6]:
# Synthetic binary-classification data: 400 samples, 4 features.
# random_state is pinned so that Restart & Run All reproduces the same data
# (and therefore the same training log and predictions) on every run.
X_raw, y_raw = make_classification(n_samples=400, n_features=4, random_state=42)
# Wrap the raw arrays in pandas objects with named feature columns.
X = pd.DataFrame(X_raw, columns=['f1', 'f2', 'f3', 'f4'])
y = pd.Series(y_raw)

Мягкий зазор (разработка)

In [20]:
class MySVM():
    '''Linear soft-margin SVM trained with per-sample (stochastic) sub-gradient descent.

    The per-sample objective is ``||w||^2 + C * max(0, 1 - y * (w.x + b))``
    with targets encoded as -1 / +1.

    Parameters
    ----------
    n_iter : int
        Number of full passes (epochs) over the training data.
    learning_rate : float
        Step size of every per-sample update.
    C : float
        Weight of the hinge-loss term relative to the ``||w||^2`` regulariser.
    weights, b : optional
        Preset coefficients. They allow ``predict`` to be used without
        training; ``fit`` always re-initialises both.
    '''

    def __init__(self, n_iter=10, learning_rate=0.001, C=1, weights=None, b=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.C = C
        self.__weights = weights
        self.__b = b  # hyperplane offset (bias)

    def __repr__(self):
        return f'MySVM class: n_iter={self.n_iter}, learning_rate={self.learning_rate}'

    def __calculate_loss(self, X, y):
        '''Return ``||w||^2 + C * mean(hinge loss)`` for targets ``y`` in {-1, +1}.'''
        scores = np.dot(X, self.__weights) + self.__b
        hinge = np.maximum(0, 1 - y * scores)
        return np.dot(self.__weights, self.__weights) + self.C * np.sum(hinge) / len(y)

    def __log_report(self, X, y, iteration):
        '''Print the current loss, labelled with the epoch index.

        Epoch 0 is labelled ``start``; note that ``fit`` calls this after the
        epoch's updates have been applied, not before training begins.
        '''
        label = 'start' if iteration == 0 else iteration
        print(f'{label} | loss: {self.__calculate_loss(X, y).round(2)}')

    def fit(self, X, y, verbose=False):
        '''Train the model on features ``X`` and binary targets ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix (DataFrame, ndarray or nested list).
        y : array-like of shape (n_samples,)
            Targets; every 0 is mapped to -1, all other values are used as given
            (so {0, 1} labels become {-1, 1}).
        verbose : bool
            If true, print the loss every 10th epoch and after the last one.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` contain a different number of samples.
        '''
        features = np.asarray(X, dtype=float)
        labels = np.asarray(y)
        if features.shape[0] != len(labels):
            raise ValueError(
                f'X and y must have the same number of samples, '
                f'got {features.shape[0]} and {len(labels)}'
            )
        # Re-encode the targets {0, 1} -> {-1, 1} as required by the hinge loss.
        signed = np.where(labels == 0, -1, labels)

        self.__weights = np.ones(features.shape[1])
        self.__b = 1
        # Training loop: one sub-gradient step per sample, n_iter passes.
        for epoch in range(self.n_iter):
            for x_i, y_i in zip(features, signed):
                margin = y_i * (np.dot(self.__weights, x_i) + self.__b)
                if margin >= 1:
                    # Outside the margin: only the regulariser contributes.
                    grad_w = 2 * self.__weights
                    grad_b = 0
                else:
                    # Inside the margin or misclassified: add the hinge sub-gradient.
                    grad_w = 2 * self.__weights - self.C * y_i * x_i
                    grad_b = -self.C * y_i

                self.__weights -= self.learning_rate * grad_w
                self.__b -= self.learning_rate * grad_b

            if verbose and (epoch % 10 == 0 or epoch == self.n_iter - 1):
                self.__log_report(features, signed, epoch)

    def predict(self, X):
        '''Predict class labels (0 or 1) for the rows of ``X``.

        A sample is assigned class 1 when its decision score ``w.x + b`` is
        strictly positive and class 0 otherwise.

        Raises
        ------
        RuntimeError
            If the model has no coefficients (neither fitted nor preset).
        '''
        if self.__weights is None or self.__b is None:
            raise RuntimeError('MySVM is not fitted: call fit() before predict()')
        scores = np.dot(np.asarray(X, dtype=float), self.__weights) + self.__b
        return (scores > 0).astype(int)

    def get_coef(self):
        '''Return the tuple ``(weights, b)`` currently stored on the model.'''
        return self.__weights, self.__b
        


In [22]:
# Soft-margin SVM: hinge-loss weight C=0.5, trained for 200 epochs.
model = MySVM(C=0.5, n_iter=200)

In [24]:
# Train on the full dataset; verbose=True prints the loss every 10th epoch and after the last one.
model.fit(X=X, y=y, verbose=True)

start | loss: 1.26
10 | loss: 0.41
20 | loss: 0.4
30 | loss: 0.4
40 | loss: 0.4
50 | loss: 0.4
60 | loss: 0.39
70 | loss: 0.39
80 | loss: 0.39
90 | loss: 0.39
100 | loss: 0.39
110 | loss: 0.39
120 | loss: 0.39
130 | loss: 0.39
140 | loss: 0.39
150 | loss: 0.39
160 | loss: 0.39
170 | loss: 0.39
180 | loss: 0.39
190 | loss: 0.39
199 | loss: 0.39


In [26]:
# Predicted class labels (0/1) for the training samples.
model.predict(X=X)

array([0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,