In [1]:
import pandas as pd 
import numpy as np 
from sklearn.datasets import make_classification

In [2]:
# Fixed seed so that Restart & Run All regenerates exactly the same synthetic dataset.
X, y = make_classification(n_samples=400, n_features=4, random_state=42)
X = pd.DataFrame(X, columns=['f1', 'f2', 'f3', 'f4'])
y = pd.Series(y)

Нужно реализовать метод `predict()`

In [11]:
class MySVM():
    """Linear SVM classifier trained with per-sample sub-gradient descent.

    The objective is ``w.w + mean(max(0, 1 - y * (X.w + b)))`` where the
    targets are encoded internally as {-1, 1}. ``fit`` accepts {0, 1} labels
    and ``predict`` returns {0, 1} labels.

    Parameters
    ----------
    n_iter : int, default 10
        Number of full passes over the training data.
    learning_rate : float, default 0.001
        Step size applied to every per-sample update.
    weights : array-like or None, default None
        Stored as the current weight vector. Note that ``fit`` always
        re-initialises the weights to ones.
    b : float or None, default None
        Stored as the current hyperplane offset. Note that ``fit`` always
        re-initialises it to 1.
    """

    def __init__(self, n_iter=10, learning_rate=0.001, weights=None, b=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.__weights = weights
        self.__b = b  # hyperplane offset (bias)

    def __repr__(self):
        return f'MySVM class: n_iter={self.n_iter}, learning_rate={self.learning_rate}'

    def __decision_function(self, X):
        """Return the raw scores ``X.w + b`` for the current coefficients."""
        return np.dot(np.asarray(X), self.__weights) + self.__b

    def __calculate_loss(self, X, y):
        """Return the loss ``w.w + mean(max(0, 1 - y * score))``; ``y`` must be in {-1, 1}."""
        scores = self.__decision_function(X)
        loss = np.dot(self.__weights, self.__weights) + np.sum(np.maximum(0, 1 - y * scores)) / len(y)
        return loss

    def __log_report(self, X, y, iteration):
        """Print the current loss, labelling iteration 0 as ``start``."""
        if iteration == 0:
            print(f'start | loss: {self.__calculate_loss(X, y).round(2)}')
        else:
            print(f'{iteration} | loss: {self.__calculate_loss(X, y).round(2)}')

    def fit(self, X:pd.DataFrame, y:pd.Series, verbose=False):
        """Train the model on ``X`` / ``y``.

        Parameters
        ----------
        X : DataFrame or 2-D array-like of shape (n_samples, n_features)
            Training features.
        y : Series or 1-D array-like of length n_samples
            Class labels; every 0 is mapped to -1, other values are kept.
        verbose : bool, default False
            When True, print the loss after every 10th epoch and after the
            last one.
        """
        # np.asarray accepts DataFrames/Series as well as plain arrays and lists.
        features = np.asarray(X)
        labels = np.asarray(y)
        targets = np.where(labels == 0, -1, labels)  # relabel targets {0, 1} -> {-1, 1}

        n_samples, n_features = features.shape
        self.__weights = np.ones(n_features)
        self.__b = 1

        # Training loop: one sequential update per sample, n_iter passes over the data.
        for epoch in range(self.n_iter):
            for i in range(n_samples):
                margin = targets[i] * (np.dot(self.__weights, features[i]) + self.__b)
                if margin >= 1:
                    # Sample is outside the margin: only the regulariser contributes.
                    grad_w = 2 * self.__weights
                    grad_b = 0
                else:
                    # Sample violates the margin: add the hinge sub-gradient.
                    grad_w = 2 * self.__weights - targets[i] * features[i]
                    grad_b = -targets[i]

                self.__weights -= self.learning_rate * grad_w
                self.__b -= self.learning_rate * grad_b

            if verbose and (epoch % 10 == 0 or epoch == self.n_iter - 1):
                self.__log_report(features, targets, epoch)

    def predict(self, X:pd.DataFrame):
        """Return predicted class labels (0 or 1) as a 1-D integer array.

        A sample is assigned class 1 when its score ``x.w + b`` is strictly
        positive and class 0 otherwise.

        Raises
        ------
        RuntimeError
            If the model has no coefficients yet (``fit`` was not called and
            no ``weights``/``b`` were passed to the constructor).
        """
        if self.__weights is None or self.__b is None:
            raise RuntimeError('MySVM is not fitted yet: call fit() before predict().')
        # atleast_1d keeps a 1-D result even when a single sample vector is passed.
        scores = np.atleast_1d(self.__decision_function(X))
        return np.where(scores > 0, 1, 0).astype(int)

    def get_coef(self):
        """Return the tuple ``(weights, b)`` holding the current coefficients."""
        return self.__weights, self.__b
        

In [14]:
# Train for 100 epochs; verbose=True prints the loss after every 10th epoch and after the last one.
model = MySVM(n_iter=100)
model.fit(X, y, verbose=True)

start | loss: 1.66
10 | loss: 0.66
20 | loss: 0.66
30 | loss: 0.66
40 | loss: 0.66
50 | loss: 0.66
60 | loss: 0.66
70 | loss: 0.66
80 | loss: 0.66
90 | loss: 0.66
99 | loss: 0.66


In [16]:
# Predicted class labels (0/1) for every row of X.
model.predict(X)

array([0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1,

In [18]:
# Predicted class labels for the first five rows of X only.
model.predict(X.iloc[0:5])

array([0, 1, 1, 1, 0])

In [20]:
# Ground-truth labels of the first five rows, to compare against the model's predictions.
y.iloc[0:5].to_list()

[0, 1, 1, 1, 0]

In [22]:
# Learned coefficients as a (weights, b) tuple.
model.get_coef()

(array([-0.19043469, -0.15463078, -0.04401787,  0.30181713]),
 0.12399999999999911)