In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets

import warnings
warnings.filterwarnings('ignore')

# Подготовка датасета

## Выбор двух классов: versicolor и virginica

In [2]:
iris = datasets.load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)
target = pd.DataFrame(iris.target)
target.rename(columns={0: 'target'}, inplace=True)
data = data.merge(target, how='inner', left_index=True, right_index=True)
data_vv = data[data['target'] >= 1]
data_vv.target = data_vv.target.replace({1: 'versicolor', 2: 'virginica'})

data_to_regression = data_vv.copy()

In [3]:
y = data_to_regression.target
del data_to_regression['target']

In [4]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

In [5]:
le.fit(y)

In [6]:
y_ = np.array(pd.Series(data=le.transform(y)))

Получаем готовый датасет для дальнейшей работы.  
Его характеристики: 100 строк, 4 параметра.  
Отдельно выделяем Y (предсказание)

In [7]:
data_to_regression.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5


In [8]:
X = np.array(data_to_regression)

## Создаем класс Логистической регрессии  
Исходя из определения логистической регрессии (линейная регрессия + сигмоида)

In [22]:
class LinearRegression:

    def __init__(self):
        self.w = None
        self.lr = .1
        self.eps = .0001
        self.max_iter = 100
        self.X = None

    def _X_train_prep(self, X_):
        self.X = np.column_stack([X_, np.ones(X_.shape[0])])

    def _x_test_prep_(self, X):
        return np.column_stack([X, np.ones(X.shape[0])])

    def _mse(self, y):
        y_ = self.X @ self.w
        return np.sum((y - y_) ** 2) / len(y_)

    def _gr_mse(self, y):
        y_ = self.X @ self.w
        return 2 / len(self.X) * (y - y_) @ (-self.X)

    def _init_w(self):
        return np.zeros(self.X.shape[1])

    def fit(self, X, y):
        X = X.copy()
        self._X_train_prep(X)

        self.w = self._init_w()

        next_w = self.w

        for _ in range(self.max_iter):
            cur_w = next_w

            next_w = cur_w - self.lr * self._gr_mse(y)
            self.w = cur_w

            if np.linalg.norm(cur_w - next_w, ord=2) <= self.eps:
                break

    def get_coefs(self):
        return self.w[:-1]

    def get_w0(self):
        return self.w[-1]

    def predict(self, X):
        x = self._x_test_prep_(X)
        return x @ self.w


class LogRegression(LinearRegression):

    def __init__(self):
        super().__init__()
        self.type = 'grad'

    def _gr_log_loss(self, y):
        y_prob = self._sigmoid(self.X @ self.w)
        return self.X.T @ (y_prob - y)
    
    def _sgr_log_loss(self, y):
        

    def fit(self, X, y):
        X = X.copy()
        self._X_train_prep(X)

        self.w = self._init_w()

        next_w = self.w

        for _ in range(self.max_iter):
            cur_w = next_w
            
            if self.type = 'grad'
                next_w = cur_w - self.lr * self._gr_log_loss(y)
                self.w = cur_w

            if np.linalg.norm(cur_w - next_w, ord=2) <= self.eps:
                break

    @staticmethod
    def _sigmoid(L):
        return 1 / (1 + np.exp(-L))

    def predict_proba(self, X):
        X = self._x_test_prep_(X)
        return self._sigmoid(X @ self.w)

    def predict(self, X):
        x = self.predict_proba(X)
        return np.where(x > .5, 1, 0)

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_, test_size=.2, random_state=42)

In [34]:
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score, roc_auc_score, recall_score

In [23]:
log_reg = LogRegression()

In [24]:
log_reg.fit(X_train, y_train)

In [26]:
mean_squared_error(y_true=y_test, y_pred=log_reg.predict(X_test))

0.25

In [30]:
accuracy_score(y_true=y_test, y_pred=log_reg.predict(X_test))

0.75

In [33]:
f1_score(y_true=y_test, y_pred=log_reg.predict(X_test))

0.6153846153846154

In [36]:
roc_auc_score(y_test, log_reg.predict(X_test))

0.7083333333333334

In [37]:
recall_score(y_test, log_reg.predict(X_test))

0.5