# Логическая регрессия
Рассмотрим логистическую регрессию на примере набора данных цветов ириса.
Для простоты будем использовать только два признака Sepal Width и Sepal Length, а также два класса Setosa и Versicolor. Итак, наша задача построить разделяющую границу (decision boundary), которая бы позволила нам отделить наблюдения одного класса (Setosa) от другого (Versicolor). Мы хотим построить модель, которая позволит нам прогнозировать бинарный отклик, а точнее вероятность отнесения наблюдения к одному из классов.

In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split


class LogisticRegression:
    
    def __init__(self, alpha=0.01, max_iter=1000):
        self.alpha = alpha
        self.max_iter = max_iter

    def fit(self, X_train, y_train):
        X_train = X_train.tolist()
        y_train = y_train.tolist()
        
        X_matrix = []
        for i in range(len(X_train)):
            X_train[i].insert(0, 1)
            X_matrix.append(X_train[i])
        X_as_matrix = np.asmatrix(X_matrix)
        
        y_matrix = []
        for i in range(len(y_train)):
            y_matrix.append([y_train[i]])
            
        m = len(y_train)
        self.theta = np.zeros((len(X_matrix[0]), 1))

        for i in range(self.max_iter):
            self.theta -= self.alpha * (1 / m) * (np.matmul(X_as_matrix.T,(np.matmul(X_matrix, self.theta) - y_matrix)))

        theta_all = np.matrix.tolist(self.theta)
        self.coef_ = theta_all[1:]
        self.intercept_ = theta_all[0]
        return theta_all
    
    def predict(self, X_test):
        X_test = X_test.tolist()
        X_matrix_test = []
        for i in range(len(X_test)):
            X_test[i].insert(0, 1)
            X_matrix_test.append(X_test[i])
        z = np.matmul(X_matrix_test, self.theta)
        answers = 1/(1 + math.e **(-z))
        return answers

    def predict_proba(self, X_test):
        
        X_test = X_test.tolist()
        X_matrix_test = []
        for i in range(len(X_test)):
            X_test[i].insert(0, 1)
            X_matrix_test.append(X_test[i])
        z = np.matmul(X_matrix_test, self.theta)
        answers = np.round(1/(1 + math.e **(-z)))
        return answers

In [2]:
df = datasets.load_iris()
X = df.data[:100, :2]
y = df.target[:100]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=18)
model = LogisticRegression()
model.fit(X_train, y_train)
result = model.fit(X_train, y_train)
answers = model.predict(X_test)
print(answers)
answers = model.predict_proba(X_test)
print(answers)

[[0.73387368]
 [0.51854271]
 [0.5178275 ]
 [0.76087074]
 [0.46815018]
 [0.52104188]
 [0.52889292]
 [0.5703133 ]
 [0.48385427]
 [0.70262178]
 [0.52960663]
 [0.66426802]
 [0.78423862]
 [0.5811485 ]
 [0.74134998]
 [0.50280953]
 [0.79725872]
 [0.71560832]
 [0.48850225]
 [0.69387922]
 [0.55870415]
 [0.69721268]
 [0.72397716]
 [0.57661273]
 [0.5178275 ]
 [0.55552691]
 [0.72197186]
 [0.5020934 ]
 [0.76034915]
 [0.73022404]
 [0.5703133 ]
 [0.49172024]
 [0.72197186]]
[[1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]]


In [3]:
model.coef_, model.intercept_

([[0.4436033825235975], [-0.601019905805465]], [-0.04310699544373814])