# Логистическая регрессия

In [41]:
import numpy as np

class LogisticRegression():
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size applied to every gradient update.
    iter : int, default 10000
        Number of gradient-descent passes over the whole training set.
        The name shadows the ``iter`` builtin inside ``__init__``; it is kept
        so existing keyword callers continue to work.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    # Logits are clipped to +/- this value before exponentiation. exp(700) is
    # still finite in float64, so the clip removes overflow without changing
    # the result for any logit inside the range.
    _EXP_CLIP = 700.0

    def __init__(self, learning_rate=0.001, iter=10000):
        self.learning_rate = learning_rate
        self.iter = iter
        self.weights = None
        self.bias = None

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        The input is clipped to ``[-_EXP_CLIP, _EXP_CLIP]`` first so that
        ``np.exp`` cannot overflow for strongly negative logits. Values inside
        that range are computed exactly as the unclipped formula would.
        """
        clipped = np.clip(x, -self._EXP_CLIP, self._EXP_CLIP)
        return 1 / (1 + np.exp(-clipped))

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` by gradient descent on the log-loss.

        Parameters
        ----------
        X : array-like, two-dimensional (n_samples, n_features)
            Training features; converted to a float ndarray.
        y : array-like with n_samples entries
            Binary targets (0 or 1); flattened to one dimension so column
            vectors do not broadcast against the prediction vector.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``y`` does not have one label
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Loop-invariant averaging factor for the gradients.
        scale = 1 / n_samples

        for _ in range(self.iter):
            y_pred = self.sigmoid(np.dot(X, self.weights) + self.bias)

            # Gradient of the mean log-loss w.r.t. the logits is (p - y).
            error = y_pred - y
            dw = scale * np.dot(X.T, error)
            db = scale * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """Return hard 0/1 labels for ``X`` using a 0.5 probability threshold.

        A probability of exactly 0.5 maps to class 0 (strict ``>`` comparison).
        """
        linear_model = np.dot(X, self.weights) + self.bias
        # Vectorised threshold instead of a Python-level loop over samples.
        return (self.sigmoid(linear_model) > 0.5).astype(int)

    def calc_metric(self, y_true, y_pred):
        """Compute precision, recall and F1 for the positive class (label 1).

        Both arguments may be any array-like (lists included); they are
        flattened before comparison. Each metric falls back to 0 when its
        denominator is zero.

        Returns
        -------
        tuple
            ``(precision, recall, f1_score)``.
        """
        # Without the conversion a plain list compared with ``== 1`` yields a
        # single False and every count silently becomes zero.
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()

        tp = np.sum((y_true == 1) & (y_pred == 1))
        fp = np.sum((y_true == 0) & (y_pred == 1))
        fn = np.sum((y_true == 1) & (y_pred == 0))

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        return precision, recall, f1_score



In [42]:
from sklearn import datasets

# Synthetic binary-classification data: 1000 samples with two features, both
# informative and none redundant, two clusters per class. The fixed
# random_state makes the generated arrays reproducible across runs.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=2,
    random_state=41,
)

# Show the feature matrix followed by the label vector.
print(X)
print(y)

[[-0.39725923 -1.28383503]
 [ 0.20136741  1.89272271]
 [ 0.67854907  0.90065516]
 ...
 [ 2.19074786 -0.32019593]
 [ 0.04553532  0.95589097]
 [ 2.71922675 -0.89232744]]
[0 1 0 0 1 1 1 0 1 0 1 1 0 1 1 0 1 1 0 0 1 0 0 1 0 1 1 1 1 0 0 1 1 0 1 1 1
 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 1 1 1 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1
 0 1 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 1 0 0 1 0 1 1 0 0 0 1 1 1 1 0 0 1 1 0 0
 0 0 1 1 0 0 1 0 0 1 1 1 0 1 1 1 0 1 1 0 0 0 1 0 1 0 1 1 0 0 0 1 1 1 0 0 0
 1 0 0 1 0 1 0 0 0 0 1 0 1 1 1 1 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1 0 1 0 1 1 1
 1 0 1 0 1 1 0 0 0 0 1 0 0 0 1 1 0 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 1 0 1 0
 1 1 0 1 0 1 0 1 1 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 0 1 0 1 0 1 0
 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1 1 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 1 1 1
 0 1 0 0 1 0 1 1 0 1 0 1 1 1 1 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 1
 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 1 0 0 0 0 0 1 1 1 1 0 0 1 0 1 1 1 0 0 1 0 0
 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 1 1 0 0 0 1 0 0 0 1 1 0
 0 1 0 

In [43]:
# Train the from-scratch model with its default hyperparameters, then score it
# on the same X/y it was fitted on (these are training-set metrics; this cell
# performs no train/test split).
model = LogisticRegression()
model.fit(X, y)

y_pred = model.predict(X)
precision, recall, f1_score = model.calc_metric(y, y_pred)

# NOTE: the name `f1_score` is rebound to sklearn's function by the
# `from sklearn.metrics import ... f1_score` line in the following cell.
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1_score)

Precision:  0.8150807899461401
Recall:  0.9098196392785571
F1 Score:  0.8598484848484849


In [44]:
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

# Reference baseline: scikit-learn's estimator constructed with default
# arguments, fitted and evaluated on the same X/y as the model above.
sklearn_model = SklearnLogisticRegression().fit(X, y)  # fit() returns the estimator
y_pred_sklearn = sklearn_model.predict(X)

# Positive-class metrics computed with sklearn.metrics for comparison.
precision_sklearn = precision_score(y, y_pred_sklearn)
recall_sklearn = recall_score(y, y_pred_sklearn)
f1_score_sklearn = f1_score(y, y_pred_sklearn)

print("Sklearn Precision: ", precision_sklearn)
print("Sklearn Recall: ", recall_sklearn)
print("Sklearn F1 Score: ", f1_score_sklearn)

Sklearn Precision:  0.8278985507246377
Sklearn Recall:  0.9158316633266533
Sklearn F1 Score:  0.8696479543292103
