# Логистическая регрессия

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings( "ignore" )
from sklearn.linear_model import LogisticRegression

In [2]:
class LogitRegression():
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient at every iteration.
    iterations : int
        Number of gradient-descent steps performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    W : ndarray of shape (n_features,)
        Learned weights (initialised to zeros).
    b : float
        Learned intercept (initialised to zero).
    X, Y : ndarray
        The training data, kept because ``update_weights`` reads them.
    m, n : int
        Number of training rows and number of features.
    """

    def __init__(self, learning_rate, iterations):
        self.learning_rate = learning_rate
        self.iterations = iterations

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function of ``z`` without overflowing.

        ``exp`` is only ever evaluated on non-positive arguments, so large
        ``|z|`` (typical for unscaled features) cannot overflow to ``inf``.
        For ``z >= 0`` the expression is the textbook ``1 / (1 + exp(-z))``.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def fit(self, X, Y):
        """Fit the model to ``X`` (m rows, n features) and binary labels ``Y``.

        ``Y`` may be a flat vector or a single-column/row array; it is
        flattened to length ``m``. Returns ``self`` so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``Y`` does not contain exactly
            one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (m, n)")

        self.m, self.n = X.shape
        if Y.shape[0] != self.m:
            raise ValueError(
                "Y must contain one label per row of X "
                "(got %d labels for %d rows)" % (Y.shape[0], self.m)
            )

        # Weight initialisation.
        self.W = np.zeros(self.n)
        self.b = 0.0
        self.X = X
        self.Y = Y

        # Gradient-descent training loop.
        for _ in range(self.iterations):
            self.update_weights()
        return self

    def update_weights(self):
        """Perform one gradient-descent step on ``W`` and ``b``; return ``self``."""
        A = self._sigmoid(self.X.dot(self.W) + self.b)

        # Gradient of the mean log-loss with respect to the linear output.
        residual = np.reshape(A - np.ravel(self.Y), self.m)
        dW = np.dot(self.X.T, residual) / self.m
        db = np.sum(residual) / self.m

        # Weight update.
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db

        return self

    def predict(self, X):
        """Return 0/1 class labels for the rows of ``X``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.
        """
        X = np.asarray(X, dtype=float)
        Z = self._sigmoid(X.dot(self.W) + self.b)
        return np.where(Z > 0.5, 1, 0)

In [3]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset as a (features, labels) pair of arrays.
X, Y = load_breast_cancer(return_X_y=True)
# Hold out a test split using train_test_split's defaults. No random_state is
# passed, so the split (and every score computed from it) varies between runs.
x_train, x_test, y_train, y_test = train_test_split(X, Y)

In [4]:
from sklearn.metrics import f1_score

# Train the hand-written gradient-descent classifier and score it with F1 on
# the held-out split.
own_model = LogitRegression( learning_rate = 0.01, iterations = 1000 ).fit(x_train, y_train)
y_pred = own_model.predict(x_test)
own_f1 = f1_score(y_test, y_pred)

# Baseline: scikit-learn's LogisticRegression with default settings, fitted
# and scored on the same split. y_pred is reused, so from here on it holds the
# sklearn predictions rather than the hand-written model's.
sklearn_model = LogisticRegression().fit(x_train, y_train)
y_pred = sklearn_model.predict(x_test)
sklearn_f1 = f1_score(y_test, y_pred)

# Report both scores side by side.
print('f1 score in own model:', own_f1)
print('f1 score in sklearn:', sklearn_f1)

f1 score in own model: 0.9333333333333333
f1 score in sklearn: 0.9494949494949495


# Модель гребневой регрессии

In [5]:
from sklearn.base import BaseEstimator, RegressorMixin

# Module-level default kept for backward compatibility with code that reads
# it; LinRegRidge itself only uses its own ``batch_size`` parameter.
batch_size = 25


class LinRegRidge(BaseEstimator, RegressorMixin):
    """Linear regression with an L2 penalty, trained by mini-batch SGD.

    The model has no intercept term: predictions are ``X @ w``.

    Parameters
    ----------
    batch_size : int, default=25
        Number of rows sampled (with replacement) for each gradient step.
    num_steps : int, default=350
        Number of gradient steps performed by ``fit``.
    lr : float, default=1e-2
        Learning rate (step size).
    alpha : float, default=1.0
        Strength of the L2 penalty on the weights.

    Attributes set by ``fit``
    -------------------------
    w : ndarray of shape (n_features, 1)
        Learned weight column vector.
    """

    def __init__(self, batch_size=25, num_steps=350, lr=1e-2, alpha=1.0):
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lr = lr
        self.alpha = alpha

    def fit(self, X, Y):
        """Fit the weights on ``X`` (n_objects, n_features) and targets ``Y``.

        ``Y`` may be a flat vector of length ``n_objects`` or a column of
        shape ``(n_objects, 1)``; a flat vector is reshaped to a column so
        the residual keeps shape ``(batch, 1)`` instead of broadcasting.
        Weights start from a standard-normal draw. Returns ``self``.

        Raises
        ------
        ValueError
            If ``X`` and ``Y`` do not have the same number of rows.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)
        if Y.ndim == 1:
            Y = Y[:, None]

        n_objects = len(X)
        if len(Y) != n_objects:
            raise ValueError(
                "X and Y must have the same number of rows "
                "(got %d and %d)" % (n_objects, len(Y))
            )

        w = np.random.randn(X.shape[1])[:, None]

        for _ in range(self.num_steps):
            # Sample with the estimator's own batch size so it always agrees
            # with the divisor used in the gradient below.
            sample_indices = np.random.randint(0, n_objects, size=self.batch_size)
            X_batch = X[sample_indices]
            residual = np.dot(X_batch, w) - Y[sample_indices]

            # Mean-squared-error gradient on the batch plus the L2 term.
            # NOTE(review): the penalty gradient is alpha * w / n_objects, with
            # no factor 2 like the data term has - confirm this is the
            # intended scaling of the ridge objective.
            gradient = (
                2 * np.dot(X_batch.T, residual) / self.batch_size
                + self.alpha * w / n_objects
            )
            w -= self.lr * gradient

        self.w = w
        return self

    def predict(self, X):
        """Return predictions ``X @ w`` as an array of shape (n_rows, 1)."""
        return X@self.w

In [6]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# Generate synthetic data for testing.
n_features = 700
n_objects = 100000

# Ground-truth weights: one column vector drawn uniformly from [-2, 2).
w_true = np.random.uniform(-2, 2, (n_features, 1))

# Features are uniform in [-100, 100), then column j is multiplied by j, so
# column 0 is all zeros and later columns have progressively larger spread.
X = np.random.uniform(-100, 100, (n_objects, n_features)) * np.arange(n_features)
# Targets: exact linear response plus Gaussian noise with standard deviation 10;
# Y is a column of shape (n_objects, 1).
Y = X.dot(w_true) + np.random.normal(0, 10, (n_objects, 1))

In [7]:
from sklearn.model_selection import train_test_split

# Hold out a test split using train_test_split's defaults (no random_state is
# passed, so the split differs between runs).
x_train, x_test, y_train, y_test = train_test_split(X, Y)

# Fit the scaler on the training rows only, then apply the same transformation
# to both splits so no statistics from the test rows are used.
scaler = StandardScaler()
scaler.fit(x_train)
x_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [8]:
# Train the hand-written SGD ridge model with its default hyper-parameters on
# the scaled training data and score it with R^2 on the scaled test data.
own_model = LinRegRidge().fit(x_scaled, y_train)
y_pred = own_model.predict(x_test_scaled)
own_r2 = r2_score(y_test, y_pred)

# Baseline: scikit-learn's Ridge with default settings on the same data.
# y_pred is reused, so from here on it holds the sklearn predictions.
sklearn_model = Ridge().fit(x_scaled, y_train)
y_pred = sklearn_model.predict(x_test_scaled)
sklearn_r2 = r2_score(y_test, y_pred)

# Report both scores side by side.
print('R^2 in own model:', own_r2)
print('R^2 in sklearn Ridge:', sklearn_r2)

R^2 in own model: 0.9999489264154708
R^2 in sklearn Ridge: 0.9999999995982445
