# Логистическая регрессия

In [3]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [4]:
def sigm(s):
  """Return the logistic sigmoid 1 / (1 + exp(-s)), elementwise.

  The value is computed as exp(-log(1 + exp(-s))) through np.logaddexp, so
  large negative inputs no longer overflow inside np.exp (the naive formula
  overflows there and only reaches 0 after emitting a RuntimeWarning).

  Parameters
  ----------
  s : float or ndarray
      Input value(s).

  Returns
  -------
  float or ndarray
      Sigmoid of `s`, same shape as `s`.
  """
  # log(1 + exp(-s)) == logaddexp(0, -s), evaluated without forming exp(-s).
  return np.exp(-np.logaddexp(0, -s))

In [5]:
from sklearn.base import BaseEstimator, RegressorMixin

class logreg(BaseEstimator, RegressorMixin):
  """Binary logistic regression trained with mini-batch stochastic gradient descent.

  An intercept is learned by prepending a column of ones to the features.

  NOTE(review): the class inherits RegressorMixin although it is a
  classifier; ClassifierMixin is presumably the better fit. The base class is
  kept so the inherited interface does not change.

  Parameters
  ----------
  batch_size : int, default=75
      Number of objects drawn (with replacement) for every gradient step.
  num_steps : int, default=10000
      Number of gradient steps.
  lr : float, default=1e-2
      Learning rate.

  Attributes
  ----------
  w : ndarray of shape (n_features + 1, 1)
      Fitted weights, set by fit(); w[0] multiplies the column of ones.
  """

  def __init__(self, batch_size=75, num_steps=10000, lr=1e-2):
    self.batch_size = batch_size
    self.num_steps = num_steps
    self.lr = lr

  def fit(self, X, Y):
    """Fit the weights on features X (n_objects, n_features) and 0/1 labels Y.

    Y may be 1-D or a column vector. Returns self.
    """
    X = np.asarray(X)
    # Force a column vector: with 1-D labels the residual below would
    # broadcast (batch, 1) - (batch,) into a (batch, batch) matrix and
    # silently corrupt the gradient.
    Y = np.asarray(Y).reshape(-1, 1)
    n_objects = len(X)
    w = np.random.uniform(-2, 2, (X.shape[1] + 1, 1))
    Xtrain = np.concatenate((np.ones((n_objects, 1)), X), axis=1)
    for _ in range(self.num_steps):
      sample_indices = np.random.randint(0, n_objects, size=self.batch_size)
      X_batch = Xtrain[sample_indices]
      residual = sigm(X_batch @ w) - Y[sample_indices]
      # Mean log-loss gradient over the mini-batch.
      w -= self.lr * (X_batch.T @ residual) / self.batch_size
    self.w = w
    return self

  def predict(self, X, a=0.5):
    """Return a boolean (n_objects, 1) array: predicted probability >= a."""
    X = np.asarray(X)
    n_objects = len(X)
    X_ = np.concatenate((np.ones((n_objects, 1)), X), axis=1)
    return sigm(X_ @ self.w) >= a

# Тестируем

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

# Synthetic binary classification data: two 2-D blobs, one per class.
# (The previous `features = 700` and `w_true` assignments were removed: the
# data has two features, and neither name was used in this section.)
samples = 100000
x, y = make_blobs(
    n_samples=samples,
    centers=[[-3.5, 0.5], [3.5, -0.5]],
    cluster_std=1,
    random_state=42,
)

# Labels as a column vector of shape (samples, 1).
y = y.reshape(samples, 1)

In [7]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test partition is identical on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [8]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits.
scaler = StandardScaler().fit(x_train)
x_scaled, x_test_scaled = (scaler.transform(part) for part in (x_train, x_test))

In [9]:
# Hand-written SGD logistic regression, scored on the held-out split.
own_model = logreg().fit(x_scaled, y_train)
y_pred = own_model.predict(x_test_scaled)
own = accuracy_score(y_test, y_pred)
own_r2 = r2_score(y_test, y_pred)

# scikit-learn expects 1-D targets; passing the (n, 1) column raises a
# DataConversionWarning that the global warnings filter was hiding.
sklearn_model = LogisticRegression().fit(x_scaled, np.ravel(y_train))
y_pred = sklearn_model.predict(x_test_scaled)

# Named sklearn_acc so the result does not shadow the `sklearn` package name.
sklearn_acc = accuracy_score(y_test, y_pred)
sklearn_r2 = r2_score(y_test, y_pred)

print('accuracy_score in own model:', own)
print('accuracy_score in sklearn loss:', sklearn_acc)
print('\nR^2 in own model:', own_r2)
print('R^2 in sklearn loss:', sklearn_r2)

accuracy_score in own model: 0.99964
accuracy_score in sklearn loss: 0.99992

R^2 in own model: 0.998559994239977
R^2 in sklearn loss: 0.9996799987199949


# Ridge

In [10]:
class ridge(BaseEstimator, RegressorMixin):
  """Ridge regression (no intercept) trained with mini-batch SGD.

  Minimises (1 / n) * (||X w - Y||^2 + a * ||w||^2), where the data term is
  estimated on a random mini-batch at every step.

  Parameters
  ----------
  batch_size : int, default=50
      Number of objects drawn (with replacement) for every gradient step.
  num_steps : int, default=10000
      Number of gradient steps.
  lr : float, default=1e-2
      Learning rate.
  a : float, default=0.1
      L2 penalty strength.

  Attributes
  ----------
  w : ndarray of shape (n_features, 1)
      Fitted weights, set by fit().
  """

  def __init__(self, batch_size=50, num_steps=10000, lr=1e-2, a=0.1):
    self.batch_size = batch_size
    self.num_steps = num_steps
    self.lr = lr
    self.a = a

  def fit(self, X, Y):
    """Fit the weights on X (n_objects, n_features) and targets Y.

    Y may be 1-D or a column vector. Returns self.
    """
    X = np.asarray(X)
    # Force a column vector: with 1-D targets the residual below would
    # broadcast (batch, 1) - (batch,) into a (batch, batch) matrix.
    Y = np.asarray(Y).reshape(-1, 1)
    n_objects = len(X)
    w = np.random.uniform(-2, 2, (X.shape[1], 1))
    for _ in range(self.num_steps):
      sample_indices = np.random.randint(0, n_objects, size=self.batch_size)
      X_batch = X[sample_indices]
      residual = X_batch @ w - Y[sample_indices]
      data_grad = 2 * (X_batch.T @ residual) / self.batch_size
      penalty_grad = 2 * self.a * w / n_objects
      # The learning rate scales the whole gradient. Previously only the data
      # term was multiplied by lr, which made the effective penalty 1 / lr
      # times stronger than `a`.
      w -= self.lr * (data_grad + penalty_grad)
    self.w = w
    return self

  def predict(self, X):
    """Return predictions X @ w as an (n_objects, 1) array."""
    return X @ self.w

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# Synthetic linear-regression problem: Y = X @ w_true + Gaussian noise.
n_features = 700
n_objects = 100000

w_true = np.random.uniform(-2, 2, size=(n_features, 1))

# Column j is uniform on [-100, 100) multiplied by j, so column 0 is
# identically zero and the feature scales grow with the column index.
X = np.random.uniform(-100, 100, size=(n_objects, n_features)) * np.arange(n_features)
noise = np.random.normal(0, 10, size=(n_objects, 1))
Y = X @ w_true + noise

In [12]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test partition is identical on every run.
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=42)

In [13]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits.
scaler = StandardScaler().fit(x_train)
x_scaled, x_test_scaled = (scaler.transform(part) for part in (x_train, x_test))

In [14]:
from sklearn.linear_model import Ridge
# Hand-written SGD ridge with its default hyper-parameters.
own_model = ridge()
own_model.fit(x_scaled, y_train)
y_pred = own_model.predict(x_test_scaled)
own_r2 = r2_score(y_test, y_pred)

# Reference model: scikit-learn Ridge with alpha=0.1.
sklearn_model = Ridge(alpha=0.1)
sklearn_model.fit(x_scaled, y_train)
y_pred = sklearn_model.predict(x_test_scaled)
sklearn_r2 = r2_score(y_test, y_pred)

print('R^2 in own model:', own_r2)
print('R^2 in sklearn loss:', sklearn_r2)

R^2 in own model: 0.9999961940226556
R^2 in sklearn loss: 0.9999999997872889
