In [2]:
import numpy as np
import torch
import random

In [83]:
class Logistic:
    """Binary logistic regression fitted with damped Newton-Raphson steps.

    The model is ``p = sigmoid(X @ beta + b)``. The weight vector ``beta`` and
    the intercept ``b`` are updated separately: ``beta`` uses the Hessian of
    the log-likelihood with respect to the weights only, and ``b`` uses the
    scalar second derivative with respect to the intercept.
    """

    def __init__(self):
        # NOTE: this seeds the *global* torch RNG, so the random weights drawn
        # on the first train() call are reproducible (and every other torch
        # random stream in the process is reset as a side effect).
        torch.manual_seed(0)
        self.beta = None  # weight vector, created lazily on the first train()
        self.b = torch.zeros(1, dtype=torch.float)  # intercept

    def train(self, X: torch.Tensor, y: torch.Tensor, learning_rate=1e-3, num_iter: int = 1000):
        """Run ``num_iter`` damped Newton updates on the log-likelihood.

        Calling ``train`` again continues from the current ``beta`` / ``b``
        (warm start); the weights are only initialised when ``beta`` is None.

        Args:
            X: 2-D tensor of shape (num_train, dim); cast to float32.
            y: 0/1 labels with ``num_train`` elements; any shape holding that
                many elements (e.g. (N,) or (N, 1)) is accepted and flattened.
            learning_rate: damping factor applied to every Newton step.
            num_iter: number of update iterations.

        Returns:
            The tuple ``(self.beta, self.b)``. These are the live parameter
            tensors, not copies.

        Raises:
            ValueError: if ``y`` does not contain exactly ``num_train`` labels.
            RuntimeError: propagated from ``torch.linalg.solve`` when the
                Hessian is singular.
        """
        num_train, dim = X.shape
        if self.beta is None:
            self.beta = 1e-3 * torch.randn(dim)

        X = X.float()
        # Flatten so that a column vector does not broadcast (y - p) to N x N.
        y = y.float().reshape(-1)
        if y.numel() != num_train:
            raise ValueError(
                f"y has {y.numel()} labels but X has {num_train} rows"
            )

        for _ in range(num_iter):
            p = torch.sigmoid(X @ self.beta + self.b)
            residual = y - p
            weights = p * (1 - p)

            # Gradient of the log-likelihood.
            grad_beta = X.t() @ residual
            grad_b = residual.sum()

            # Hessian of the log-likelihood: -X^T diag(weights) X. Scaling the
            # columns of X^T by `weights` avoids materialising the N x N
            # diagonal matrix.
            hess_beta = -(X.t() * weights) @ X
            hess_b = -weights.sum()

            self.beta -= learning_rate * torch.linalg.solve(hess_beta, grad_beta)
            self.b -= learning_rate * grad_b / hess_b
        return self.beta, self.b

    def predict(self, X: torch.Tensor):
        """Return hard 0/1 predictions (int64) using a 0.5 probability cut-off.

        Raises:
            RuntimeError: if the model has not been trained yet.
        """
        if self.beta is None:
            raise RuntimeError("Logistic model must be trained before calling predict()")
        X = X.float()
        p = torch.sigmoid(X @ self.beta + self.b)
        return (p >= 0.5).long()

In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset
data = load_breast_cancer()

# Separate the independent variables (features) from the dependent variable (target)
X, y = data.data, data.target

# Split into a train set and a test set (30% held out, fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Print the size of the train and test sets
for _label, _split in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(_label, _split.shape)

X_train shape: (398, 30)
y_train shape: (398,)
X_test shape: (171, 30)
y_test shape: (171,)


In [71]:
# torch.as_tensor accepts NumPy arrays and existing tensors alike, so this
# cell can be re-run safely: an input that is already a tensor is returned
# as-is instead of triggering the "copy construct from a tensor" UserWarning
# that torch.tensor(tensor) emits. For NumPy inputs the resulting tensor
# shares memory with the array and keeps its dtype.
X_train = torch.as_tensor(X_train)
y_train = torch.as_tensor(y_train)
X_test = torch.as_tensor(X_test)
y_test = torch.as_tensor(y_test)

  X_train = torch.tensor(X_train)
  y_train = torch.tensor(y_train)
  X_test = torch.tensor(X_test)
  y_test = torch.tensor(y_test)


In [81]:
# Create the model. The constructor seeds the torch RNG with 0 and leaves
# `beta` unset; the weights are drawn on the first call to train().
logi = Logistic()

In [84]:
# Build a fresh model in the same cell that trains it. train() warm-starts
# from whatever weights the instance already holds, and an instance created
# before the class cell was last re-run stays bound to the old class
# definition; re-instantiating here keeps the cell idempotent and tied to the
# current Logistic implementation.
logi = Logistic()
logi.train(X_train, y_train)

(tensor([ 7.2602e-01,  3.1065e-02, -1.0733e-01,  1.1984e-03, -1.6962e+01,
          2.1756e+01, -8.0923e+00, -3.3690e+01,  5.8032e+00, -1.8825e+01,
         -2.8349e-01,  4.2576e-01,  3.3818e-02, -1.9389e-02, -8.4241e+01,
          1.8678e+01,  1.6518e+01, -7.1459e+01,  1.8767e+01,  1.2353e+00,
          2.7096e-01, -8.8964e-02,  4.1979e-02, -8.7437e-03,  1.4218e+01,
         -6.1035e+00, -1.7375e+00,  1.3231e+01, -7.1626e+00,  2.1891e+00]),
 tensor([1.4097]))

In [85]:
# Accuracy = fraction of predictions that match the labels, on each split.
train_hits = logi.predict(X_train).eq(y_train)
test_hits = logi.predict(X_test).eq(y_test)
acc_train = train_hits.float().mean()
acc_test = test_hits.float().mean()
print(acc_train, acc_test)

tensor(0.9749) tensor(0.9591)
