# Logistic Regression

## PyTorch로 Logistic Regression 구현하기

In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [33]:
# Seed PyTorch's global random number generator with a fixed value for reproducibility.
torch.manual_seed(1)

<torch._C.Generator at 0x10fcb0030>

In [34]:
# Six training samples with two features each, and their binary labels.
x_data = [
    [1, 2],
    [2, 3],
    [3, 1],
    [4, 3],
    [5, 3],
    [6, 2],
]
y_data = [[0], [0], [0], [1], [1], [1]]

# Convert to float32 tensors: x_train is 6 x 2, y_train is 6 x 1.
x_train = torch.tensor(x_data, dtype=torch.float32)
y_train = torch.tensor(y_data, dtype=torch.float32)

In [35]:
# Show the shapes of the inputs and the labels, one per line.
print(x_train.shape, y_train.shape, sep="\n")

torch.Size([6, 2])
torch.Size([6, 1])


In [36]:
# W must be 2 x 1 so that multiplying the 6 x 2 input by W yields a 6 x 1 result.
W = torch.zeros(2, 1, requires_grad=True)
# Single bias term; both parameters track gradients.
b = torch.zeros((1,), requires_grad=True)

In [37]:
# Hand-written sigmoid: H(x) = 1 / (1 + exp(-(xW + b))).
# H(x) is interpreted as the probability that the label y equals 1.
hypothesis = 1 / (1 + torch.exp(-(x_train.matmul(W) + b)))

In [38]:
# Display the predictions produced by the hand-written sigmoid.
print(hypothesis)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<MulBackward0>)


In [39]:
# Same hypothesis, this time using PyTorch's built-in sigmoid.
hypothesis = torch.sigmoid(x_train @ W + b)

In [40]:
# Display the predictions produced by torch.sigmoid.
print(hypothesis)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward0>)


In [41]:
# Element-wise binary cross-entropy: one loss value per training sample.
losses = -(
    y_train * hypothesis.log()
    + (1 - y_train) * (1 - hypothesis).log()
)
print(losses)

tensor([[0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931]], grad_fn=<NegBackward0>)


In [42]:
# The cost is the mean of the per-sample losses.
cost = torch.mean(losses)
print(cost)

tensor(0.6931, grad_fn=<MeanBackward0>)


In [43]:
# Instead of computing losses and cost by hand as above, PyTorch's built-in
# function returns the same cost directly.
# Arguments are the predictions (input) followed by the ground-truth labels (target).
F.binary_cross_entropy(input=hypothesis, target=y_train)

tensor(0.6931, grad_fn=<BinaryCrossEntropyBackward0>)

In [44]:
# Training set for the full training run: six 2-feature samples with binary labels.
x_data = [
    [1, 2], [2, 3], [3, 1],
    [4, 3], [5, 3], [6, 2],
]
y_data = [
    [0], [0], [0],
    [1], [1], [1],
]
x_train = torch.tensor(x_data, dtype=torch.float32)
y_train = torch.tensor(y_data, dtype=torch.float32)

In [45]:
# Model initialization: weights and bias start at zero and track gradients.
W = torch.zeros(2, 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Plain SGD over both parameters.
optimizer = optim.SGD([W, b], lr=1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # Forward pass: predicted probabilities H(x).
    hypothesis = torch.sigmoid(x_train @ W + b)

    # Binary cross-entropy, averaged over all samples.
    cost = -(
        y_train * hypothesis.log()
        + (1 - y_train) * (1 - hypothesis).log()
    ).mean()

    # Improve H(x): clear old gradients, backpropagate, take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Only log every 100th epoch.
    if epoch % 100 != 0:
        continue
    print(f"Epoch {epoch:4d}/{nb_epochs} Cost : {cost.item():.6f}")



Epoch    0/1000 Cost : 0.693147
Epoch  100/1000 Cost : 0.134722
Epoch  200/1000 Cost : 0.080643
Epoch  300/1000 Cost : 0.057900
Epoch  400/1000 Cost : 0.045300
Epoch  500/1000 Cost : 0.037261
Epoch  600/1000 Cost : 0.031673
Epoch  700/1000 Cost : 0.027556
Epoch  800/1000 Cost : 0.024394
Epoch  900/1000 Cost : 0.021888
Epoch 1000/1000 Cost : 0.019852


In [46]:
# Predicted probabilities using the trained W and b.
hypothesis = torch.sigmoid(torch.matmul(x_train, W) + b)
print(hypothesis)

tensor([[2.7648e-04],
        [3.1608e-02],
        [3.8977e-02],
        [9.5622e-01],
        [9.9823e-01],
        [9.9969e-01]], grad_fn=<SigmoidBackward0>)


In [47]:
# Threshold the probabilities at 0.5 to obtain boolean class predictions.
prediction = hypothesis >= 0.5
print(prediction)

tensor([[False],
        [False],
        [False],
        [ True],
        [ True],
        [ True]])


In [48]:
# Show the learned weights and bias, one per line.
print(W, b, sep="\n")

tensor([[3.2530],
        [1.5179]], requires_grad=True)
tensor([-14.4819], requires_grad=True)


### nn.Module로 구현하는 Logistic Regression

In [49]:
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim

In [50]:
# Seed PyTorch's global random number generator with a fixed value so the
# randomly initialized nn.Linear parameters below are reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x10fcb0030>

In [51]:
# Same toy data set as before: (feature_1, feature_2) -> binary label.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# float32 tensors of shape 6 x 2 (inputs) and 6 x 1 (labels).
x_train, y_train = (
    torch.tensor(x_data, dtype=torch.float32),
    torch.tensor(y_data, dtype=torch.float32),
)

In [52]:
# nn.Sequential stacks nn.Module layers and applies them in order.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=1),  # 2 input features -> 1 output
    nn.Sigmoid(),  # squash the linear output through a sigmoid
)

In [53]:
# W and b still hold their random initial values, so these predictions are not meaningful yet.
model(x_train)

tensor([[0.4020],
        [0.4147],
        [0.6556],
        [0.5948],
        [0.6788],
        [0.8061]], grad_fn=<SigmoidBackward0>)

In [54]:
# Optimizer setup: SGD over all parameters of the model, with learning rate 1.
optimizer = optim.SGD(model.parameters(), lr=1)

In [55]:
nb_epochs = 1000

for epoch in range(nb_epochs + 1):

    # Forward pass: compute H(x).
    hypothesis = model(x_train)

    # Binary cross-entropy cost between predictions and labels.
    cost = F.binary_cross_entropy(hypothesis, y_train)

    # Improve H(x) using the cost: clear gradients, backpropagate, update.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Only log every 20th epoch.
    if epoch % 20 != 0:
        continue

    # A predicted probability of at least 0.5 counts as class 1 (True).
    prediction = hypothesis >= 0.5
    # True wherever the predicted class matches the label.
    correct_prediction = prediction.float() == y_train
    # Accuracy = number of correct predictions / number of samples.
    accuracy = correct_prediction.sum().item() / len(correct_prediction)

    print(f"Epoch {epoch:4d} / {nb_epochs} Cost: {cost.item():.6f} Accuracy: {accuracy:2.2f}")

Epoch    0 / 1000 Cost: 0.539713 Accuracy: 0.83
Epoch   20 / 1000 Cost: 0.441875 Accuracy: 0.67
Epoch   40 / 1000 Cost: 0.316358 Accuracy: 0.83
Epoch   60 / 1000 Cost: 0.220498 Accuracy: 1.00
Epoch   80 / 1000 Cost: 0.157299 Accuracy: 1.00
Epoch  100 / 1000 Cost: 0.134272 Accuracy: 1.00
Epoch  120 / 1000 Cost: 0.118297 Accuracy: 1.00
Epoch  140 / 1000 Cost: 0.105779 Accuracy: 1.00
Epoch  160 / 1000 Cost: 0.095704 Accuracy: 1.00
Epoch  180 / 1000 Cost: 0.087420 Accuracy: 1.00
Epoch  200 / 1000 Cost: 0.080486 Accuracy: 1.00
Epoch  220 / 1000 Cost: 0.074595 Accuracy: 1.00
Epoch  240 / 1000 Cost: 0.069526 Accuracy: 1.00
Epoch  260 / 1000 Cost: 0.065118 Accuracy: 1.00
Epoch  280 / 1000 Cost: 0.061247 Accuracy: 1.00
Epoch  300 / 1000 Cost: 0.057820 Accuracy: 1.00
Epoch  320 / 1000 Cost: 0.054764 Accuracy: 1.00
Epoch  340 / 1000 Cost: 0.052022 Accuracy: 1.00
Epoch  360 / 1000 Cost: 0.049546 Accuracy: 1.00
Epoch  380 / 1000 Cost: 0.047299 Accuracy: 1.00
Epoch  400 / 1000 Cost: 0.045251 Accurac

In [56]:
# Predictions of the model after the training loop above.
model(x_train)

tensor([[2.7616e-04],
        [3.1595e-02],
        [3.8959e-02],
        [9.5624e-01],
        [9.9823e-01],
        [9.9969e-01]], grad_fn=<SigmoidBackward0>)

In [57]:
# Show the model's learned parameters (weight and bias of the linear layer).
print(list(model.parameters()))

[Parameter containing:
tensor([[3.2534, 1.5181]], requires_grad=True), Parameter containing:
tensor([-14.4839], requires_grad=True)]


앞서 nn.Module을 이용하지 않은 실습에서 얻었던 W, b 값과 거의 일치한다

### Class로 PyTorch 모델 구현하기

In [58]:
"""
model = nn.Sequential(
    nn.Linear(2, 1),
    nn.Sigmoid()
)
위의 모델을 class로 구현하면 다음과 같다
"""
class BinaryClassifier(nn.Module):
    """Logistic-regression binary classifier for inputs with two features.

    Class-based equivalent of ``nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())``:
    a single linear layer followed by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)  # 2 input features -> 1 output
        # nn.Sigmoid holds no parameters, so the attribute name does not
        # affect the module's state_dict keys.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(linear(x)) for the input tensor ``x``."""
        return self.sigmoid(self.linear(x))


In [59]:
# Instantiate the class-based model; `model` now refers to a BinaryClassifier.
model = BinaryClassifier()

In [61]:
# optimizer 설정
# Optimizer setup: SGD over the parameters of the current `model`.
# The name must be `optimizer`, the name the training loop calls; otherwise the
# loop would keep stepping the optimizer bound to the previous model and the
# current model would never be updated.
optimizer = optim.SGD(model.parameters(), lr=1)

나머지 학습 과정(학습 루프와 정확도 계산)은 위의 nn.Sequential 예제와 동일하다.