# PyTorch로 Softmax Regression 구현

In [8]:
import torch

# Softmax regression trained by hand: explicit weight/bias tensors and an
# explicit cross-entropy cost.

# Training inputs: 8 samples, 4 features each.
x_train = torch.FloatTensor([[1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4], [4, 1, 5, 5],
                             [1, 7, 5, 5], [1, 2, 5, 6], [1, 6, 6, 6], [1, 7, 7, 7]])

# One-hot targets: 3 columns, i.e. 3 classes to predict.
y_train = torch.FloatTensor([[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0],
                             [0, 1, 0], [1, 0, 0], [1, 0, 0]])

# Shapes are (rows, columns): W maps 4 features to 3 classes, and b holds one
# bias per class, hence (1, 3).
W = torch.randn(4, 3, requires_grad=True)
b = torch.randn(1, 3, requires_grad=True)

# Tell the optimizer which tensors to update and the learning rate to use.
optim = torch.optim.Adam([W, b], lr=0.1)

for epoch in range(3001):  # epochs 0..3000 inclusive, i.e. 3001 updates
    # Raw class scores for every sample.
    logits = torch.mm(x_train, W) + b

    # Log of the hypothesis softmax(logits); dim=1 normalizes across the
    # classes of each row. log_softmax is used instead of log(softmax(...))
    # because a probability that underflows to 0 would give log(0) = -inf,
    # and 0 * -inf in the product below is NaN. The probabilities themselves
    # are log_h.exp() if they are needed.
    log_h = torch.log_softmax(logits, dim=1)

    # Cross entropy: multiplying by the one-hot targets keeps only the
    # log-probability of the true class; sum over dim=1 (across each row),
    # then average over the samples.
    cost = -torch.mean(torch.sum(y_train * log_h, dim=1))

    optim.zero_grad()  # clear gradients left over from the previous step
    cost.backward()
    optim.step()

    # .item() returns a plain Python number, so no no_grad() guard is needed.
    if epoch % 100 == 0:
        print(f"epoch: {epoch}, cost: {cost.item()}")

  
 

epoch: 0, cost: 7.3163652420043945
epoch: 100, cost: 0.34471017122268677
epoch: 200, cost: 0.24629312753677368
epoch: 300, cost: 0.18048618733882904
epoch: 400, cost: 0.1371230036020279
epoch: 500, cost: 0.10741613805294037
epoch: 600, cost: 0.0862061008810997
epoch: 700, cost: 0.07054899632930756
epoch: 800, cost: 0.05868084356188774
epoch: 900, cost: 0.04948682710528374
epoch: 1000, cost: 0.042231157422065735
epoch: 1100, cost: 0.03641216456890106
epoch: 1200, cost: 0.0316784493625164
epoch: 1300, cost: 0.0277786273509264
epoch: 1400, cost: 0.024529235437512398
epoch: 1500, cost: 0.021794205531477928
epoch: 1600, cost: 0.019470909610390663
epoch: 1700, cost: 0.017481112852692604
epoch: 1800, cost: 0.01576409302651882
epoch: 1900, cost: 0.014272335916757584
epoch: 2000, cost: 0.012968292459845543
epoch: 2100, cost: 0.011821798048913479
epoch: 2200, cost: 0.010808685794472694
epoch: 2300, cost: 0.009909093379974365
epoch: 2400, cost: 0.009106887504458427
epoch: 2500, cost: 0.0083885928

In [9]:
# 새로운 테스트 값을 넣었을때 

# Score three new samples with the W and b tensors defined earlier in the file.
x_test = torch.FloatTensor([[1, 11, 10, 9], [1, 3, 4, 3], [1, 1, 0, 1]])
test_scores = x_test @ W + b
h_test = test_scores.softmax(dim=1)  # normalize each row across the classes

print(h_test)

# argmax over dim=1 returns, for every row, the index of its largest value.
print(h_test.argmax(dim=1))

tensor([[1.0000e+00, 2.2018e-17, 2.3150e-34],
        [1.3654e-05, 7.4193e-01, 2.5805e-01],
        [6.6801e-31, 1.5667e-10, 1.0000e+00]], grad_fn=<SoftmaxBackward0>)
tensor([0, 1, 2])


# 조금 더 깔끔한 Softmax

In [10]:
import torch
import torch.nn.functional as F # cross_entropy를 사용하기 위해
import torch.nn as nn

# Same softmax regression, but letting torch.nn hold the parameters and
# F.cross_entropy compute the loss.

# Training inputs: 8 samples, 4 features each.
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)

# Class indices 0, 1, 2 take the place of the one-hot rows
# [1,0,0], [0,1,0], [0,0,1].
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.long)

# nn.Linear(input dimension, number of classes) creates and randomly
# initializes the weight and bias itself, replacing hand-made W and b tensors.
model = nn.Linear(4, 3)

# Optimize every parameter of the layer with Adam at lr=0.1.
optim = torch.optim.Adam(model.parameters(), lr=0.1)

for epoch in range(3001):
    # Forward pass: raw class scores, no softmax applied here.
    h = model(x_train)

    # F.cross_entropy combines softmax and cross entropy in one call.
    cost = F.cross_entropy(h, y_train)

    # Clear old gradients, backpropagate, then update the parameters.
    optim.zero_grad()
    cost.backward()
    optim.step()

    # Report progress only on every 100th epoch.
    if epoch % 100 != 0:
        continue
    print(f"epoch: {epoch}, cost: {cost.item()}")

  


epoch: 0, cost: 1.0027530193328857
epoch: 100, cost: 0.23076365888118744
epoch: 200, cost: 0.12131606042385101
epoch: 300, cost: 0.07509488612413406
epoch: 400, cost: 0.051318295300006866
epoch: 500, cost: 0.03743790090084076
epoch: 600, cost: 0.028604548424482346
epoch: 700, cost: 0.022617556154727936
epoch: 800, cost: 0.018360186368227005
epoch: 900, cost: 0.015216359868645668
epoch: 1000, cost: 0.012823299504816532
epoch: 1100, cost: 0.010955934412777424
epoch: 1200, cost: 0.00946823786944151
epoch: 1300, cost: 0.00826212763786316
epoch: 1400, cost: 0.007269493769854307
epoch: 1500, cost: 0.006442024372518063
epoch: 1600, cost: 0.005744324065744877
epoch: 1700, cost: 0.005150290206074715
epoch: 1800, cost: 0.004639991093426943
epoch: 1900, cost: 0.00419826153665781
epoch: 2000, cost: 0.003813134040683508
epoch: 2100, cost: 0.0034753456711769104
epoch: 2200, cost: 0.0031772726215422153
epoch: 2300, cost: 0.002912986557930708
epoch: 2400, cost: 0.002677504438906908
epoch: 2500, cost: 

In [12]:
# The layer summary shows whether a bias term is present; after that, print
# every parameter tensor the model exposes.
print(model)
for param in model.parameters():
    print(param)

Linear(in_features=4, out_features=3, bias=True)
Parameter containing:
tensor([[-23.1651,  -1.2998,  16.0987,  -4.6230],
        [  2.6404,  -0.1235,  -0.4500,   1.9029],
        [  5.0735,   0.6576,  -7.0886,   0.1015]], requires_grad=True)
Parameter containing:
tensor([-29.2442,  -7.4250,  17.8522], requires_grad=True)


# Softmax Regression with Sklearn
 

In [13]:
# scikit-learn's LogisticRegression also implements softmax (multinomial) regression
# => it is used when y contains more than two distinct classes
 
import numpy as np
from sklearn.linear_model import LogisticRegression

# Training inputs: 8 samples, 4 features each.
x_train = np.array([[1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4], [4, 1, 5, 5], [1, 7, 5, 5],
                    [1, 2, 5, 6], [1, 6, 6, 6], [1, 7, 7, 7]])

# y holds three classes (0, 1, 2), so this is a multiclass (softmax) problem.
y_train = np.array([2, 2, 2, 1, 1, 1, 0, 0])

# Build the model with regularization turned off. The string 'none' was
# deprecated in scikit-learn 1.2 and is rejected from 1.4 on; None is the
# supported spelling.
model = LogisticRegression(penalty=None)
model.fit(x_train, y_train)  # fit on the training data

# Predict the class of three new samples.
x_test = np.array([[1, 11, 10, 9], [1, 3, 4, 3], [1, 1, 0, 1]])
print(model.predict(x_test))

[0 1 2]
