## SoftMax 비용함수

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
z = torch.FloatTensor([1,2,3])

In [3]:
hypothesis = F.softmax(z, dim = 0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [4]:
z = torch.rand(3,5, requires_grad = True)

In [5]:
hypothesis = F.softmax(z, dim = 1)
print(hypothesis)

tensor([[0.1484, 0.1171, 0.2171, 0.2460, 0.2714],
        [0.2255, 0.1836, 0.1955, 0.1890, 0.2063],
        [0.1947, 0.2912, 0.2159, 0.1590, 0.1392]], grad_fn=<SoftmaxBackward0>)


In [6]:
y = torch.randint(5, (3,)).long()
print(y)

tensor([2, 0, 4])


In [7]:
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.]])

In [8]:
cost = (y_one_hot*-torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.6627, grad_fn=<MeanBackward0>)


In [9]:
# log_softmax로 cost 정의 가능
F.log_softmax(z, dim=1)

tensor([[-1.9081, -2.1445, -1.5273, -1.4026, -1.3040],
        [-1.4893, -1.6948, -1.6321, -1.6660, -1.5784],
        [-1.6361, -1.2339, -1.5329, -1.8391, -1.9716]],
       grad_fn=<LogSoftmaxBackward0>)

In [10]:
F.nll_loss(F.log_softmax(z, dim=1), y)

tensor(1.6627, grad_fn=<NllLossBackward0>)

In [11]:
F.cross_entropy(z, y)

tensor(1.6627, grad_fn=<NllLossBackward0>)

## SoftMax 구현 low

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [13]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [14]:
y_one_hot = torch.zeros(8,3)
y_one_hot.scatter_(1, y_train.unsqueeze(1),1)
print(y_one_hot)

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])


In [15]:
W = torch.zeros((4,3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = optim.SGD([W,b], lr=0.1)

In [16]:
nb_epochs = 1000
for epoch in range(nb_epochs + 1):
    hypothesis = F.softmax(x_train@W + b, dim = 1)
    cost = (y_one_hot*-torch.log(hypothesis)).sum().mean()
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 8.788898
Epoch  100/1000 Cost: 16.677059
Epoch  200/1000 Cost: 66.587646
Epoch  300/1000 Cost: 76.232986
Epoch  400/1000 Cost: 71.594498
Epoch  500/1000 Cost: 10.260522
Epoch  600/1000 Cost: 63.315868
Epoch  700/1000 Cost: 6.840080
Epoch  800/1000 Cost: 20.714081
Epoch  900/1000 Cost: 14.241849
Epoch 1000/1000 Cost: 28.450081


## SoftMax 구현 High

In [17]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [18]:
x_train.shape

torch.Size([8, 4])

In [19]:
W = torch.zeros((4,3),requires_grad = True)
b = torch.zeros(1, requires_grad = True)

optimizer = optim.SGD([W,b], lr=0.1)

In [20]:
nb_epochs = 1000
for epoch in range(nb_epochs+1):
    predict = x_train@W + b
    cost = F.cross_entropy(predict, y_train)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.761050
Epoch  200/1000 Cost: 0.689991
Epoch  300/1000 Cost: 0.643229
Epoch  400/1000 Cost: 0.604117
Epoch  500/1000 Cost: 0.568255
Epoch  600/1000 Cost: 0.533922
Epoch  700/1000 Cost: 0.500291
Epoch  800/1000 Cost: 0.466908
Epoch  900/1000 Cost: 0.433507
Epoch 1000/1000 Cost: 0.399962


## nn.module 활용 구현

In [21]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [22]:
model = nn.Linear(4,3)

In [23]:
optimizer = optim.SGD(model.parameters(), lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs+1):
    prediction = model(x_train)
    cost = F.cross_entropy(prediction, y_train)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 20번마다 로그 출력
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 2.755052
Epoch  100/1000 Cost: 0.711671
Epoch  200/1000 Cost: 0.618243
Epoch  300/1000 Cost: 0.558833
Epoch  400/1000 Cost: 0.507785
Epoch  500/1000 Cost: 0.459874
Epoch  600/1000 Cost: 0.413273
Epoch  700/1000 Cost: 0.367173
Epoch  800/1000 Cost: 0.321345
Epoch  900/1000 Cost: 0.276853
Epoch 1000/1000 Cost: 0.243149


In [24]:
class SoftMaxClassifierModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4,3)
        
    def forward(self, x):
        return self.linear(x)

In [25]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [26]:
model = SoftMaxClassifierModel()

In [27]:
optimizer = optim.SGD(model.parameters(), lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs):
    prediction = model(x_train)
    
    cost = F.cross_entropy(prediction, y_train)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 3.947660
Epoch  100/1000 Cost: 0.685024
Epoch  200/1000 Cost: 0.610695
Epoch  300/1000 Cost: 0.554636
Epoch  400/1000 Cost: 0.504525
Epoch  500/1000 Cost: 0.456942
Epoch  600/1000 Cost: 0.410495
Epoch  700/1000 Cost: 0.364513
Epoch  800/1000 Cost: 0.318842
Epoch  900/1000 Cost: 0.274773
