In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# For reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x257db5b3590>

![image-2.png](attachment:image-2.png)

![image.png](attachment:image.png)

# without Function

In [54]:
class SoftmaxClassifierModel(nn.Module):
    def __init__(self,m,d):
        super().__init__()
        self.linear = nn.Linear(d, m)

    def forward(self, x):
        return self.linear(x)

In [55]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [56]:
# model 정의   -> m개의 d차원
model = SoftmaxClassifierModel(x_train.shape[0],x_train.shape[1])

In [57]:
# optimizer 정의
optimizer = optim.SGD(model.parameters(),lr=0.1)

In [58]:
# 예측값에 softmax 적용
hypothesis = model(x_train)
y_predict = F.softmax(hypothesis,dim=1)
y_predict

tensor([[1.2110e-01, 9.6836e-02, 3.0231e-01, 6.2538e-02, 1.0614e-01, 1.0285e-01,
         9.5549e-02, 1.1267e-01],
        [4.9386e-02, 2.3149e-02, 5.6592e-01, 4.9458e-02, 1.1217e-01, 6.7960e-02,
         2.0447e-02, 1.1151e-01],
        [3.6944e-02, 6.2959e-03, 5.7628e-01, 6.5005e-02, 9.4623e-02, 5.2293e-02,
         7.5006e-03, 1.6106e-01],
        [9.5611e-03, 9.6305e-04, 8.1143e-01, 2.1968e-02, 5.7403e-02, 2.1398e-02,
         1.3521e-03, 7.5929e-02],
        [2.7443e-02, 1.8662e-03, 7.5592e-01, 3.7107e-03, 1.6986e-01, 1.3205e-02,
         1.5984e-02, 1.2014e-02],
        [3.7691e-02, 1.3772e-03, 3.6214e-01, 4.5196e-02, 4.5449e-01, 1.6480e-02,
         3.5574e-03, 7.9062e-02],
        [2.0169e-02, 7.2696e-04, 6.9074e-01, 4.7495e-03, 2.5502e-01, 8.8947e-03,
         5.7570e-03, 1.3943e-02],
        [1.1989e-02, 2.3288e-04, 7.1721e-01, 2.2295e-03, 2.5339e-01, 4.6152e-03,
         2.9552e-03, 7.3746e-03]], grad_fn=<SoftmaxBackward0>)

In [69]:
# one-hot 벡터 만들기
y_one_hot = torch.zeros_like(y_predict)
y_one_hot.scatter_(1,y_train.unsqueeze(1),1)
y_one_hot

tensor([[0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0.]])

In [68]:
epochs = 1000

for epoch in range(epochs+1):
    hypothesis = model(x_train)
    y_predict = F.softmax(hypothesis,dim=1)
    y_one_hot = torch.zeros_like(y_predict)
    y_one_hot.scatter_(1,y_train.unsqueeze(1),1)
    cost = (y_one_hot*-torch.log(y_predict)).sum(dim=1).mean()
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, epochs, cost.item()
        ))

Epoch    0/1000 Cost: 11.243940
Epoch  100/1000 Cost: 0.501289
Epoch  200/1000 Cost: 0.434768
Epoch  300/1000 Cost: 0.388907
Epoch  400/1000 Cost: 0.348694
Epoch  500/1000 Cost: 0.310113
Epoch  600/1000 Cost: 0.271956
Epoch  700/1000 Cost: 0.242135
Epoch  800/1000 Cost: 0.228653
Epoch  900/1000 Cost: 0.217235
Epoch 1000/1000 Cost: 0.206930


In [62]:
list(model.parameters())

[Parameter containing:
 tensor([[-2.8936, -0.0577,  2.7653, -0.2260],
         [ 0.6030,  0.0483,  0.0517,  1.3553],
         [ 3.1580,  0.9778, -1.4627, -0.3242],
         [-0.2826, -0.6196, -0.4050,  0.1132],
         [-0.6526, -0.4425, -0.0403, -0.0605],
         [ 0.0054, -0.2982, -0.2706, -0.3137],
         [-0.3000,  0.0158, -0.3169, -0.4182],
         [-0.1136, -0.5661, -0.4091, -0.0449]], requires_grad=True),
 Parameter containing:
 tensor([-1.9941e+00, -5.0993e-01,  3.3223e+00, -8.8475e-02, -2.6131e-01,
         -3.9537e-01, -3.7785e-01, -1.9842e-03], requires_grad=True)]

## with Function

In [63]:
model = SoftmaxClassifierModel(x_train.shape[0],x_train.shape[1])
optimizer = optim.SGD(model.parameters(),lr=0.1)

In [64]:
for epoch in range(epochs+1):
    y_predict = model(x_train)
    cost = F.cross_entropy(y_predict,y_train)
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, epochs, cost.item()
        ))

Epoch    0/1000 Cost: 6.650511
Epoch  100/1000 Cost: 0.667230
Epoch  200/1000 Cost: 0.565307
Epoch  300/1000 Cost: 0.506899
Epoch  400/1000 Cost: 0.461129
Epoch  500/1000 Cost: 0.421058
Epoch  600/1000 Cost: 0.383848
Epoch  700/1000 Cost: 0.347797
Epoch  800/1000 Cost: 0.311664
Epoch  900/1000 Cost: 0.274973
Epoch 1000/1000 Cost: 0.244040


In [66]:
list(model.parameters())

[Parameter containing:
 tensor([[-2.6914, -0.0809,  2.7444, -0.3065],
         [ 0.9330,  0.0894, -0.2139,  1.5188],
         [ 3.0426,  0.7871, -1.6859,  0.0522],
         [-0.1152, -0.4610, -0.5439, -0.0312],
         [ 0.0303, -0.1275, -0.5092, -0.4097],
         [-0.1561, -0.4355, -0.6476,  0.1517],
         [-0.2626, -0.1268, -0.2455, -0.7147],
         [-0.3765, -0.0834, -0.2694, -0.6370]], requires_grad=True),
 Parameter containing:
 tensor([-2.0094, -0.8236,  3.5881,  0.0568, -0.1221, -0.2647,  0.2927,  0.1391],
        requires_grad=True)]