<a href="https://colab.research.google.com/github/pmy0792/pytorch_tutorial/blob/main/lab_06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f943b8e12d0>

In [4]:
z=torch.FloatTensor([1,2,3])

In [5]:
hypothesis=F.softmax(z,dim=0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [6]:
hypothesis.sum()

tensor(1.)

## Cross Entropy Loss (Low-level)

In [7]:
z= torch.rand(3,5,requires_grad=True)
print(z)

tensor([[0.7576, 0.2793, 0.4031, 0.7347, 0.0293],
        [0.7999, 0.3971, 0.7544, 0.5695, 0.4388],
        [0.6387, 0.5247, 0.6826, 0.3051, 0.4635]], requires_grad=True)


In [8]:
hypothesis=F.softmax(z,dim=1)
print(hypothesis)

tensor([[0.2645, 0.1639, 0.1855, 0.2585, 0.1277],
        [0.2430, 0.1624, 0.2322, 0.1930, 0.1694],
        [0.2226, 0.1986, 0.2326, 0.1594, 0.1868]], grad_fn=<SoftmaxBackward0>)


In [9]:
y=torch.randint(5,(3,)).long()
print(y)

tensor([0, 2, 1])


In [11]:
y_one_hot=torch.zeros_like(hypothesis)
y_one_hot.scatter_(1,y.unsqueeze(1),1)
# y.unsqueeze(1): y의 size가 (3,) => (3,1)

tensor([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.]])

In [12]:
cost=(y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.4689, grad_fn=<MeanBackward0>)


## Cross Entropy Loss with torch.nn.functional

In [13]:
# Low level
torch.log(F.softmax(z,dim=1))

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward0>)

In [14]:
# High level
F.log_softmax(z,dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward0>)

In [15]:
# Low level
(y_one_hot * -torch.log(F.softmax(z,dim=1))).sum(dim=1).mean()

tensor(1.4689, grad_fn=<MeanBackward0>)

In [16]:
# High level
F.nll_loss(F.log_softmax(z, dim=1),y)

# NLL : Negative Log Likelihood

tensor(1.4689, grad_fn=<NllLossBackward0>)

In [17]:
# F.cross_entropy: combines F.log_softmax() and F.nll_loss()
F.cross_entropy(z,y)

tensor(1.4689, grad_fn=<NllLossBackward0>)

## Training with Low-level Cross Entropy Loss

In [18]:
x_train=[[1,2,1,1],
         [2,1,3,2],
         [3,1,3,4],
         [4,1,5,5],
         [1,7,5,5],
         [1,2,5,6],
         [1,6,6,6],
         [1,7,7,7]]
y_train=[2,2,2,1,1,1,0,0]

x_train=torch.FloatTensor(x_train)
y_train=torch.LongTensor(y_train)

In [19]:
print(x_train)

tensor([[1., 2., 1., 1.],
        [2., 1., 3., 2.],
        [3., 1., 3., 4.],
        [4., 1., 5., 5.],
        [1., 7., 5., 5.],
        [1., 2., 5., 6.],
        [1., 6., 6., 6.],
        [1., 7., 7., 7.]])


In [25]:
# 모델 초기화
W = torch.zeros((4,3), requires_grad=True)
b = torch.zeros(1,requires_grad=True)

# optimizer 설정
optimizer=optim.SGD((W,b),lr=0.6)

nb_epochs=100
for epoch in range(nb_epochs+1):
  # Cost 계산
  hypothesis=F.softmax(x_train.matmul(W)+b, dim=1)
  y_one_hot=torch.zeros_like(hypothesis)
  y_one_hot.scatter_(1,y_train.unsqueeze(1),1)
  cost=(y_one_hot * -torch.log(F.softmax(hypothesis,dim=1))).sum(dim=1).mean()

  # optimizer로 H(x) 계산
  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  if epoch % 10 ==0:
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs,cost.item()
    ))

Epoch    0/100 Cost: 1.098612
Epoch   10/100 Cost: 0.990097
Epoch   20/100 Cost: 0.893733
Epoch   30/100 Cost: 0.850376
Epoch   40/100 Cost: 0.826090
Epoch   50/100 Cost: 0.809265
Epoch   60/100 Cost: 0.797058
Epoch   70/100 Cost: 0.788881
Epoch   80/100 Cost: 0.782570
Epoch   90/100 Cost: 0.776580
Epoch  100/100 Cost: 0.771361


## Training with F.cross_entropy

In [28]:
# 모델 초기화
W = torch.zeros((4,3), requires_grad=True)
b = torch.zeros(1,requires_grad=True)

# optimizer 설정
optimizer=optim.SGD((W,b),lr=0.1)

nb_epochs=100
for epoch in range(nb_epochs+1):
  # Cost 계산
  z=x_train.matmul(W)+b
  cost=F.cross_entropy(z,y_train)
  '''
  hypothesis=F.softmax(x_train.matmul(W)+b, dim=1)
  y_one_hot=torch.zeros_like(hypothesis)
  y_one_hot.scatter_(1,y_train.unsqueeze(1),1)
  cost=(y_one_hot * -torch.log(F.softmax(hypothesis,dim=1))).sum(dim=1).mean()
  '''

  # optimizer로 H(x) 계산
  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  if epoch % 10 ==0:
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs,cost.item()
    ))

Epoch    0/100 Cost: 1.098612
Epoch   10/100 Cost: 1.069288
Epoch   20/100 Cost: 0.960612
Epoch   30/100 Cost: 0.901738
Epoch   40/100 Cost: 0.863738
Epoch   50/100 Cost: 0.836334
Epoch   60/100 Cost: 0.815246
Epoch   70/100 Cost: 0.798249
Epoch   80/100 Cost: 0.784040
Epoch   90/100 Cost: 0.771814
Epoch  100/100 Cost: 0.761050


## High-level Implementation with nn.Module

In [29]:
class SoftmaxClassifierModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(4,3)

  def forward(self,x):
    return self.linear(x) # |x| = (m,4) -> (m,3)

In [30]:
model = SoftmaxClassifierModel()

In [31]:
# optimizer 설정 
optimizer =optim.SGD(model.parameters(),lr=0.1)

nb_epochs=100
for epoch in range(nb_epochs+1):

  # H(x) 계산
  prediction=model(x_train) # |x_train|=(m,4), |prediction=(m,3)

  # cost 계산
  cost = F.cross_entropy(prediction,y_train) # |y_train| =(m,ddd)

  # cost로 H(x) 개선
  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  if epoch % 10 ==0:
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs,cost.item()
    ))

Epoch    0/100 Cost: 1.849513
Epoch   10/100 Cost: 0.859702
Epoch   20/100 Cost: 0.904911
Epoch   30/100 Cost: 0.835288
Epoch   40/100 Cost: 0.794997
Epoch   50/100 Cost: 0.767245
Epoch   60/100 Cost: 0.746041
Epoch   70/100 Cost: 0.728796
Epoch   80/100 Cost: 0.714160
Epoch   90/100 Cost: 0.701354
Epoch  100/100 Cost: 0.689894
