## Softmax regression cost function 구현하기

In [16]:
import torch
import torch.nn.functional as F

torch.manual_seed(1)

<torch._C.Generator at 0x1cea9a319f0>

### 1. 파이토치로 소프트맥스 비용함수 구현 ( Low - level )

In [17]:
z = torch.FloatTensor([1,2,3]) 

hypothesis = F.softmax(z, dim=0)
print(hypothesis)
hypothesis.sum() # 합이 1인지 확인

tensor([0.0900, 0.2447, 0.6652])


tensor(1.)

In [18]:
# 비용함수 직접 구현

z = torch.rand(3,5,requires_grad=True) #3x5 random data

hypothesis = F.softmax(z, dim=1) #합이 1이되는 vector들로 변경
print(hypothesis)

#임의의 실제값 만들기
y = torch.randint(5, (3,)).long()
print(y)

#one-hot encoding
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1),1) #연산뒤에 _를 붙이면 덮어쓰기가 됨 

#Softmax 함수 구현
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor([[0.2645, 0.1639, 0.1855, 0.2585, 0.1277],
        [0.2430, 0.1624, 0.2322, 0.1930, 0.1694],
        [0.2226, 0.1986, 0.2326, 0.1594, 0.1868]], grad_fn=<SoftmaxBackward>)
tensor([0, 2, 1])
tensor(1.4689, grad_fn=<MeanBackward0>)


### 파이토치로 소프트맥수의 비용함수 구현하기 ( High - level )

In [21]:
#low-level
torch.log(F.softmax(z, dim=1))



tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward>)

In [22]:
# High-level
F.log_softmax(z, dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward>)

In [28]:
#Low-level
(y_one_hot*-torch.log(F.softmax(z,dim=1))).sum(dim=1).mean()



tensor(1.4689, grad_fn=<MeanBackward0>)

In [30]:
#High-level
(y_one_hot*-F.log_softmax(z,dim=1)).sum(dim=1).mean()

tensor(1.4689, grad_fn=<MeanBackward0>)

In [32]:
#더 간단히
F.nll_loss(F.log_softmax(z,dim=1),y) 

#F.nll_loss()는 Negative Log Likelihood의 약자로, F.log_softmax()를 수행한 후 남은 수식들을 수행함
#F.cross_entropy()는 F.log_softmax()와 F.nll_loss()를 포함하고 있음.

tensor(1.4689, grad_fn=<NllLossBackward>)

In [34]:
#제일 간단히
F.cross_entropy(z,y)

tensor(1.4689, grad_fn=<NllLossBackward>)

## Softmax 회귀 구현하기

소프트맥스 회귀를 low level과 F.cross_entropy를 사용해서 구현해보자

In [107]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]

y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

print(x_train.shape)
print(y_train.shape)

torch.Size([8, 4])
torch.Size([8])


### Low level 구현

In [108]:
# set Weight and get z
W = torch.zeros((4,3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
z = x_train.matmul(W)+b 


# Optimizer
optimizer = optim.SGD([W, b], lr = 0.1)

# one-hot encoding
y_one_hot = torch.zeros_like(z)
y_one_hot.scatter_(1,y_train.unsqueeze(1),1)
print(y_one_hot)

nb_epochs = 2000
for epoch in range(nb_epochs+1):
    
    #H(X)
    z = x_train.matmul(W)+b
    
    #Cost function
    cost = (y_one_hot*-torch.log(F.softmax(z, dim=1))).sum(dim=0).mean()
    
    #Update
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch: {}/{} Cost: {:.4f}'.format(epoch,nb_epochs,cost.item()))
    

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
Epoch: 0/2000 Cost: 2.9296
Epoch: 100/2000 Cost: 6.0195
Epoch: 200/2000 Cost: 5.6420
Epoch: 300/2000 Cost: 5.3287
Epoch: 400/2000 Cost: 5.0231
Epoch: 500/2000 Cost: 4.7239
Epoch: 600/2000 Cost: 4.4342
Epoch: 700/2000 Cost: 4.1555
Epoch: 800/2000 Cost: 3.8880
Epoch: 900/2000 Cost: 3.6309
Epoch: 1000/2000 Cost: 3.3833
Epoch: 1100/2000 Cost: 3.1438
Epoch: 1200/2000 Cost: 2.9103
Epoch: 1300/2000 Cost: 2.6809
Epoch: 1400/2000 Cost: 2.4345
Epoch: 1500/2000 Cost: 1.8837
Epoch: 1600/2000 Cost: 1.2294
Epoch: 1700/2000 Cost: 0.2150
Epoch: 1800/2000 Cost: 0.2075
Epoch: 1900/2000 Cost: 0.2006
Epoch: 2000/2000 Cost: 0.1942


### High level 구현

In [109]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]

y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

# Model Initialization
model = nn.Linear(4,3)

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epoch = 2000
for epoch in range(nb_epoch+1):
    
    #H(x)
    z = model(x_train)
    
    #cost
    cost = F.cross_entropy(z,y_train)
    
    #update
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print('Epoch: {}/{} Cost: {:.4f}'.format(epoch, nb_epoch, cost.item()))
    
    

Epoch: 0/2000 Cost: 1.6168
Epoch: 10/2000 Cost: 1.4127
Epoch: 20/2000 Cost: 1.2778
Epoch: 30/2000 Cost: 1.1718
Epoch: 40/2000 Cost: 1.0892
Epoch: 50/2000 Cost: 1.0252
Epoch: 60/2000 Cost: 0.9759
Epoch: 70/2000 Cost: 0.9375
Epoch: 80/2000 Cost: 0.9073
Epoch: 90/2000 Cost: 0.8830
Epoch: 100/2000 Cost: 0.8631
Epoch: 110/2000 Cost: 0.8465
Epoch: 120/2000 Cost: 0.8323
Epoch: 130/2000 Cost: 0.8199
Epoch: 140/2000 Cost: 0.8090
Epoch: 150/2000 Cost: 0.7991
Epoch: 160/2000 Cost: 0.7902
Epoch: 170/2000 Cost: 0.7819
Epoch: 180/2000 Cost: 0.7743
Epoch: 190/2000 Cost: 0.7671
Epoch: 200/2000 Cost: 0.7603
Epoch: 210/2000 Cost: 0.7540
Epoch: 220/2000 Cost: 0.7479
Epoch: 230/2000 Cost: 0.7421
Epoch: 240/2000 Cost: 0.7366
Epoch: 250/2000 Cost: 0.7312
Epoch: 260/2000 Cost: 0.7261
Epoch: 270/2000 Cost: 0.7212
Epoch: 280/2000 Cost: 0.7164
Epoch: 290/2000 Cost: 0.7118
Epoch: 300/2000 Cost: 0.7074
Epoch: 310/2000 Cost: 0.7030
Epoch: 320/2000 Cost: 0.6988
Epoch: 330/2000 Cost: 0.6947
Epoch: 340/2000 Cost: 0.6

### Softmax 회귀 클래스로 구현하기

In [112]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

class Softmax_classification(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4,3)
        
    def forward(self,x):
        return self.linear(x)

model = Softmax_classification()

optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs+1):
    
    #H(x)
    z = model(x_train)
    
    #cost
    cost = F.cross_entropy(z,y_train)
    
    #update
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print('Epoch: {}/{} Cost: {:.4f}'.format(epoch, nb_epochs, cost.item()))

Epoch: 0/1000 Cost: 1.6168
Epoch: 100/1000 Cost: 0.6589
Epoch: 200/1000 Cost: 0.5734
Epoch: 300/1000 Cost: 0.5182
Epoch: 400/1000 Cost: 0.4733
Epoch: 500/1000 Cost: 0.4335
Epoch: 600/1000 Cost: 0.3966
Epoch: 700/1000 Cost: 0.3609
Epoch: 800/1000 Cost: 0.3254
Epoch: 900/1000 Cost: 0.2892
Epoch: 1000/1000 Cost: 0.2541


## Softmax regression으로 MNIST Data 분류하기

In [117]:
# import library

import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import matplotlib.pyplot as plt
import random

USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
print("다음 기기로 학습합니다:", device)

다음 기기로 학습합니다: cuda


In [119]:
# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [120]:
# hyperparameters
training_epochs = 15
batch_size = 100

In [122]:
#MNIST classifier 구현하기

#dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                        train=True,
                        transform=transforms.ToTensor(),
                        download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                        train = False,
                        transform=transforms.ToTensor(),
                        download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST\raw\train-images-idx3-ubyte.gz


31.0%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

90.5%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

102.8%


Extracting MNIST_data/MNIST\raw\train-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting MNIST_data/MNIST\raw\t10k-images-idx3-ubyte.gz to MNIST_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw\t10k-labels-idx1-ubyte.gz


112.7%
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Extracting MNIST_data/MNIST\raw\t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw

Processing...
Done!


In [129]:
# Dataset loader
data_loader = DataLoader(dataset=mnist_train,
                        batch_size = batch_size,
                        shuffle=True,
                        drop_last=True)

#MNIST data image of shape : 28 * 28 = 784
linear = nn.Linear(784,10, bias = True).to(device)

#cost and optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(linear.parameters(), lr=0.1)

for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        
        #배치크기가 100이므로 아래의 연산에서 X는 (100,784)의 텐서가 된다.
        X = X.view(-1,28*28).to(device)
        
        # 레이블은 one-hot encoding이 된 상태가 아니라 0~9의 정수.
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = linear(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch:', '%04d' % (epoch +1), 'cost =', '{:.9f}'.format(avg_cost))
    
print('Learning finished')



Epoch: 0001 cost = 0.536015809
Epoch: 0002 cost = 0.359202534
Epoch: 0003 cost = 0.331243694
Epoch: 0004 cost = 0.316479772
Epoch: 0005 cost = 0.306780636
Epoch: 0006 cost = 0.300162762
Epoch: 0007 cost = 0.295002848
Epoch: 0008 cost = 0.290735900
Epoch: 0009 cost = 0.287426829
Epoch: 0010 cost = 0.284311414
Epoch: 0011 cost = 0.281867415
Epoch: 0012 cost = 0.279607654
Epoch: 0013 cost = 0.277803063
Epoch: 0014 cost = 0.276044399
Epoch: 0015 cost = 0.274502218
Learning finished
