<a href="https://colab.research.google.com/github/suubkiim/Pytorch-practice/blob/master/%EC%A0%95%EB%B3%B4%EA%B2%80%EC%83%89_%EA%B3%BC%EC%A0%9C_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function

import torch
import numpy as np

class TwoLayerNet(object):
  """
  A two-layer perceptron (fully connected network).

  Dimensions:
  - D: input dimension
  - H: hidden layer dimension
  - C: output dimension (number of classes)

  The network is trained with a softmax (negative log-likelihood) loss and
  uses ReLU as the hidden-layer activation. The architecture is:

  input - linear layer - ReLU - linear layer - output
  """

  def __init__(self, input_size, hidden_size, output_size, std=1e-4):
    """
    Initialize the model: weights are small random values, biases are zero.
    Weights and biases are stored in the dictionary self.params:

    W1: first-layer weights; shape (D, H)
    b1: first-layer biases; shape (H,)
    W2: second-layer weights; shape (H, C)
    b2: second-layer biases; shape (C,)

    Inputs:
    - input_size: dimension D of the input data.
    - hidden_size: number of neurons H in the hidden layer.
    - output_size: output dimension C.
    - std: scale applied to the standard-normal weight initialization.
    """
    # The creation order is kept fixed so that a given torch seed always
    # yields the same weights.
    self.params = {}
    self.params['W1'] = std * torch.randn(input_size, hidden_size)
    self.params['b1'] = torch.zeros(hidden_size)
    self.params['W2'] = std * torch.randn(hidden_size, output_size)
    self.params['b2'] = torch.zeros(output_size)

  def loss(self, X, y=None):
    """
    Compute the class scores, and optionally the loss and its gradients.

    Inputs:
    - X: 2-D input tensor of shape (N, D); each X[i] is one sample.
    - y: integer class labels of shape (N,), with y[i] the label of X[i].
      Anything accepted by torch.as_tensor works. If y is None only the
      scores are computed.

    Returns:
    If y is None: the score tensor of shape (N, C), where scores[i, c] is
    the score of class c for input X[i].

    Otherwise a tuple (loss, grads):
    - loss: mean negative log-likelihood over the batch (0-dim tensor).
    - grads: dictionary {parameter name: gradient} with the same keys and
      shapes as self.params.
    """
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N = X.shape[0]

    # Forward pass: linear -> ReLU -> linear.
    hidden_pre = torch.mm(X, W1) + b1
    hidden = hidden_pre.clamp(min=0)
    scores = torch.mm(hidden, W2) + b2

    # Without targets there is nothing to differentiate; return the scores.
    if y is None:
      return scores

    y = torch.as_tensor(y, dtype=torch.long, device=scores.device)
    rows = torch.arange(N, device=scores.device)

    # Log-softmax with the row maximum subtracted first: the result is
    # mathematically unchanged, but exp() can no longer overflow.
    shifted = scores - scores.max(dim=1, keepdim=True)[0]
    log_probs = shifted - torch.log(torch.sum(torch.exp(shifted), dim=1, keepdim=True))
    softmax = torch.exp(log_probs)

    # Negative log-likelihood of the correct class, averaged over the batch.
    loss = -torch.sum(log_probs[rows, y]) / N

    # Gradient of the mean loss w.r.t. the scores: (softmax - one_hot(y)) / N.
    dscores = softmax.clone()
    dscores[rows, y] -= 1
    dscores /= N

    grads = {}

    # Backward through the second linear layer.
    grads['W2'] = torch.mm(hidden.t(), dscores)
    grads['b2'] = torch.sum(dscores, dim=0)
    dhidden = torch.mm(dscores, W2.t())

    # Backward through ReLU: gradient flows only where the pre-activation
    # was strictly positive.
    dhidden_pre = dhidden * (hidden_pre > 0).to(dhidden.dtype)

    # Backward through the first linear layer.
    grads['W1'] = torch.mm(X.t(), dhidden_pre)
    grads['b1'] = torch.sum(dhidden_pre, dim=0)

    return loss, grads

  def train(self, X, y,
            learning_rate=1e-3, learning_rate_decay=0.95,
            num_iters=100,
            batch_size=200, verbose=False):
    """
    Train the network with gradient descent.

    Every iteration uses the full (X, y) set; batch_size only determines how
    many iterations make up one "epoch" (when accuracy is recorded and the
    learning rate is decayed).

    Inputs:
    - X: tensor of shape (N, D) with the training data.
    - y: integer labels of shape (N,); y[i] = c means X[i] has label c,
      with 0 <= c < C.
    - learning_rate: scalar step size.
    - learning_rate_decay: factor applied to the learning rate after each epoch.
    - num_iters: number of update steps.
    - batch_size: used to derive the number of iterations per epoch.
    - verbose: if true, print the loss every 100 iterations.

    Returns:
    A dictionary with 'loss_history' (one loss per iteration),
    'train_acc_history' (one accuracy per epoch) and 'val_acc_history'
    (always empty; no validation data is passed in).
    """
    y = torch.as_tensor(y, dtype=torch.long, device=X.device)
    num_train = X.shape[0]
    # Integer division keeps the modulo test below exact.
    iterations_per_epoch = max(num_train // batch_size, 1)

    loss_history = []
    train_acc_history = []
    val_acc_history = []

    for it in range(num_iters):
      loss, grads = self.loss(X, y=y)
      loss_history.append(loss)

      # Vanilla gradient-descent step on every parameter (weights AND
      # biases), using the current, possibly decayed, learning rate.
      for name, grad in grads.items():
        self.params[name] -= learning_rate * grad

      if verbose and it % 100 == 0:
        print('iteration %d / %d: loss %f' % (it, num_iters, loss))

      if it % iterations_per_epoch == 0:
        # Track training accuracy once per epoch.
        train_acc = (self.predict(X) == y).float().mean()
        train_acc_history.append(train_acc)

        learning_rate *= learning_rate_decay

    return {
      'loss_history': loss_history,
      'train_acc_history': train_acc_history,
      'val_acc_history': val_acc_history,
    }

  def predict(self, X):
    """Return the index of the highest-scoring class for each row of X."""
    return torch.argmax(self.loss(X), 1)




In [14]:
# A bit of setup

import torch

from NN_SB import TwoLayerNet

# Create a small net and some toy data to check your implementations.
# Note that we set the random seed for repeatable experiments.

# Dimensions of the toy problem used to sanity-check the implementation.
num_inputs = 5     # number of toy samples
input_size = 4     # features per sample
hidden_size = 10   # hidden-layer width
num_classes = 3    # number of output classes

def init_toy_model():
    """Build the toy TwoLayerNet after seeding torch with 0 for repeatable weights."""
    # The seed must be set before the constructor draws its random weights.
    torch.manual_seed(0)
    toy_net = TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)
    return toy_net

def init_toy_data():
    """Return the toy inputs and labels, seeding torch with 1 for repeatable inputs."""
    torch.manual_seed(1)
    features = torch.randn(num_inputs, input_size) * 10
    labels = torch.tensor([0, 1, 2, 2, 1], dtype=torch.long)
    return features, labels

# Build the toy network and data.
net = init_toy_model()
X, y = init_toy_data()

# Reference scores the forward pass is compared against.
correct_scores = torch.tensor(
    [[0.24617445, 0.1261572, 1.1627575],
     [0.18364899, -0.0675799, -0.21310908],
     [-0.2075074, -0.12525336, -0.06508598],
     [0.08643292, 0.07172455, 0.2353122],
     [0.8219606, -0.32560882, -0.77807254]]
)

# Forward pass only: with no labels, loss() returns the raw scores.
scores = net.loss(X)
print('Your scores:')
print(scores)
print()
print('correct scores:')
print(correct_scores)
print()

# Summed absolute difference between computed and reference scores.
print('Difference between your scores and correct scores:')
print((scores - correct_scores).abs().sum())

# Loss check against the reference value.
loss, _ = net.loss(X, y)
correct_loss = 1.2444149

print('Difference between your loss and correct loss:')
print((loss - correct_loss).abs().sum())

loss, grads = net.loss(X, y)

# Train on the toy data and report the first and last accuracy / loss.
results = net.train(X, y, 0.05)
acc_history = results['train_acc_history']
loss_history = results['loss_history']
print("Train acc: %f -> %f\nTrain loss: %f -> %f" % (
    acc_history[0], acc_history[-1], loss_history[0], loss_history[-1]))

NameError: ignored

In [8]:
# Mount Google Drive inside the Colab runtime (prompts for authorization).
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive
