### 1.아래의 참고비디오를 시청 후, 아래의 소스코드를 실행하시오. 각 line 별로 주석을 작성하시오.

In [4]:
import torch
import torch.nn as nn
from torch.autograd import Variable 

#랜덤 생성에 사용되는 시드 값은 777을 사용한다.
torch.manual_seed(777)  # reproducibility 

#index를 char 형태로 표시한 것.
idx2char = ['h', 'i', 'e', 'l', 'o'] 

# Teach hihell -> ihello
x_data = [[0, 1, 0, 2, 3, 3]]# hihell
y_data = [1, 0, 2, 3, 3, 4]    # ihello 

# As we have one batch of samples, we will change them to variables only once
inputs = Variable(torch.LongTensor(x_data))
labels = Variable(torch.LongTensor(y_data))

num_classes = 5 
input_size = 5 # 5개의 단어를 학습시켜야 함
embedding_size = 10  # embedding size
hidden_size = 5  # output from the LSTM. 5 to directly predict onehot
batch_size = 1   # one sentence
sequence_length = 6 # |ihello| == 6
num_layers = 1  # one-layer rnn 


#모델 정의
class Model(nn.Module): 
     def __init__(self):
            super(Model, self).__init__()
            #embedding vector 생성
            self.embedding = nn.Embedding(input_size, embedding_size)
            #값 초기화
            self.num_layers = num_layers
            self.hidden_size = hidden_size
            #RNN 모델, batch를 먼저 순서로 줄 것이다.
            self.rnn = nn.RNN(input_size=embedding_size, 
                          hidden_size=5, batch_first=True)
            #선형
            self.fc = nn.Linear(hidden_size, num_classes) 
    #함수 진행
     def forward(self, x):
            # Initialize hidden and cell states
            # (num_layers * num_directions, batch, hidden_size)
            h_0 = Variable(torch.zeros(
                self.num_layers, x.size(0), self.hidden_size)) 
            
            #embedding vector
            emb = self.embedding(x)
            #shape 조정
            emb = emb.view(batch_size, sequence_length, -1) 
            
            # Propagate embedding through RNN
            # Input: (batch, seq_len, embedding_size)
            # h_0: (num_layers * num_directions, batch, hidden_size)
            out, _ = self.rnn(emb, h_0)
            
            return self.fc(out.view(-1, num_classes)) 

# Instantiate RNN model
model = Model()
print(model) 

# Set loss and optimizer function
# CrossEntropyLoss = LogSoftmax + NLLLoss
criterion = torch.nn.CrossEntropyLoss()
#Adam 사용
optimizer = torch.optim.Adam(model.parameters(), lr=0.1) 

# Train the model
for epoch in range(100):
    outputs = model(inputs)
    #0으로 초기화
    optimizer.zero_grad()
    #loss 계산
    loss = criterion(outputs, labels)
    #역전파
    loss.backward()
    #스텝 진행
    optimizer.step()
    #max값 출력
    _, idx = outputs.max(1)
    idx = idx.data.numpy()
    #결과값
    result_str = [idx2char[c] for c in idx.squeeze()]
    
    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

Model(
  (embedding): Embedding(5, 10)
  (rnn): RNN(10, 5, batch_first=True)
  (fc): Linear(in_features=5, out_features=5, bias=True)
)
epoch: 1, loss: 1.768
Predicted string:  eheohh
epoch: 2, loss: 1.396
Predicted string:  oheiii
epoch: 3, loss: 1.132
Predicted string:  iheloo
epoch: 4, loss: 0.949
Predicted string:  ihello
epoch: 5, loss: 0.798
Predicted string:  ihelll
epoch: 6, loss: 0.659
Predicted string:  ihelll
epoch: 7, loss: 0.547
Predicted string:  ihelll
epoch: 8, loss: 0.449
Predicted string:  ihelll
epoch: 9, loss: 0.384
Predicted string:  ihelll
epoch: 10, loss: 0.343
Predicted string:  ihello
epoch: 11, loss: 0.303
Predicted string:  ihello
epoch: 12, loss: 0.236
Predicted string:  ihello
epoch: 13, loss: 0.225
Predicted string:  ihello
epoch: 14, loss: 0.237
Predicted string:  iheloo
epoch: 15, loss: 0.226
Predicted string:  iheloo
epoch: 16, loss: 0.145
Predicted string:  ihello
epoch: 17, loss: 0.293
Predicted string:  ihelll
epoch: 18, loss: 0.152
Predicted string:

### 1)PyTorchZeroToAll 웹사이트의 12_1_rnn_basics.py,12_2_hello_rnn.py, 12_3_hello_rnn_seq.py 를 먼저 실행해 보시오. (아래의 코드는 12_4_hello_rnn_emb.py 임.) 

#### 12_1_rnn_basics.py

In [5]:
import torch

import torch.nn as nn

from torch.autograd import Variable



# One hot encoding for each char in 'hello'

h = [1, 0, 0, 0]

e = [0, 1, 0, 0]

l = [0, 0, 1, 0]

o = [0, 0, 0, 1]



# One cell RNN input_dim (4) -> output_dim (2). sequence: 5

cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)



# (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False

hidden = Variable(torch.randn(1, 1, 2))



# Propagate input through RNN

# Input: (batch, seq_len, input_size) when batch_first=True

inputs = Variable(torch.Tensor([h, e, l, l, o]))

for one in inputs:

    one = one.view(1, 1, -1)

    # Input: (batch, seq_len, input_size) when batch_first=True

    out, hidden = cell(one, hidden)

    print("one input size", one.size(), "out size", out.size())



# We can do the whole at once

# Propagate input through RNN

# Input: (batch, seq_len, input_size) when batch_first=True

inputs = inputs.view(1, 5, -1)

out, hidden = cell(inputs, hidden)

print("sequence input size", inputs.size(), "out size", out.size())





# hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False

hidden = Variable(torch.randn(1, 3, 2))



# One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3

# 3 batches 'hello', 'eolll', 'lleel'

# rank = (3, 5, 4)

inputs = Variable(torch.Tensor([[h, e, l, l, o],

                                [e, o, l, l, l],

                                [l, l, e, e, l]]))



# Propagate input through RNN

# Input: (batch, seq_len, input_size) when batch_first=True

# B x S x I

out, hidden = cell(inputs, hidden)

print("batch input size", inputs.size(), "out size", out.size())





# One cell RNN input_dim (4) -> output_dim (2)

cell = nn.RNN(input_size=4, hidden_size=2)



# The given dimensions dim0 and dim1 are swapped.

inputs = inputs.transpose(dim0=0, dim1=1)

# Propagate input through RNN

# Input: (seq_len, batch_size, input_size) when batch_first=False (default)

# S x B x I

out, hidden = cell(inputs, hidden)

print("batch input size", inputs.size(), "out size", out.size())

one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
one input size torch.Size([1, 1, 4]) out size torch.Size([1, 1, 2])
sequence input size torch.Size([1, 5, 4]) out size torch.Size([1, 5, 2])
batch input size torch.Size([3, 5, 4]) out size torch.Size([3, 5, 2])
batch input size torch.Size([5, 3, 4]) out size torch.Size([5, 3, 2])


#### 12_2_hello_rnn.py

In [10]:
import sys

import torch

import torch.nn as nn

from torch.autograd import Variable



torch.manual_seed(777)  # reproducibility

#            0    1    2    3    4

idx2char = ['h', 'i', 'e', 'l', 'o']



# Teach hihell -> ihello

x_data = [0, 1, 0, 2, 3, 3]   # hihell

one_hot_lookup = [[1, 0, 0, 0, 0],  # 0

                  [0, 1, 0, 0, 0],  # 1

                  [0, 0, 1, 0, 0],  # 2

                  [0, 0, 0, 1, 0],  # 3

                  [0, 0, 0, 0, 1]]  # 4



y_data = [1, 0, 2, 3, 3, 4]    # ihello

x_one_hot = [one_hot_lookup[x] for x in x_data]



# As we have one batch of samples, we will change them to variables only once

inputs = Variable(torch.Tensor(x_one_hot))

labels = Variable(torch.LongTensor(y_data))



num_classes = 5

input_size = 5  # one-hot size

hidden_size = 5  # output from the RNN. 5 to directly predict one-hot

batch_size = 1   # one sentence

sequence_length = 1  # One by one

num_layers = 1  # one-layer rnn





class Model(nn.Module):



    def __init__(self):

        super(Model, self).__init__()

        self.rnn = nn.RNN(input_size=input_size,

                          hidden_size=hidden_size, batch_first=True)



    def forward(self, hidden, x):

        # Reshape input (batch first)

        x = x.view(batch_size, sequence_length, input_size)



        # Propagate input through RNN

        # Input: (batch, seq_len, input_size)

        # hidden: (num_layers * num_directions, batch, hidden_size)

        out, hidden = self.rnn(x, hidden)

        return hidden, out.view(-1, num_classes)



    def init_hidden(self):

        # Initialize hidden and cell states

        # (num_layers * num_directions, batch, hidden_size)

        return Variable(torch.zeros(num_layers, batch_size, hidden_size))





# Instantiate RNN model

model = Model()

print(model)



# Set loss and optimizer function

# CrossEntropyLoss = LogSoftmax + NLLLoss

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.1)



# Train the model

for epoch in range(100):

    optimizer.zero_grad()

    loss = 0

    hidden = model.init_hidden()



    sys.stdout.write("predicted string: ")

    for input, label in zip(inputs, labels):

        # print(input.size(), label.size())

        hidden, output = model(hidden, input)

        val, idx = output.max(1)

        sys.stdout.write(idx2char[idx.data[0]])
        #label -> label.unsqueeze(0)
        loss += criterion(output, label.unsqueeze(0))



    print(", epoch: %d, loss: %1.3f" % (epoch + 1, loss.data.item()))



    loss.backward()

    optimizer.step()



print("Learning finished!")

Model(
  (rnn): RNN(5, 5, batch_first=True)
)
predicted string: llllll, epoch: 1, loss: 10.155
predicted string: llllll, epoch: 2, loss: 9.137
predicted string: llllll, epoch: 3, loss: 8.355
predicted string: llllll, epoch: 4, loss: 7.577
predicted string: llllll, epoch: 5, loss: 6.876
predicted string: lhelll, epoch: 6, loss: 6.327
predicted string: ihelll, epoch: 7, loss: 6.014
predicted string: ihelll, epoch: 8, loss: 5.787
predicted string: ihelll, epoch: 9, loss: 5.477
predicted string: ihelll, epoch: 10, loss: 5.274
predicted string: ihelll, epoch: 11, loss: 5.041
predicted string: ihello, epoch: 12, loss: 4.827
predicted string: ihello, epoch: 13, loss: 4.676
predicted string: ihello, epoch: 14, loss: 4.550
predicted string: ihello, epoch: 15, loss: 4.430
predicted string: ihello, epoch: 16, loss: 4.305
predicted string: ihello, epoch: 17, loss: 4.164
predicted string: ihelll, epoch: 18, loss: 4.003
predicted string: ihelll, epoch: 19, loss: 3.860
predicted string: ihelll, epoch

####  12_3_hello_rnn_seq.py

In [12]:
import torch

import torch.nn as nn

from torch.autograd import Variable



torch.manual_seed(777)  # reproducibility





idx2char = ['h', 'i', 'e', 'l', 'o']



# Teach hihell -> ihello

x_data = [[0, 1, 0, 2, 3, 3]]   # hihell

x_one_hot = [[[1, 0, 0, 0, 0],   # h 0

              [0, 1, 0, 0, 0],   # i 1

              [1, 0, 0, 0, 0],   # h 0

              [0, 0, 1, 0, 0],   # e 2

              [0, 0, 0, 1, 0],   # l 3

              [0, 0, 0, 1, 0]]]  # l 3



y_data = [1, 0, 2, 3, 3, 4]    # ihello



# As we have one batch of samples, we will change them to variables only once

inputs = Variable(torch.Tensor(x_one_hot))

labels = Variable(torch.LongTensor(y_data))



num_classes = 5

input_size = 5  # one-hot size

hidden_size = 5  # output from the LSTM. 5 to directly predict one-hot

batch_size = 1   # one sentence

sequence_length = 6  # |ihello| == 6

num_layers = 1  # one-layer rnn





class RNN(nn.Module):



    def __init__(self, num_classes, input_size, hidden_size, num_layers):

        super(RNN, self).__init__()



        self.num_classes = num_classes

        self.num_layers = num_layers

        self.input_size = input_size

        self.hidden_size = hidden_size

        self.sequence_length = sequence_length



        self.rnn = nn.RNN(input_size=5, hidden_size=5, batch_first=True)



    def forward(self, x):

        # Initialize hidden and cell states

        # (num_layers * num_directions, batch, hidden_size) for batch_first=True

        h_0 = Variable(torch.zeros(

            self.num_layers, x.size(0), self.hidden_size))



        # Reshape input

        x.view(x.size(0), self.sequence_length, self.input_size)



        # Propagate input through RNN

        # Input: (batch, seq_len, input_size)

        # h_0: (num_layers * num_directions, batch, hidden_size)



        out, _ = self.rnn(x, h_0)

        return out.view(-1, num_classes)





# Instantiate RNN model

rnn = RNN(num_classes, input_size, hidden_size, num_layers)

print(rnn)



# Set loss and optimizer function

# CrossEntropyLoss = LogSoftmax + NLLLoss

criterion = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(rnn.parameters(), lr=0.1)



# Train the model

for epoch in range(100):

    outputs = rnn(inputs)

    optimizer.zero_grad()

    loss = criterion(outputs, labels)

    loss.backward()

    optimizer.step()

    _, idx = outputs.max(1)

    idx = idx.data.numpy()

    result_str = [idx2char[c] for c in idx.squeeze()]

    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data.item()))

    print("Predicted string: ", ''.join(result_str))



print("Learning finished!")

RNN(
  (rnn): RNN(5, 5, batch_first=True)
)
epoch: 1, loss: 1.693
Predicted string:  llllll
epoch: 2, loss: 1.523
Predicted string:  llllll
epoch: 3, loss: 1.393
Predicted string:  llllll
epoch: 4, loss: 1.263
Predicted string:  llllll
epoch: 5, loss: 1.146
Predicted string:  llllll
epoch: 6, loss: 1.055
Predicted string:  lhelll
epoch: 7, loss: 1.002
Predicted string:  ihelll
epoch: 8, loss: 0.965
Predicted string:  ihelll
epoch: 9, loss: 0.913
Predicted string:  ihelll
epoch: 10, loss: 0.879
Predicted string:  ihelll
epoch: 11, loss: 0.840
Predicted string:  ihelll
epoch: 12, loss: 0.805
Predicted string:  ihello
epoch: 13, loss: 0.779
Predicted string:  ihello
epoch: 14, loss: 0.758
Predicted string:  ihello
epoch: 15, loss: 0.738
Predicted string:  ihello
epoch: 16, loss: 0.717
Predicted string:  ihello
epoch: 17, loss: 0.694
Predicted string:  ihello
epoch: 18, loss: 0.667
Predicted string:  ihelll
epoch: 19, loss: 0.643
Predicted string:  ihelll
epoch: 20, loss: 0.647
Predicted s

### 2)RNN과 CNN의 차이점은 무엇인가.

CNN은 데이터를 feature로 추출하여 이 feature들의 패턴을 파악하는 구조이다.<br>
RNN은 반복적이고 순차적인 데이터에 특화된 것으로 내부의 순환구조가 들어있는 것이 특징이다.

### 2. 아래의 참고비디오를 시청 후, 아래의 소스코드를 실행하시오. 각 line 별로 주석을 작성하시오.

In [13]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import numpy as np
import itertools

#1차원 배열로 펴는 함수
def flatten(l):
    return list(itertools.chain.from_iterable(l))

#단어
seqs = ['ghatmasala', 'nicela', 'chutpakodas']

# make <pad> idx 0
#seq에 있는 알파벳을 리스트로 만들어줌.
vocab = ['<pad>'] + sorted(list(set(flatten(seqs))))

# make model
embedding_size = 3
#embedding 생성
embed = nn.Embedding(len(vocab), embedding_size)
#LSTM 모델
lstm = nn.LSTM(embedding_size, 5)

#seq를 벡터화함(알파벳->인덱스)
vectorized_seqs = [[vocab.index(tok) for tok in seq]for seq in seqs]
print("vectorized_seqs", vectorized_seqs)

#가장 긴 입력 확인
print([x for x in map(len, vectorized_seqs)])
# get the length of each seq in your batch
seq_lengths = torch.LongTensor([x for x in map(len, vectorized_seqs)])

# dump padding everywhere, and place seqs on the left.
# NOTE: you only need a tensor as big as your longest sequence
#zero_padding
seq_tensor = Variable(torch.zeros(
    (len(vectorized_seqs), seq_lengths.max()))).long()

#벡터들 연결
for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = torch.LongTensor(seq)

print("seq_tensor", seq_tensor)

# SORT YOUR TENSORS BY LENGTH!
#padding 안한 것을 정렬
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]

print("seq_tensor after sorting", seq_tensor)

# utils.rnn lets you give (B,L,D) tensors where B is the batch size, L is the maxlength, if you use batch_first=True
# Otherwise, give (L,B,D) tensors
seq_tensor = seq_tensor.transpose(0, 1)  # (B,L,D) -> (L,B,D)
print("seq_tensor after transposing", seq_tensor.size(), seq_tensor.data)



# embed your sequences
embeded_seq_tensor = embed(seq_tensor)
print("seq_tensor after embeding", embeded_seq_tensor.size(), seq_tensor.data)

# pack them up nicely
#packing을 해줌
packed_input = pack_padded_sequence(
    embeded_seq_tensor, seq_lengths.cpu().numpy())

# throw them through your LSTM (remember to give batch_first=True here if
# you packed with it)
packed_output, (ht, ct) = lstm(packed_input)

# unpack your output if required
#다시 unpacking
output, _ = pad_packed_sequence(packed_output)
print("Lstm output", output.size(), output.data)

# Or if you just want the final hidden state?
print("Last output", ht[-1].size(), ht[-1].data)

vectorized_seqs [[5, 6, 1, 15, 10, 1, 14, 1, 9, 1], [11, 7, 2, 4, 9, 1], [2, 6, 16, 15, 13, 1, 8, 12, 3, 1, 14]]
[10, 6, 11]
seq_tensor tensor([[ 5,  6,  1, 15, 10,  1, 14,  1,  9,  1,  0],
        [11,  7,  2,  4,  9,  1,  0,  0,  0,  0,  0],
        [ 2,  6, 16, 15, 13,  1,  8, 12,  3,  1, 14]])
seq_tensor after sorting tensor([[ 2,  6, 16, 15, 13,  1,  8, 12,  3,  1, 14],
        [ 5,  6,  1, 15, 10,  1, 14,  1,  9,  1,  0],
        [11,  7,  2,  4,  9,  1,  0,  0,  0,  0,  0]])
seq_tensor after transposing torch.Size([11, 3]) tensor([[ 2,  5, 11],
        [ 6,  6,  7],
        [16,  1,  2],
        [15, 15,  4],
        [13, 10,  9],
        [ 1,  1,  1],
        [ 8, 14,  0],
        [12,  1,  0],
        [ 3,  9,  0],
        [ 1,  1,  0],
        [14,  0,  0]])
seq_tensor after embeding torch.Size([11, 3, 3]) tensor([[ 2,  5, 11],
        [ 6,  6,  7],
        [16,  1,  2],
        [15, 15,  4],
        [13, 10,  9],
        [ 1,  1,  1],
        [ 8, 14,  0],
        [12,  1,  

### 1) PyTorchZeroToAll 웹사이트의 13_1_rnn_classification_basics.py, 13_2_rnn_classification.py, 13_3_char_rnn.py 를 먼저 실행해 보시오. (아래의 코드는 13_4_pack_pad.py 임.) 

#### 13_1_rnn_classification_basics.py

In [16]:
import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

from name_dataset import NameDataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Parameters and DataLoaders
HIDDEN_SIZE = 100
N_CHARS = 128  # ASCII
N_CLASSES = 18


class RNNClassifier(nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        # Note: we run this all at once (over the whole input sequence)

        # input = B x S . size(0) = B
        batch_size = input.size(0)

        # input:  B x S  -- (transpose) --> S x B
        input = input.t()

        # Embedding S x B -> S x B x I (embedding size)
        print("  input", input.size())
        embedded = self.embedding(input)
        print("  embedding", embedded.size())

        # Make a hidden
        hidden = self._init_hidden(batch_size)

        output, hidden = self.gru(embedded, hidden)
        print("  gru hidden output", hidden.size())
        # Use the last layer output as FC's input
        # No need to unpack, since we are going to use hidden
        fc_output = self.fc(hidden)
        print("  fc output", fc_output.size())
        return fc_output

    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_size)
        return Variable(hidden)

# Help functions
def str2ascii_arr(msg):
    arr = [ord(c) for c in msg]
    return arr, len(arr)

# pad sequences and sort the tensor
def pad_sequences(vectorized_seqs, seq_lengths):
    seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long()
    for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
    return seq_tensor

# Create necessary variables, lengths, and target
def make_variables(names):
    sequence_and_length = [str2ascii_arr(name) for name in names]
    vectorized_seqs = [sl[0] for sl in sequence_and_length]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length])
    return pad_sequences(vectorized_seqs, seq_lengths)

if __name__ == '__main__':
    names = ['adylov', 'solan', 'hard', 'san']
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_CLASSES)

    for name in names:
        arr, _ = str2ascii_arr(name)
        inp = Variable(torch.LongTensor([arr]))
        out = classifier(inp)
        print("in", inp.size(), "out", out.size())


    inputs = make_variables(names)
    out = classifier(inputs)
    print("batch in", inputs.size(), "batch out", out.size())

  input torch.Size([6, 1])
  embedding torch.Size([6, 1, 100])
  gru hidden output torch.Size([1, 1, 100])
  fc output torch.Size([1, 1, 18])
in torch.Size([1, 6]) out torch.Size([1, 1, 18])
  input torch.Size([5, 1])
  embedding torch.Size([5, 1, 100])
  gru hidden output torch.Size([1, 1, 100])
  fc output torch.Size([1, 1, 18])
in torch.Size([1, 5]) out torch.Size([1, 1, 18])
  input torch.Size([4, 1])
  embedding torch.Size([4, 1, 100])
  gru hidden output torch.Size([1, 1, 100])
  fc output torch.Size([1, 1, 18])
in torch.Size([1, 4]) out torch.Size([1, 1, 18])
  input torch.Size([3, 1])
  embedding torch.Size([3, 1, 100])
  gru hidden output torch.Size([1, 1, 100])
  fc output torch.Size([1, 1, 18])
in torch.Size([1, 3]) out torch.Size([1, 1, 18])
  input torch.Size([6, 4])
  embedding torch.Size([6, 4, 100])
  gru hidden output torch.Size([1, 4, 100])
  fc output torch.Size([1, 4, 18])
batch in torch.Size([4, 6]) batch out torch.Size([1, 4, 18])


#### 13_2_rnn_classification.py

In [22]:
import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

from name_dataset import NameDataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Parameters and DataLoaders
HIDDEN_SIZE = 100
N_LAYERS = 2
BATCH_SIZE = 256
N_EPOCHS = 100

test_dataset = NameDataset(is_train_set=False)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE, shuffle=True)

train_dataset = NameDataset(is_train_set=True)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE, shuffle=True)



N_COUNTRIES = len(train_dataset.get_countries())
print(N_COUNTRIES, "countries")

N_CHARS = 128  # ASCII

# Some utility functions
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def create_variable(tensor):
    # Do cuda() before wrapping with variable
    if torch.cuda.is_available():
        return Variable(tensor.cuda())
    else:
        return Variable(tensor)

# pad sequences and sort the tensor
def pad_sequences(vectorized_seqs, seq_lengths, countries):
    seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long()
    for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
    # Sort tensors by their length
    seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
    seq_tensor = seq_tensor[perm_idx]

    # Also sort the target (countries) in the same order
    target = countries2tensor(countries)
    if len(countries):
        target = target[perm_idx]

    # Return variables
    # DataParallel requires everything to be a Variable
    return create_variable(seq_tensor), \
        create_variable(seq_lengths), \
        create_variable(target)

# Create necessary variables, lengths, and target
def make_variables(names, countries):
    sequence_and_length = [str2ascii_arr(name) for name in names]
    vectorized_seqs = [sl[0] for sl in sequence_and_length]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequence_and_length])
    return pad_sequences(vectorized_seqs, seq_lengths, countries)

def str2ascii_arr(msg):
    arr = [ord(c) for c in msg]
    return arr, len(arr)

def countries2tensor(countries):
    country_ids = [train_dataset.get_country_id(
        country) for country in countries]
    return torch.LongTensor(country_ids)

class RNNClassifier(nn.Module):
    # Our model
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = int(bidirectional) + 1

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input, seq_lengths):
        # Note: we run this all at once (over the whole input sequence)
        # input shape: B x S (input size)
        # transpose to make S(sequence) x B (batch)
        input = input.t()
        batch_size = input.size(1)

        # Make a hidden
        hidden = self._init_hidden(batch_size)

        # Embedding S x B -> S x B x I (embedding size)
        embedded = self.embedding(input)
        # Pack them up nicely
        gru_input = pack_padded_sequence(
            embedded, seq_lengths.data.cpu().numpy())
        # To compact weights again call flatten_parameters().
        self.gru.flatten_parameters()
        output, hidden = self.gru(gru_input, hidden)

        # Use the last layer output as FC's input
        # No need to unpack, since we are going to use hidden
        fc_output = self.fc(hidden[-1])
        return fc_output

    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                             batch_size, self.hidden_size)
        return create_variable(hidden)

# Train cycle
def train():

    total_loss = 0

    for i, (names, countries) in enumerate(train_loader, 1):
        input, seq_lengths, target = make_variables(names, countries)
        output = classifier(input, seq_lengths)

        loss = criterion(output, target)
        total_loss += loss.data.item()

        classifier.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format(
                time_since(start), epoch,  i *
                len(names), len(train_loader.dataset),
                100. * i * len(names) / len(train_loader.dataset),
                total_loss / i * len(names)))
    return total_loss

# Testing cycle
def test(name=None):
    # Predict for a given name
    if name:
        input, seq_lengths, target = make_variables([name], [])
        output = classifier(input, seq_lengths)
        pred = output.data.max(1, keepdim=True)[1]
        country_id = pred.cpu().numpy()[0][0]
        print(name, "is", train_dataset.get_country(country_id))
        return

    print("evaluating trained model ...")
    correct = 0
    train_data_size = len(test_loader.dataset)

    for names, countries in test_loader:
        input, seq_lengths, target = make_variables(names, countries)
        output = classifier(input, seq_lengths)
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, train_data_size, 100. * correct / train_data_size))

if __name__ == '__main__':

    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRIES, N_LAYERS)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [33, xxx] -> [11, ...], [11, ...], [11, ...] on 3 GPUs
        classifier = nn.DataParallel(classifier)

    if torch.cuda.is_available():
        classifier.cuda()

    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    start = time.time()
    print("Training for %d epochs..." % N_EPOCHS)
    for epoch in range(1, N_EPOCHS + 1):
        # Train cycle
        train()

        # Testing
        test()

        # Testing several samples
        test("Sung")
        test("Jungwoo")
        test("Soojin")
        test("Nako")

18 countries
Training for 100 epochs...
evaluating trained model ...

Test set: Accuracy: 4175/6700 (62%)

Sung is English
Jungwoo is Russian
Soojin is Russian
Nako is Arabic
evaluating trained model ...

Test set: Accuracy: 4719/6700 (70%)

Sung is Dutch
Jungwoo is Russian
Soojin is Japanese
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 4990/6700 (74%)

Sung is Chinese
Jungwoo is Russian
Soojin is Japanese
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5191/6700 (77%)

Sung is Chinese
Jungwoo is Russian
Soojin is Czech
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5330/6700 (79%)

Sung is Chinese
Jungwoo is Russian
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5420/6700 (80%)

Sung is Chinese
Jungwoo is Russian
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5448/6700 (81%)

Sung is Chinese
Jungwoo is Russian
Soojin is Dutch
Nako is Japanese
evaluating

evaluating trained model ...

Test set: Accuracy: 5580/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5579/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5585/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5577/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5588/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5582/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Dutch
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5588/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5565/67

evaluating trained model ...

Test set: Accuracy: 5603/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5600/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5603/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5594/6700 (83%)

Sung is Chinese
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5607/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5613/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5595/6700 (83%)

Sung is Korean
Jungwoo is English
Soojin is Polish
Nako is Japanese
evaluating trained model ...

Test set: Accuracy: 5

#### 13_3_char_rnn.py

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

from text_loader import TextDataset

hidden_size = 100
n_layers = 3
batch_size = 1
n_epochs = 100
n_characters = 128  # ASCII

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.linear = nn.Linear(hidden_size, output_size)

    # This runs this one step at a time
    # It's extremely slow, and please do not use in practice.
    # We need to use (1) batch and (2) data parallelism
    def forward(self, input, hidden):
        embed = self.embedding(input.view(1, -1))  # S(=1) x I
        embed = embed.view(1, 1, -1)  # S(=1) x B(=1) x I (embedding size)
        output, hidden = self.gru(embed, hidden)
        output = self.linear(output.view(1, -1))  # S(=1) x I
        return output, hidden

    def init_hidden(self):
        if torch.cuda.is_available():
            hidden = torch.zeros(self.n_layers, 1, self.hidden_size).cuda()
        else:
            hidden = torch.zeros(self.n_layers, 1, self.hidden_size)

        return Variable(hidden)

def str2tensor(string):
    tensor = [ord(c) for c in string]
    tensor = torch.LongTensor(tensor)

    if torch.cuda.is_available():
        tensor = tensor.cuda()

    return Variable(tensor)

def generate(decoder, prime_str='A', predict_len=100, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = str2tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)

    inp = prime_input[-1]

    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = chr(top_i)
        predicted += predicted_char
        inp = str2tensor(predicted_char)

    return predicted

# Train for a given src and target
# It feeds single string to demonstrate seq2seq
# It's extremely slow, and we need to use (1) batch and (2) data parallelism
# http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html.

def train_teacher_forching(line):
    input = str2tensor(line[:-1])
    target = str2tensor(line[1:])

    hidden = decoder.init_hidden()
    loss = 0

    for c in range(len(input)):
        output, hidden = decoder(input[c], hidden)
        loss += criterion(output, target[c])

    decoder.zero_grad()
    loss.backward()
    decoder_optimizer.step()

    return loss.data[0] / len(input)

def train(line):
    input = str2tensor(line[:-1])
    target = str2tensor(line[1:])

    hidden = decoder.init_hidden()
    decoder_in = input[0]
    loss = 0

    for c in range(len(input)):
        output, hidden = decoder(decoder_in, hidden)
        loss += criterion(output, target[c].unsqueeze(0))
        decoder_in = output.max(1)[1]
        
    decoder.zero_grad()
    loss.backward()
    decoder_optimizer.step()

    return loss.data.item() / len(input)

if __name__ == '__main__':
    
    decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
    if torch.cuda.is_available():
        decoder.cuda()

    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    train_loader = DataLoader(dataset=TextDataset(),
                              batch_size=batch_size,
                              shuffle=True)

    print("Training for %d epochs..." % n_epochs)
    for epoch in range(1, n_epochs + 1):
        print("Training in %d epochs..."%epoch)
        for i, (lines, _) in enumerate(train_loader):
            loss = train(lines[0])  # Batch size is 1

            if i % 100 == 0:
                print('[(%d %d%%) loss: %.4f]' %
                      (epoch, epoch / n_epochs * 100, loss))
                print(generate(decoder, 'Wh', 100), '\n')

Training for 100 epochs...
Training in 1 epochs...
[(1 1%) loss: 4.9074]
WhKG]X/[!Y_-z/+,<^"#Z
 []Q.k~t`=$-9dqe474(j3rw<'g <T[{5D~'V>i@I#i!k-czb'_(C1`7Ty 

[(1 1%) loss: 3.0868]
Wh,eteyaneiaogclnnenennaeimdnaioeoeheottobsenoaemlecakndrnagmotlhhmshshneoissotfiurodruelonthtyenittcc 

[(1 1%) loss: 3.1396]
Whmsiserlewaifinitemlithheittmhruuheeeanionn,bofltcntanitk.ttdhtraeeoakieb,isssmtsgttaioysyweaaiaatdtt 

[(1 1%) loss: 3.1782]
Whelhnhase?dweeslctlnil.ette'deonscmsnh'oteh:twoetaoheseottgtibeiketruue,oon,c,cewg-echtheirne,ur,wt'r 

[(1 1%) loss: 2.9582]
Whgnscohsdtltgcaeenranr,hheboaeehrnos.:ohctesrtsdsi:oattmnohrtoheeunodmoeeoobheeegvlysieonshon.fhohbrs 

[(1 1%) loss: 3.1892]
Whueeinhwasaayenil'eenemaunsad,arkleytmonrirdsacahalheioerdechtr:aiehgobereooatareseybpreyetrlrnbitntr 

[(1 1%) loss: 2.7918]
Whttaenehtoheoy;thheotsrohoboiaatilelmtantariteetyrloussrnneplthelosesdelt,hett!iol'mnmiwhhraydonoeect 

[(1 1%) loss: 3.4021]
Whrsdtndnrnrmmrgacho:oh:rneieinaitegpt

Whadweneyhuahryeaedieesneese,enuwhhdhteeme,eodtrmitheemorteeelebewdethnoeseeeohuonredeeeseee,ietr.uih: 

[(1 1%) loss: 3.7304]
Whotafyrarhlaeicnhmeeesaehdeeh,l.hsethflnlma.daaaedtrddrao,ne:neeeesadwe;egu.htem.nr.oaie.attumele,smd 

[(1 1%) loss: 3.3438]
Whmoyaattmeaawheeunltweaeleoannettoeetusdm'lwoou.lcds.ot,iollttke.ehndneehlnesu,onloohsait,l,e,oeehhye 

[(1 1%) loss: 2.8887]
Whmeyi,iattn-erraoatidhde,tn,,ensesrei,kttat'dtuide!resererer'oslose,nt?oaeiusos,reeg?nt.anrtt,mreli,s 

[(1 1%) loss: 2.2655]
Whersm:indeynerothotns'yyrs'uooetu:r,dramnidecetddelioorehrt.ner,ee.entedoreeoaehgg;dlrcmeirrri,gteiln 

[(1 1%) loss: 3.2135]
Whekyelevceethyaeasiltraeadrteetw,eoebeee,hedehjehhodaeeuloiaesoenehhoheseyenaortmyteon.enurreonmadery 

[(1 1%) loss: 2.7000]
Whiosiiegsbeh,heucnu,numehenrnouegbodoruhee.ei,.ee..oeithbeelers,eetesece;aoheee.roeaparhcorniettrsihh 

[(1 1%) loss: 2.9417]
Whelhuetranrleehreenlksnraynaesahiinetntueem,ilhe.seeitrryenndgheeiternon,ne.a;ne.:delendoeeiir.nmrmmd 

[(1 1%

Whmoe:sues:cnlndranteoeetlrdetgbmieof,ldeofe'e!henohnewleyyreghlceiet.tgerlleuterehrheo,eeotseeabotnee 

[(1 1%) loss: 2.6446]
Whketye,totlierrrouehabtiaelihohsssteab?awlleblyalleisettserid.ic,cls.tnse'ec,uetre.re:peneissoeunwaaj 

[(1 1%) loss: 3.0142]
Whpes,hnuwmeteurbntoltrtashofehsosof!meemehreaeyrhoisctc,uhedhloeiunteahandaalltede,mebaebetiehua,twtt 

[(1 1%) loss: 3.1406]
Whrari:saoamrehn:oturtthuwlctdchaloeourfsnhpteenr,oteeeeredgsoychoiihan,eaaae,boroeerdloo,enobrgevet,u 

[(1 1%) loss: 3.1685]
Whteonneoteyehu'oroees;ueecelf,hhgeem.esi;hb:seeireou,rehhe.tleitfrrsh;oehndf.lftyoi?etehdeohedeuy:rli 

[(1 1%) loss: 2.7417]
Whoheecsre:oohesdarsioyyereidaueoenttnlediidemerree!enoittrtrubcotenssehhfpeeersuraonaernermeeeeneeane 

[(1 1%) loss: 3.0108]
Whtyrotdebssmo:hsdgnoeedetmle;.ndoe,eoeheylouppgmaestibenearsi,nrreey,ksnr.inet.enie,lwosds'yeen'easrt 

[(1 1%) loss: 3.0443]
Wharireig,rei:oensahinnaeheeroeeeai,thm,hontmiutste;ngeineser.eusnme,slertdte.thehantoasorrtisues.wen, 

[(1 1%

Whortii:leoponesnrernn:aehewsi,krc,ithcasjieiebwasrmstysae,,ina;aeae.tr,schrneernedesraweteiwgnahuecfe 

[(2 2%) loss: 2.9618]
Whl,ougendahitedaedumoosnileuuemeifh-kdseilgenhedt'otlaue,doihhuheaulo,!dgielo,aderrebfss,diednbtnwlrs 

[(2 2%) loss: 3.1524]
Whriioeoeremrweseh.eogem.reyeraetedriiatasrcrdrs,i,.trtti.d,o.fretertassemagaho'roce.hemeeasrae,mesmir 

[(2 2%) loss: 2.7511]
Whaainnoirohitieteeoreocgaeiosrsrineeryfdemiharkeseho,reeehwfsrreaneoe;hrt,eoewrte.mssnniu.easgoiteeth 

[(2 2%) loss: 3.0270]
Whonignaeygna:utsnsoltteaietlrhuynesuaeytksyoceaestnosnplheutefnseoer!dsyutthueereneenealtieftese,aane 

[(2 2%) loss: 2.7261]
Whrye,sr,etmisrlntddwstiiiessmmevdapyssrenegeg,,ydopornpadhecheceree,dhyceth,mterht,ssaee!eee?eliegohe 

[(2 2%) loss: 2.9350]
Whmitahahotsoeatesnareh?.tstetlwlsarhktahletdttobtn,soldo,bahyyaoaytnyh,niaghtoaoratamag,ct.thsatnnrrc 

[(2 2%) loss: 3.4612]
Whnrania:::hnvt,tltetdterahsgeehuseouhyrohaaeece-ihyee,deue,.tnrtiea'aufat,setue!ekaaee,eeg,oaooeeotht 

[(2 2%

Whnsdge:hmllmrwreloenred,leyaarshg,m.uhendw'iestdbaiobieiacio,naye,ei,eewl,esrateyelshd,,smeeeeme,suee 

[(2 2%) loss: 2.8843]
Whleategsnhiantyrihnnehowtshmaer;nereoiesitdtate.ehliieeeeeaesnenhkseaedmvtoeieeieeeeayrtmordoei'leeyn 

[(2 2%) loss: 3.0566]
Whthrcrtiyoeyiroto,t,sreonitoeuhetaaohessohgrooydtmtraolordo,hhadaao.ewvsottkaeocvehh?rt,otmoythenohs, 

[(2 2%) loss: 3.0539]
Whrtmeotgosea,euenoeie,fnraeaednyvtttgdvrseteossarnshsepryernpenortreecrcnmen;ussese,ertowsroseeehkeet 

[(2 2%) loss: 2.9240]
Whnnfiteaj:fsocftn,oeiseglottumsnststyetahnetiwa,ldets;e,gcrs!rine!ehletashehedt,ennimtstieo'esrsom?r, 

[(2 2%) loss: 2.8909]
Whalhayylsseogaaxsueepnlww'elet,sgauaohhef;nleaecaaeeo:e,rseotemsnior,ohyueebliettena,sosl.mat:seacsns 

[(2 2%) loss: 3.0930]
Whrueitssreoshtrdtteoyllewidceisgeseee?efeerf!esecdmheeroe.del,feedteaen.eeme.ahedptsaeeroeedheeaeuea; 

[(2 2%) loss: 2.9292]
Whllaleeioweumhotiitsltnaldeyrettlntfyosetfbonyde,yr.nendo.eorsoareeesdwoerpiyneylyehau-,emth!eoyfr!.. 

[(2 2%

Whositienhteasepaeeaskuhefnenkaoneischaeeolsosooeyinfrdxairiedhregoeheerieerbbdeleedsaeaih..saenetfbet 

[(2 2%) loss: 3.0988]
Whlmeaiiwfckeetdoomtsaeolrettastantasihaasd:stedatrenrhebeltdetas,taw:setunabetetsuaeeesrahianpfeisstn 

[(2 2%) loss: 2.6444]
Wheee,mehni:ahrlemeooieenndetslh,waleeenesirregntolktheienowbroanntnoplmrnt?eoeehee,mha.gimlmgmll;nhme 

[(2 2%) loss: 3.1245]
When,sess.lhltv,aefnrdeneroeheeeteedmees:eteemheierr,otedmlhdt.,eeaueneieseetnslaamoueftneeaeptelcesse 

[(2 2%) loss: 2.7742]
Whegdis:miekeidoveair!aaotdls,eoooprso!ey,,ybrpryslwossbefnwteye!sye.efsotderurhasaoeaitiectsmet,reihr 

[(2 2%) loss: 0.8636]
Whmnoros:eew-oehststat,ertdihunrwr'eraesrhonuosean,,,edhde.deuotdfcremwd.taae?bmnhemdtu,m,wariotet,sau 

[(2 2%) loss: 3.0811]
Whelluooene'hnrtigmnounmrldeeedeuho'eoeaoeeibouhloe.oh.amtneo.m.wa,phyteehm,sgyikhesnitlo.soeghp,eedsi 

[(2 2%) loss: 2.8720]
Whao-ntnldrtosolotia;ecaodfths!ndrroahshoniveleelsnrd!owoseaktgialtthoipwrnhaddwsinoh,sesrsamrshoynstv 

[(2 2%

Whsol:domiysmeeenvhorteesrrlazeanysesfehnersalrooaeeehetlinltdgm?etnheyu,eeahretrneenoelhyrsnraenbot,u 

[(3 3%) loss: 3.0759]
Whldueegd,storwreriydg,topdhneetwcsno,,nysllldhuammslwneoemtd,ostets,amoohrb,gnu,r,d,aeeophmsuut.esmef 

[(3 3%) loss: 3.0418]
Whtesna:uoeonsemsiirostfnrmuellnael,iestysvresso!eaeteut,sr'aereeseheoee,oui.tldlae.tsloylakoc,rpdeoe; 

[(3 3%) loss: 3.1665]
Whmte:heopime,ieisemd,rl,,shulinoeroeletuedtetrorlhhehrt'bhseeytslnsehtrwgeiehohehdseeorrdrw,gederneen 

[(3 3%) loss: 3.1360]
Whsciisesbsefanmtsha,ecetsrodeebhptfnirrdendinedcehmtl,solrnreeere,euisesbelenshidosivemnrsoeghrn-nsnw 

[(3 3%) loss: 2.9560]
Whreoaerteiesibonhofnsamiti?stotamprel.nseiitriehetadryo.dso.denurgeathieenaf'es,kentcti,gtn'airlguear 

[(3 3%) loss: 2.9375]
Whruldi:lhtreuloaeoktfaabwqeglselehoeeter'ha,tkeohdtyectenesleeooaee'btptotalteei!solfeuenvtnrsshehrhv 

[(3 3%) loss: 1.2691]
Whcwceroa:iiserurnmdaalgscertcslnt:tefseiet?eeoiiesaepetrreo'bkiea,ae,heerda,drere!leees'trsseett,aore 

[(3 3%

Whlyita,:terehhsh,oerhnedetlsrnpcerii,ersttahnaeeeeeeeoeeloninihalsmeernrrfnnia,elhyoruayoaoieer!ee;ea 

[(3 3%) loss: 2.7346]
Whotcluoluahbklttelsaeeoemreaothe'eeeyhaersowewtstyehiotoerunfteseedressslaileeaasanw,aeceh.esh,eiehv- 

[(3 3%) loss: 3.2290]
Whaepiitatdhtnwhuethwucicsfhsewohdtesearywrmdrtstf,hhestswecreeeehserler.gewedeostdgerreveoeeeuiseennh 

[(3 3%) loss: 2.6422]
Whlennstoarllyaoeutsgniawkltscecees,,e'eeesesl,ee.enata,eaaemu,emermrssostguetwmmu,w?eigswmo-,lreereue 

[(3 3%) loss: 2.2267]
Whrrninnieen.;vateisimemterntdchis,tehsotibmsetldthhanshgenmhyseunmr,bi.mrehlsedod,ebder,aeslhiiomsmse 

[(3 3%) loss: 2.9381]
Whsaramgems,tshmeuyhslnmmiareeocatnusinoaykhetwtcetwmmnhhotayhrhda,aotmbernyeukslaaeu.dr,rstmbnstadstg 

[(3 3%) loss: 3.0742]
Whwer,ohr:rureietieoaderesao.dadeearsnmytyas?..eeow.ea,yurnrsb;ehi?w-wderroswoi,no.sierioech,le,mepdli 

[(3 3%) loss: 2.5652]
Whtit:treipelftipeseeelhihtbselnttdnnoertrdtnseeubywest,oha,suemete.e,roeeetireugvhs?sdotedrdirlcrsyod 

[(3 3%

Whtovlss:ot?aludntshh,tllnyrteenrhcncm.?teswo!es.aqe;iosd.erual:nlssoth,epouicv?tsn,lnlhnlincnl,u-tn,h 

[(4 4%) loss: 2.9516]
Whviaasru:bteamltsekrio,eaeha,adi;agmecrenoi,mfoeodeeeiteeemyldtersgieepunoeeedogekhuemye!derre'ewyrn. 

[(4 4%) loss: 2.8990]
Whcsolime:ssiiu'at;e;iiaecsolsefottntnseentstsaft.ecnhsuiaiateietfifcfdbie.otoisdrialdrneesmdaaroemwm: 

[(4 4%) loss: 2.9902]
Whteohuhinitsaeernedenhhmo,d'irgo,s,mtanthefaonsfcehaecnh;nte,eeo?efrereehth,o.fhedsfnedfgfs,egesdrgde 

[(4 4%) loss: 3.0227]
Whti,ahnngm:t,neotngnmhuttats.s.ganispaobnkloeododstgoaoedoti.tuoroutegototweisior,wiooptep-giiwnofhgs 

[(4 4%) loss: 3.0625]
Whvpdiehyyanasdefaamntnyrtotyr.ueelnsffoaoehttattkoacrytsatuahfhrnsseewoinnidenmesaonhncsternserud.aog 

[(4 4%) loss: 2.8655]
Whcearcinieo:a:?::trhgtrsnlw,yuhdhehlcduterrdsncethedaeteesasx'eeaeesiefed.eaerushieteceetamhsa.errdee 

[(4 4%) loss: 3.0839]
Whrdondraseonmot'm.fe,eoesoaniorpeueelsn,eermrnboldg.eclrktt-roglp,av,y:ydte?erhey.niso'oohrsc?yarnlin 

[(4 4%

Whctottmintainrceatutwi-hnsattv,aonibahtcreeoyetiwoeeartirerarsrnsmaoegrdewl-tntertavgneheepsdameolist 

[(4 4%) loss: 2.7407]
Whadeiuinsi':llkt;enamtgheiuieercyieenthsnvotltateheeaesnmceltitusewlnkernbwaevsssai,ruthnlnhaeo,iei-s 

[(4 4%) loss: 2.9276]
Whteyruolodvetoebtlevaceerlsitt.sdomeetfwsrehts.y:deluerhotoeynneea'euessbeeheeoeasalaprehueyegryieeob 

[(4 4%) loss: 2.7994]
Whedncrk'asyoednd'eaogrrndawwepeirvtur,ee,snrnyehndaowireeedseiereietesaeeshreteedtgltaeaect,yehstloap 

[(4 4%) loss: 3.1601]
Whaurfiiiossytiaaeeheeeuyrurd,eoetuodahwrtoieoieiynigsrd.earr'mrrsmreodpmdewepatsre,c.erednalewehcwpae 

[(4 4%) loss: 2.5583]
Whnoshwdinlwplminslsinon'tetew?rrehoretiehhuenomrcneeeewreenersoeaeeesnrnrkar,eeevsieswer.eeisagsereti 

[(4 4%) loss: 3.0411]
Whsomasbrraneoetegaeslseferonnndtefmdnghdeoncwaietreeeeg.yeefde.h.saaolaeseereavdeehil,eetr,ietesuiese 

[(4 4%) loss: 2.8995]
Whcyontlm:dfmbieurehal;nveefoah;eg,,.aeeerrdesrn,ireefimipseewedhen,hinitaoeenfermsiretaeevrsnee.ltee, 

[(4 4%

Whnouoms,nc:rrs:ooeehiieinnreint,sbleeeier:isreeeedlipaoeerte.irnrhh,aeteenow'eo,ho,oselo:w.?syuoo,seu 

[(4 4%) loss: 2.2471]
Whtrtfh:oefuhdaeeinseteaheetntraedreibew,sahcdussteehterlewrssetw;heeeentrtoonsl;naphnnthdlabdrwa.res? 

[(4 4%) loss: 3.8257]
Whrtscwihreehossnteoisneeeyeneeeleort.m,dtnawen.riisht..ieeiet.nbwea.tsrdeelyn.tesniehnaenerecrddlsson 

[(4 4%) loss: 2.8323]
Whteamdonlts:vnrlyeteersioaeneeneod;te.lswienneinhri,-,ateee;sedoapteehnedb;eog,eneu:.enhelmnortenhrmr 

[(4 4%) loss: 3.0607]
Whaihetencdrnleeohthrecieete?neietttulreedegopedntusteon.eu.st,eeth-teeweaplegateneetusadlfnheefeuerr, 

[(4 4%) loss: 3.2188]
Whahesrsrot:,flet;aeytecd,mtihooel,eaoeecmeas,w.eldhaon..lasane.aieaasd.!,etnadslbca,gudd:hdlteweewcof 

[(4 4%) loss: 3.0477]
Whozarehttlidtnim-taenrtefee.excehrleweeehed?etr,slaei,,l.ternegehsdrded,elathnhetsued-deeei:eekei.o,r 

[(4 4%) loss: 3.3631]
Whlueuonsnntpeenaaliederhgehheeyefa..reesirecnek.r,ny.ikeetentgtseoearksrrenv;edpeiteta.eeaoge:,t,ieiw 

[(4 4%

Whteeooaohrhetodnrfureraahdnroeerelsetanmsaytloleoasmepdweessrptoit:es.rtfsetnmsuatelswnesssfehoeetoea 

[(5 5%) loss: 3.1107]
Whmeaniy:ahtttenuergdssoieerptlestleeeeegsreeeo,rut.:isasfhibsintdieohuecurua.enesu,tsresalsebee,weolc 

[(5 5%) loss: 2.9748]
Whrrtausthro:ohlhetl,trihfrultmeoetetretleoeotolaafcrerei:eotbsoe-eeehnheoerheoc:kdemunoy,oeaiare.egeo 

[(5 5%) loss: 2.6915]
Whosyssdtnhraeieoerernpraesoeeiteoneoanshse.opehiemontkotlgehl,ihlfeniaiitnrsinissdieeteesaakoeuoerrte 

[(5 5%) loss: 3.1475]
Whlsicsrdaraennessyrdaoeeealemdetneewatrehlgetie.tshenhitymt,mhseh.aeeoeeehnpgrftbesttdhllrrafderrlhia 

[(5 5%) loss: 3.0873]
Whantsulsmohyhueaoehidealutein.deas,inttarceeehanrsen.nr,shrerttewuuarrtrsoeie,tethhoneinueananhtnaort 

[(5 5%) loss: 2.1350]
Whaeuuehidlterohena,teebuh.aueunetwshteaaaesiuradihaeeeee;sdimnaaehhcaederiteawpem;gshhtsnsrsefsraeete 

[(5 5%) loss: 1.5902]
Whmiensenssuntddlt:ey'eoealaryeasewwr;tlcnrr'enntolaie:nsaearwd,ehneryreiise,hatwsnedohsineaiekisemirc 

[(5 5%

Wheanaruhereundyrefietun;!eiecteioysrbtnde'remusyo.peaeeujsesrrteeiersd,dufeoioaeoa,hdopdiaeotsousrsre 

[(5 5%) loss: 2.8631]
Whnnihwinradnhaeaehreratem,eohtokdud,olnoadsyesynntre.roueiw,e,tnt?pvdoruionyyeeeoeg,nnonedh,oeevhanlr 

[(5 5%) loss: 2.8951]
Wha:dgsrerhnottleoaerreviaadhee;eadthreadeiherhuetieee,reeonleelrnrr.osormfeveihbleeodneshneimteeeedry 

[(5 5%) loss: 3.1029]
Whutreeird:'rcfrpedatefodvemstacneneeeinm,.adahiersrfeeoeiprsadtenlhreree.ekereoa,na.ceeat.lerdeeregea 

[(5 5%) loss: 3.1340]
Wha,hioa!:elteeyepeelemdeemeaeeaoainbhheustreheeetghensedtheegehae,aeeeereeausmnhoshvtepeeeeeelesretid 

[(5 5%) loss: 2.7183]
Whramrapu,aun:uonhaieeakarmehtgeedtodeabdnn,eaoteredeoedneazsfyee.stre,d.ratbneamr'ntueoeuei?nihsenres 

[(5 5%) loss: 3.3310]
Whoeueiteleyollsweeeetlneeeorrmlsmeiuhlreaesyeceaeemaoehaeuyivanoeueemeaeechduwaheesreedlloen',lhnnde, 

[(5 5%) loss: 0.7395]
Whaueadtadhltowanmhsrywsrthass,etaynnontemedeehhrnteehveysedrdlohhosbddleceeiieae,rasecel.nrdyd,asrsei 

[(5 5%

Whmeuolorermstdenaswyn!teorseoennseirngen.hsorebe,rsedehskenarlswrsnbeaneeodee.otavsis,nirsdriinkor'n; 

[(6 6%) loss: 2.5850]
Whulroiledhhsoetutoediinrn,ere,r.dstoueorooeeunhoueiehhneefryoeeonsriogeeg!e'elicgreernsioghenkgsorsln 

[(6 6%) loss: 2.7964]
Whmssneowi:baieaedoeaeefnibehe:ndeeldeloeeadc;lgeidttee,neer;hec:ns,oeedeeteasip;seeessitum?gemaendens 

[(6 6%) loss: 3.0611]
Wheiotio,s'inr:yhieorecnernbebtrmseiensnl.dhgensoieniuinhn;ldeoaroaoeo,nveergh,e,rlr.snehmdneambtesroe 

[(6 6%) loss: 3.1026]
Whmotiarminsassmaismlseyesyae.oephielsgktepkdsnln.emmnalaensof,le.at;slenoteettisineo,tete:eled,rmreek 

[(6 6%) loss: 2.9880]
Whsfiidsrneaamhiauseadsetneostsntcseadlmliohdhhenobsatnaeyksgtyersihnbuaedaniaseeeodtneniree.woadhefer 

[(6 6%) loss: 3.1033]
Whmesrienuh:o:arosoyrlemrsoaenregnr-ehetneahmdhruraeebe,esneyeetnmeresrosheoornirrosesdgde.ireeieomenr 

[(6 6%) loss: 1.4237]
Whritba:nuttisuetterdyllryeeatcaeneeiterhrsiewooseeeetesyinwnepehoo,ieheoehhynynnneefeshooseae,etwd.md 

[(6 6%

Whniueiibosocetereaseniersdituo,tdeuiblyaaeeuteeebdw,imtotassaholmmtlent,yeenigicwr,oen,amvmimonasorse 

[(6 6%) loss: 1.3062]
Whieersrnis:crgitpa'enld,t?ieneeneue:eiteiletr-lassgeetaecideiarevefretbseerese:eegnneodgae?ooheee,nie 

[(6 6%) loss: 2.9606]
Whotawlto:nuonasiwehicnoeayrhsswsoseiyeressuie.rosoeoytatsreoieoge:ee,ahyalece,ehegurae,a,mr,eette,we, 

[(6 6%) loss: 2.6426]
Whiyadsu::fhahaeebesotlmpripdereemnuiionmaa,aeeebrraheeeemtyhnhuolstehrsen,aeedioewebancereooesla,shiw 

[(6 6%) loss: 2.8853]
Whiiigl:hatdrsvteeehegemikteewereereden:eprncn:eaeeueee,sseeaebseiee,mneteiwnseearrefesnseelddettnaddg 

[(6 6%) loss: 2.8099]
Whtonp::ereneneearms.lyeonmtceggeraheeeneuhdmaroeraew.eetrncreonmteeaest.heheenlehrrpehedurweeeemeeetv 

[(6 6%) loss: 3.0127]
Whcooigrnhrh:iohuolfpeersahmehohhhtoeee!ee.bneghprsnlmomocryooorasyoulte,erseunendtoauo,wnw.oii.nsfumg 

[(6 6%) loss: 3.0190]
Whaemure:s.stoeie:udeiiireeeeecesceeatteayhtcsieeneeerimge'nesdlqeiersmaserocseoreweooesnooshwedoeeoo' 

[(6 6%

### 2) 아래 모델의 block diagram을 도시하시오.

사진 첨부

### 3) 입력 문자열의 길이가 다른 경우에 아래의 코드는 이를 어떻게 해결했는지 설명하시오.

가장 긴 입력의 길이를 input_dimension으로 사용해서 zero-padding을 적용함

### 4) 입력 문자열이 어떻게 RNN의 입력 tensor로 변환되는지 과정을 자세히 설명하시오.

알파벳 인덱스로 변환 후, embedding layer를 통해 rnn 입력으로 변환하였다.

### 5)RNN,GRU,LSTM의 기능을 조사하고, block diagram을 도시하여 동작 원리를 설명하시오.

1. RNN
 - RNN은 히든 노드가 방향을 가진 엣지로 연결돼 순환구조를 이루는 인경신경망의 한종류이다.
 
<img src=https://miro.medium.com/max/627/1*go8PHsPNbbV6qRiwpUQ5BQ.png>
출처 : https://towardsdatascience.com/understanding-rnn-and-lstm-f7cdf6dfc14e

2. GRU
 - LSTM의 간소화된 버전으로 LSTM에서는 출력, 입력, 삭제 게이트라는 3개의 게이트가 있던 것을 업데이트 게이트와 리셋 게이트를 사용한다.

<img src=http://dprogrammer.org/wp-content/uploads/2019/04/GRU-768x502.png>
출처 : http://dprogrammer.org/rnn-lstm-gru

3. LSTM
 - RNN은 관련 정보와 그 정보를 사용하는 지점 사이 거리가 멀 경우 역전파시 그래디언트가 점차 줄어 학습 능력이 저하되었다. 따라서 이를 극복하기 위해 고안된 것이 LSTM이며 RNN의 히든 state에 cell-state를 추가한 구조이다.
 <img src=https://i.imgur.com/jKodJ1u.png>
 출처 : https://ratsgo.github.io/natural%20language%20processing/2017/03/09/rnnlstm/