# 파이토치 RNN 분류 모델
- 코드 출처: https://tutorials.pytorch.kr/intermediate/char_rnn_classification_tutorial.html

In [29]:
import requests
import zipfile
import io
import os

URL = 'https://download.pytorch.org/tutorial/data.zip'
res = requests.get(URL)
res.raise_for_status()

z = zipfile.ZipFile(io.BytesIO(res.content))
z.extractall(os.getcwd())

이름: Ślusàrski

In [30]:
from io import open
import glob

path = os.path.join(os.getcwd(), 'data', 'names')

def find_files(path):
    """특정 패턴에 맞는 파일들을 찾는 함수"""
    return glob.glob(path)

print(find_files(os.path.join(path, '*.txt')))

import unicodedata
import string

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn' and c in all_letters # 악센트 제거
    )

print(unicode_to_ascii('Ślusàrski'))

category_lines = {}
all_categories = []

def read_lines(file_name):
    lines = open(file_name, encoding='utf-8').read().strip().split('\n')

    return [unicode_to_ascii(line) for line in lines]

for file_name in find_files(os.path.join(path, '*.txt')):
    category = os.path.splitext(os.path.basename(file_name))[0]
    all_categories.append(category)
    lines = read_lines(file_name)
    category_lines[category] = lines
    
n_categories = len(all_categories)

['c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\Arabic.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\Chinese.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\Czech.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\Dutch.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\English.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\French.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\German.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\Greek.txt', 'c:\\Users\\kksoo\\Desktop\\work\\study\\code\\deep_learning\\10.pytorch_gen_nn\\data\\names\\Irish.txt', 'c:\\Users\\kksoo\\Desktop\\work\\stud

In [31]:
category_lines["Korean"][:5]

['Ahn', 'Baik', 'Bang', 'Byon', 'Cha']

In [32]:
import torch

def letter_to_idx(letter):
    return all_letters.find(letter)

def letter_to_tensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letter_to_idx(letter)] = 1
    
    return tensor

def line_to_tensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)

    for i, letter in enumerate(line):
        tensor[i][0][letter_to_idx(letter)] = 1
    
    return tensor

print(letter_to_tensor('J'))
print(line_to_tensor('Jones').size())

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])
torch.Size([5, 1, 57])


## 신경망 생성

- 모델 생성 방법 참고 : https://pytorch.org/tutorials/beginner/former_torchies/nnft_tutorial.html

In [34]:
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)

        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)

In [40]:
input = letter_to_tensor('A')
hidden = torch.zeros(1, n_hidden)

output, next_hidden = rnn(input, hidden)
print(output)

tensor([[-2.8524, -2.9110, -2.8458, -2.8373, -2.9441, -2.8148, -2.9210, -2.9318,
         -2.9164, -3.0367, -2.8514, -2.9803, -2.8800, -2.9007, -2.9146, -2.7858,
         -2.8292, -2.9059]], grad_fn=<LogSoftmaxBackward0>)


In [39]:
input = line_to_tensor('Albert')
hidden = torch.zeros(1, n_hidden)

output, next_hidden = rnn(input[0], hidden)
print(output)

tensor([[-2.8524, -2.9110, -2.8458, -2.8373, -2.9441, -2.8148, -2.9210, -2.9318,
         -2.9164, -3.0367, -2.8514, -2.9803, -2.8800, -2.9007, -2.9146, -2.7858,
         -2.8292, -2.9059]], grad_fn=<LogSoftmaxBackward0>)


## 모델 학습
- 준비

In [42]:
def category_from_output(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    
    return all_categories[category_i], category_i

print(category_from_output(output))

('Scottish', 15)


In [43]:
import random

def random_choice(l):
    return l[random.randint(0, len(l) - 1)]

def random_training_example():
    category = random_choice(all_categories)
    line = random_choice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)

    return category, line, category_tensor, line_tensor

for i in range(10):
    cateogory, line, category_tensor, line_tensor = random_training_example()
    print(f"Category: {category}, Line: {line}")

Category: Vietnamese, Line: An
Category: Vietnamese, Line: Schneijder
Category: Vietnamese, Line: Koury
Category: Vietnamese, Line: Sarkis
Category: Vietnamese, Line: Tanaka
Category: Vietnamese, Line: Deguchi
Category: Vietnamese, Line: Thai
Category: Vietnamese, Line: Freund
Category: Vietnamese, Line: De santigo
Category: Vietnamese, Line: Buchta


## 손실함수

## 모델 학습

## 모델 테스트

## 사용자 입력으로부터의 실행