In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class RNN(nn.Module):
    """Single-step recurrent cell that emits class probabilities.

    One call to ``forward`` processes one time step. The current input and
    the previous hidden state are concatenated, and two independent linear
    layers map that concatenation to the next hidden state (through ReLU)
    and to the output distribution (through softmax).

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        output_size: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.hidden_size = hidden_size

        # Both layers read the concatenation [input, hidden].
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input, hidden):
        """Advance the cell by one time step.

        Args:
            input: Tensor of shape ``(batch, input_size)``.
            hidden: Previous hidden state, shape ``(batch, hidden_size)``.

        Returns:
            Tuple ``(output, hidden)``: ``output`` has shape
            ``(batch, output_size)`` and each row sums to 1 (probabilities,
            not logits); ``hidden`` is the new non-negative hidden state of
            shape ``(batch, hidden_size)``.
        """
        combined = torch.cat((input, hidden), 1)
        # The output is computed from the *previous* hidden state (via
        # ``combined``), not from the freshly updated one.
        next_hidden = F.relu(self.i2h(combined))
        output = self.softmax(self.i2o(combined))
        return output, next_hidden

    def initHidden(self, batch_size=1):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: Number of rows in the returned state. Defaults to 1,
                which matches the previous hard-coded behaviour.

        Returns:
            Zero tensor of shape ``(batch_size, hidden_size)`` allocated on
            the same device and with the same dtype as the module's weights,
            so it can be fed to ``forward`` after ``.to(device)`` or
            ``.double()`` without a manual conversion.
        """
        weight = self.i2h.weight
        return torch.zeros(
            batch_size,
            self.hidden_size,
            device=weight.device,
            dtype=weight.dtype,
        )

In [2]:
def make_id_dic(fname):
    """Build a lookup from each line of a text file to its line number.

    Args:
        fname: Path of the text file to read, one entry per line.

    Returns:
        dict mapping each line (with trailing newline characters removed,
        other whitespace kept) to its 1-based line number. If a line occurs
        more than once, the number of its last occurrence is kept. The value
        0 is never assigned.
    """
    with open(fname) as handle:
        return {
            text.rstrip('\n'): line_no
            for line_no, text in enumerate(handle, start=1)
        }

In [3]:
def words2id(sentence, id_dic):
    """Convert a sentence into a list of vocabulary IDs.

    The sentence is lower-cased, every run of digits is replaced by a single
    '0', hyphens are turned into spaces, and the result is split on
    whitespace. Each token is stemmed with the English Snowball stemmer and
    looked up in ``id_dic``.

    Args:
        sentence: Raw input text.
        id_dic: Mapping from stemmed word to integer ID.

    Returns:
        List with one ID per token, in order. Tokens whose stem is not a key
        of ``id_dic`` are mapped to 0.
    """
    import re
    import snowballstemmer

    # Normalise: unify case, collapse digit runs to '0', split hyphenated words.
    normalized = re.sub(r'[0-9]+', '0', sentence.lower()).replace('-', ' ')

    # Stem every whitespace-separated token.
    stemmer = snowballstemmer.stemmer('english')
    stems = (stemmer.stemWord(token) for token in normalized.split())

    # Unknown stems fall back to ID 0.
    return [id_dic.get(stem, 0) for stem in stems]

In [4]:
def id2vec(words_id, embeds):
    """Look up the embedding vector for each word ID.

    Args:
        words_id: Sequence of integer word IDs.
        embeds: Callable that maps an int64 index tensor to vectors
            (e.g. an ``nn.Embedding``).

    Returns:
        Whatever ``embeds`` returns for the index tensor; for an
        ``nn.Embedding`` this is a tensor of shape
        ``(len(words_id), embedding_dim)``.
    """
    # Build the index tensor directly as int64. Going through a float32
    # tensor first (torch.Tensor(...).long()) silently rounds IDs above
    # 2**24 to a different index.
    indices = torch.as_tensor(words_id, dtype=torch.long)
    return embeds(indices)

In [7]:
# Model dimensions. D_W is shared by the RNN input size and the embedding
# dimension, which must agree.
D_W = 300       # word-vector dimension (d_w)
D_H = 50        # hidden-state dimension (d_h)
N_LABELS = 4    # number of output classes (L)
VOCAB_PATH = '../chapter_6/train.feature.txt'

rnn = RNN(D_W, D_H, N_LABELS)  # RNN(d_w, d_h, L)

id_dic = make_id_dic(VOCAB_PATH)
# make_id_dic numbers entries from 1 and words2id uses 0 for unknown words,
# so the table needs len(id_dic) + 1 rows.
embeds = nn.Embedding(len(id_dic) + 1, D_W)

sentence = input('enter sentence: ')
words_id = words2id(sentence, id_dic)
if not words_id:
    # Without this guard the loop below never runs and the final `output`
    # line fails with an opaque NameError.
    raise ValueError('sentence contains no words; nothing to feed to the RNN')
words_vec = id2vec(words_id, embeds)

# Feed the sentence one word at a time, carrying the hidden state forward.
hidden = rnn.initHidden()
for i in range(len(words_vec)):
    # Slice (rather than index) so each step keeps a leading batch dimension of 1.
    output, hidden = rnn(words_vec[i:i+1], hidden)

# Class probabilities after the last word. No trained weights are loaded in
# this cell, so the values come from freshly initialised parameters.
output

enter sentence: UPDATE 1-AT&T CEO says DirecTV to negotiate NFL deal independently


tensor([[0.1610, 0.3510, 0.2259, 0.2620]], grad_fn=<SoftmaxBackward>)