In [3]:
from chatTool import *
import pickle
import random
import numpy as np
LangBag = "dict.pkl"
DataName = "./dgk_lost_conv/results/lost.conv.tconv"

In [4]:
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch
from torch import optim

use_cuda = torch.cuda.is_available()

In [5]:
with open(LangBag, 'rb') as f:
    lang = pickle.load(f)

In [9]:
lang = Lang("Number")
for w in "".join(map(str, np.arange(0,10,1))):
    lang.addWord(w)

In [100]:
class NumberReader(data.Dataset):
    def __init__(self, maxNumber, maxLength):
        self.maxNumber = maxNumber
        self.maxLength = maxLength
    def __getitem__(self, index):
        a = np.random.randint(0,self.maxNumber, np.random.randint(2,self.maxLength))#np.random.randint(2,self.maxLength)
        return "".join(map(str, a)), "".join(map(str, a[::-1]))
    def __len__(self):
        return 1000

In [6]:
trainset = Reader(DataName)
# trainset = NumberReader(9,5)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, 
                                              shuffle=True, num_workers=1)

In [7]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, em_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.em_size = em_size
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, self.em_size)
        self.gru = nn.GRU(self.em_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.hidden_size)

    def forward(self, input, hidden):
        output = self.embedding(input.long()).view(1, -1, self.em_size)
        output, hidden = self.gru(output, hidden)
        output = self.out(output[:,-1,:])
        return output, hidden
    
    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result

In [8]:
class DecoderRNN(nn.Module):
    def __init__(self, input_size, em_size, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.em_size = em_size

        self.embedding = nn.Embedding(input_size, em_size)
        self.gru = nn.GRU(em_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, input, hidden):
        output = self.embedding(input.long()).view(1, -1, self.em_size)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output[0], hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result

In [9]:
INPUTSIZE = lang.n_words
encoder = EncoderRNN(INPUTSIZE, 256, 512)
decoder = DecoderRNN(INPUTSIZE, 256, 512, INPUTSIZE)
if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

In [10]:
encoder_optimizer = optim.Adam(encoder.parameters(), lr=1e-4)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=1e-4)
criterion = nn.NLLLoss()
tf = 0.5
for epoch in range(2):
    for i,data in enumerate(trainloader, 0):
        input, target = data
        input = torch.Tensor(lang.sentenceToVector(input[0])).long()
        target = torch.Tensor(lang.sentenceToVector(target[0], eof = True)).long()
                              
        if use_cuda:
            input = input.cuda()
            target = target.cuda()
        inputLen = input.size()[0]
        targetLen = target.size()[0]

        input = Variable(input)
        target = Variable(target)

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        encoder_hidden = encoder.initHidden()
        for ei in input:
            o, encoder_hidden = encoder(ei, encoder_hidden)
        decoder_input = Variable(torch.LongTensor([0]))
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

        decoder_hidden = o.view(1,1,-1)

        loss = 0
        use_teacher_forcing = True if random.random() < tf else False
        for di in range(targetLen):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0]
            decoder_input = target[di]
            if use_teacher_forcing:
                decoder_input = Variable(torch.LongTensor([ni]))
                if use_cuda:
                    decoder_input = decoder_input.cuda()
            else:
                decoder_input = target[di]

            loss += criterion(decoder_output, target[di])

        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        if i % 100 == 0:
            print("Step {:3}, loss: {}".format(i, loss.data[0] / targetLen))
#             tf *= 0.9

Step   0, loss: 8.429454803466797
Step 100, loss: 7.068339628331802
Step 200, loss: 5.3433082580566404
Step 300, loss: 5.55130360921224
Step 400, loss: 5.4621537526448565
Step 500, loss: 4.904784732394749


Process Process-1:
Traceback (most recent call last):
  File "/home/ball/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/ball/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ball/anaconda3/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/home/ball/anaconda3/lib/python3.5/multiprocessing/queues.py", line 343, in get
    res = self._reader.recv_bytes()
  File "/home/ball/anaconda3/lib/python3.5/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
KeyboardInterrupt
  File "/home/ball/anaconda3/lib/python3.5/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/ball/anaconda3/lib/python3.5/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt: 

In [181]:
q,a = trainset[2]

input = torch.Tensor(lang.sentenceToVector(a)).long()
input = Variable(input).cuda()
encoder_hidden = encoder.initHidden()
for ei in input:
    o, encoder_hidden = encoder(ei, encoder_hidden)
decoder_hidden = o.view(1,1,-1)
decoder_input = Variable(torch.LongTensor([0]))
decoder_input = decoder_input.cuda() if use_cuda else decoder_input

ans = []
for i in range(20):
    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
    topv, topi = decoder_output.data.topk(1)
    ni = topi[0]
    ans.append(ni)
    decoder_input = Variable(torch.LongTensor([ni])).cuda()
    print(ni, lang.index2word[ni])
    if ni == 1:
        break
print(q, a)
print(lang.vectorToSentence(ans))

9 7
6 4
1 EOS
148 841
74


In [77]:
o,h = encoder(input[0], encoder_hidden)
h.size()
encoder.initHidden().size()

torch.Size([1, 1, 50])

In [230]:
em = encoder.embedding(input).view(1, -1, encoder.em_size)
o1, h1 = encoder.gru(em)

In [232]:
encoder.out(o1[:,-2,:])

Variable containing:

Columns 0 to 9 
-0.1385  0.1484  0.1496 -0.0914 -0.1649 -0.1600 -0.0198 -0.1365 -0.1289 -0.0860

Columns 10 to 19 
 0.0813 -0.0057  0.2005 -0.0842  0.1288 -0.1611 -0.1278 -0.1688 -0.0239 -0.0400

Columns 20 to 24 
-0.0790  0.0103 -0.0156 -0.0649 -0.1217
[torch.cuda.FloatTensor of size 1x25 (GPU 0)]

In [209]:
em = encoder.embedding(input1).view(1, -1, encoder.em_size)
o2, h2 = encoder.gru(em)

In [37]:
q,a = trainset[0]
ip = Variable(torch.Tensor(lang.sentenceToVector(q)).long()).cuda()
op = Variable(torch.Tensor(lang.sentenceToVector(a)).long()).cuda()

eo, eh = encoder(ip)

oh = eh[:, -1, :].view(1,1,-1)
for w in op:
    ans, oh = decoder(op[0], oh)

In [19]:
ip.size()

torch.Size([15])

In [197]:
q,a = trainset[2]

input = torch.Tensor(lang.sentenceToVector(a)).long()
input = Variable(input).cuda()
input

Variable containing:
 0
 1
[torch.cuda.LongTensor of size 2 (GPU 0)]

In [132]:
encoder(input[0], encoder.initHidden())[0].size()

torch.Size([1, 10])