In [1]:
import os

import itertools
import pickle
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math 

import sys
sys.path.append('../')
import utils
import wiki_utils
%matplotlib inline

In [3]:
%%time
corpus = wiki_utils.Texts('./wikitext/', encoding="utf-8")

Wall time: 1min 31s


In [4]:
batch_size = 128
sequence_length = 30
grad_clip = 0.1
lr = 4.
best_val_loss = None
log_interval = 100

In [5]:
eval_batch_size = 128
train_loader = wiki_utils.TextLoader(corpus.train, batch_size=batch_size)
val_loader = wiki_utils.TextLoader(corpus.valid, batch_size=eval_batch_size)
test_loader = wiki_utils.TextLoader(corpus.test, batch_size=eval_batch_size)

In [114]:
corpus.train.size(0)

10780437

In [18]:
next(train_loader.__iter__())

(tensor([[ 0, 15, 40,  ..., 14,  0,  4],
         [ 1,  8,  0,  ...,  8, 40,  7],
         [ 0,  7, 46,  ...,  9, 12, 15],
         ...,
         [17,  0,  0,  ..., 15,  0, 40],
         [ 0, 66,  4,  ...,  0,  5, 11],
         [ 2,  9, 49,  ..., 47,  9, 15]]),
 tensor([ 1,  8,  0,  ..., 48, 42,  0]))

In [10]:
data, targets = list(train_loader)[0]

In [103]:
data.shape, targets.shape

(torch.Size([30, 128]), torch.Size([3840]))

In [107]:
data.data.size(0)

30

In [33]:
data[:, 0]

tensor([ 0,  1,  0,  2,  0,  3,  4,  5,  6,  7,  8,  9,  4,  0, 10, 11,  8, 12,
        13,  9, 14,  5, 15, 16,  0, 17, 17, 17,  0,  2])

In [34]:
targets.view_as(data)[:, 0]

tensor([ 1,  0,  2,  0,  3,  4,  5,  6,  7,  8,  9,  4,  0, 10, 11,  8, 12, 13,
         9, 14,  5, 15, 16,  0, 17, 17, 17,  0,  2,  0])

In [None]:
ntokens = len(corpus.dictionary)
model = RNNModel('LSTM', ntokens, 128, 128, 2, 0.3)

In [82]:
ntoken = len(corpus.dictionary)  # 283
ninp = 2
encoder = nn.Embedding(ntoken, ninp)

In [83]:
data.shape

torch.Size([30, 128])

In [84]:
encoder(data).shape

torch.Size([30, 128, 2])

In [85]:
data[0]

tensor([ 0, 15, 40, 67,  0, 13, 28,  4,  9,  0,  0,  4,  6,  4, 44, 15,  8, 41,
        15, 44, 50,  0, 13,  0, 16, 16, 74,  7,  8,  0, 58, 15,  9, 46, 15,  0,
         0, 24,  9,  0, 16, 88, 11, 51, 11,  0, 24, 15, 14,  8, 12, 40, 26,  0,
         0, 40, 23, 24, 24,  0, 40,  9, 14, 13, 11, 39, 15, 16, 46,  2, 13,  4,
        15,  0,  4,  0,  0, 13,  5, 55,  4,  0, 14, 24, 17,  9, 50, 40, 40, 16,
        11, 46, 15, 15, 15, 13,  9,  9,  0, 40, 15,  9,  6, 13,  7, 16, 69, 40,
        13,  6, 42, 40,  0, 44,  0, 15, 40,  5, 14, 46,  4,  0,  0, 17, 24, 14,
         0,  4])

In [86]:
encoder(data[0]).mean(1)

tensor([-0.7409, -0.6691,  0.1517,  0.1278, -0.7409,  0.4727, -0.1492, -1.1188,
         0.5614, -0.7409, -0.7409, -1.1188,  1.6671, -1.1188, -0.4822, -0.6691,
        -0.1623,  0.2756, -0.6691, -0.4822,  0.3690, -0.7409,  0.4727, -0.7409,
         0.0937,  0.0937, -0.1116, -0.2775, -0.1623, -0.7409, -0.1507, -0.6691,
         0.5614, -0.0741, -0.6691, -0.7409, -0.7409, -1.0287,  0.5614, -0.7409,
         0.0937,  0.9223,  0.0087, -0.0120,  0.0087, -0.7409, -1.0287, -0.6691,
        -0.0352, -0.1623,  0.5011,  0.1517, -0.5876, -0.7409, -0.7409,  0.1517,
        -0.4444, -1.0287, -1.0287, -0.7409,  0.1517,  0.5614, -0.0352,  0.4727,
         0.0087, -0.7643, -0.6691,  0.0937, -0.0741,  0.1867,  0.4727, -1.1188,
        -0.6691, -0.7409, -1.1188, -0.7409, -0.7409,  0.4727, -0.4104,  1.8603,
        -1.1188, -0.7409, -0.0352, -1.0287,  0.2516,  0.5614,  0.3690,  0.1517,
         0.1517,  0.0937,  0.0087, -0.0741, -0.6691, -0.6691, -0.6691,  0.4727,
         0.5614,  0.5614, -0.7409,  0.15

In [21]:
class RNNModel(nn.Module):

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(ninp, nhid, nlayers, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, x, hidden=None):
        emb = self.drop(self.encoder(x))  # 30x128x128(ninp - размерность единицы на входе)
        output, hidden = self.rnn(emb, hidden)  # 30x128x128(nhid - размерность единицы на выходе (внутр. слоя))
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (weight.new(self.nlayers, bsz, self.nhid).zero_(),
                    weight.new(self.nlayers, bsz, self.nhid).zero_())
        else:
            return weight.new(self.nlayers, bsz, self.nhid).zero_()

In [23]:
def evaluate(data_loader):
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i, (data, targets) in enumerate(data_loader):
        output, hidden = model(data)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / len(data_loader)

In [24]:
def train():
    model.train()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    for batch, (data, targets) in enumerate(train_loader):
        model.zero_grad()
        output, hidden = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_loader) // sequence_length, lr, cur_loss, math.exp(cur_loss)))
            total_loss = 0

In [25]:
ntokens = len(corpus.dictionary)
model = RNNModel('LSTM', ntokens, 128, 128, 2, 0.3)
criterion = nn.CrossEntropyLoss()

In [26]:
def generate(n=50, temp=1.):
    model.eval()
    x = torch.rand(1, 1).mul(ntokens).long()
    hidden = None
    out = []
    for i in range(n):
        output, hidden = model(x, hidden)
        s_weights = output.squeeze().data.div(temp).exp()
        s_idx = torch.multinomial(s_weights, 1)[0]
        x.data.fill_(s_idx)
        s = corpus.dictionary.idx2symbol[s_idx]
        out.append(s)
    return ''.join(out)

In [27]:
with torch.no_grad():
    print('sample:\n', generate(50), '\n')

for epoch in range(1, 6):
    train()
    val_loss = evaluate(val_loader)
    print('-' * 89)
    print('| end of epoch {:3d} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
        epoch, val_loss, math.exp(val_loss)))
    print('-' * 89)
    if not best_val_loss or val_loss < best_val_loss:
        best_val_loss = val_loss
    else:
        # Anneal the learning rate if no improvement has been seen in the validation dataset.
        lr /= 4.0
    with torch.no_grad():
        print('sample:\n', generate(50), '\n')


sample:
 ṃ,@śí0?µา½m火8E f.\HÅwx9W〈ზêჯ9īო[‘ト±าłşه*ửmณ`−αح−dE 



	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha)


| epoch   1 |   100/ 2807 batches | lr 4.00 | loss  3.60 | ppl    36.65
| epoch   1 |   200/ 2807 batches | lr 4.00 | loss  3.28 | ppl    26.70
| epoch   1 |   300/ 2807 batches | lr 4.00 | loss  3.25 | ppl    25.80
| epoch   1 |   400/ 2807 batches | lr 4.00 | loss  3.22 | ppl    25.15
| epoch   1 |   500/ 2807 batches | lr 4.00 | loss  3.22 | ppl    25.01
| epoch   1 |   600/ 2807 batches | lr 4.00 | loss  3.11 | ppl    22.44
| epoch   1 |   700/ 2807 batches | lr 4.00 | loss  2.99 | ppl    19.98
| epoch   1 |   800/ 2807 batches | lr 4.00 | loss  2.90 | ppl    18.24
| epoch   1 |   900/ 2807 batches | lr 4.00 | loss  2.81 | ppl    16.64
| epoch   1 |  1000/ 2807 batches | lr 4.00 | loss  2.72 | ppl    15.16
| epoch   1 |  1100/ 2807 batches | lr 4.00 | loss  2.61 | ppl    13.64
| epoch   1 |  1200/ 2807 batches | lr 4.00 | loss  2.55 | ppl    12.78
| epoch   1 |  1300/ 2807 batches | lr 4.00 | loss  2.50 | ppl    12.15
| epoch   1 |  1400/ 2807 batches | lr 4.00 | loss  2.45 | ppl  

In [29]:
t1 = generate(10000, 1.)
t15 = generate(10000, 1.5)
t075 = generate(10000, 0.75)
with open('./generated075.txt', 'w', encoding="utf-8") as outf:
    outf.write(t075)
with open('./generated1.txt', 'w', encoding="utf-8") as outf:
    outf.write(t1)
with open('./generated15.txt', 'w', encoding="utf-8") as outf:
    outf.write(t15)

### <center>Мои исследования сети

1

In [87]:
rnn = nn.LSTM(10, 20, 2)
input_ = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input_, (h0, c0))

In [88]:
input_.shape

torch.Size([5, 3, 10])

In [89]:
output.shape

torch.Size([5, 3, 20])

2

In [99]:
rnn = nn.LSTM(40, 30, 2)
input_ = torch.randn(5, 6, 40)
# h0 = torch.randn(2, 3, 20)
# c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input_)

In [100]:
input_.shape

torch.Size([5, 6, 40])

In [101]:
output.shape

torch.Size([5, 6, 30])

In [102]:
hn.shape

torch.Size([2, 6, 30])