In [0]:
# Install PyTorch for this runtime (see http://pytorch.org/).
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Build the "<impl><version>-<abi>" part of the wheel file name for the running interpreter.
# NOTE(review): wheel.pep425tags may be missing from recent `wheel` releases — confirm this import still works.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# Ask ldconfig for the cudart library and rewrite its trailing "<major>.<minor>" version into "cu<major><minor>".
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# Use the CUDA build only when an NVIDIA device node exists; otherwise install the CPU build.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# Install the torch 0.4.1 wheel matching the accelerator/platform computed above, plus torchvision.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
# Mount Google Drive under /content/gdrive so files stored there are reachable from the runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
import random

# Read the whole training corpus into memory. The context manager closes the
# file handle as soon as the text is read, and the explicit encoding stops the
# result from depending on the runtime's default locale.
# NOTE(review): assumes the corpus file is UTF-8 encoded — confirm.
with open('/content/gdrive/My Drive/ML/Rusilud.txt', encoding='utf-8') as corpus_file:
    plain_file = corpus_file.read()
plain_file_len = len(plain_file)

# Vocabulary: vocab_inv maps index -> character (sorted for a stable order),
# vocab maps character -> index.
vocab_inv = sorted(set(plain_file))
vocab = {c: i for i, c in enumerate(vocab_inv)}
n_chars = len(vocab)

# The corpus split into lines; chunks for training are started at line starts.
file_lines = plain_file.split('\n')
lines_num = len(file_lines)

print('file_len =', plain_file_len)
print('lines_num =', lines_num)

In [0]:
# Training-chunk generation. So that the words produced by the network look
# more like real ones, every chunk starts at the beginning of a corpus line.

# Number of characters in one chunk
chunk_len = 200

def random_chunk():
    """Return a random piece of the corpus, exactly ``chunk_len`` characters long.

    The piece always begins at the start of a line. Whole lines are collected
    until they hold at least ``chunk_len`` characters, joined with newlines and
    cut to ``chunk_len``.

    Returns:
        str: ``chunk_len`` characters of text.

    Raises:
        ValueError: if all lines of the corpus together hold fewer than
            ``chunk_len`` characters.
    """
    # Pick a random line as the start. Integer division keeps the bound an int:
    # random.randint() rejects float bounds on newer Python versions. max()
    # keeps the range valid when the corpus has only a few lines.
    last_start = max(0, lines_num - chunk_len // 10)
    start_index = random.randint(0, last_start)

    # Append following lines until enough characters are collected, without
    # ever indexing past the last line of the corpus.
    end_index = start_index + 1  # exclusive
    c_chunk_len = len(file_lines[start_index])
    while c_chunk_len < chunk_len and end_index < lines_num:
        c_chunk_len += len(file_lines[end_index])
        end_index += 1

    # The tail of the corpus was too short: extend the chunk backwards so that
    # it still starts at a line start and still has the full length.
    while c_chunk_len < chunk_len and start_index > 0:
        start_index -= 1
        c_chunk_len += len(file_lines[start_index])

    if c_chunk_len < chunk_len:
        raise ValueError('corpus is shorter than chunk_len characters')

    # Put the newlines back between the lines and cut to the chunk size
    return "\n".join(file_lines[start_index:end_index])[:chunk_len]

# Check that everything works as intended
print(random_chunk())

In [0]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Character model: embedding -> three chained LSTMs -> linear read-out.

    The model processes one character index per call to ``forward``.

    NOTE(review): the three LSTM modules pass a single (h, c) state tuple from
    one to the next instead of keeping a separate state per layer — confirm
    this is intended.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: number of distinct input symbols (embedding rows).
            hidden_size: embedding width and LSTM hidden width.
            output_size: number of output scores produced per step.
        """
        super().__init__()
        self.input_size, self.hidden_size, self.output_size = (
            input_size, hidden_size, output_size)

        # Sub-modules are registered in the same order as they are applied.
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.lstm1 = nn.LSTM(hidden_size, hidden_size)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size)
        self.lstm3 = nn.LSTM(hidden_size, hidden_size)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run one step.

        Args:
            input: index tensor; it is reshaped to (1, -1) before embedding and
                the embedding is then reshaped to (1, 1, -1).
            hidden: (h, c) state tuple as produced by ``init_hidden`` or by a
                previous call.

        Returns:
            (output, hidden): scores reshaped to (1, -1) and the state tuple
            returned by the last LSTM.
        """
        embedded = self.encoder(input.view(1, -1))
        features = embedded.view(1, 1, -1)
        state = hidden
        # Each LSTM starts from the state the previous one finished with.
        for layer in (self.lstm1, self.lstm2, self.lstm3):
            features, state = layer(features, state)
        scores = self.decoder(features.view(1, -1))
        return scores, state

    def init_hidden(self):
        """Return a zero-filled (h, c) tuple, each of shape (1, 1, hidden_size)."""
        shape = (1, 1, self.hidden_size)
        return (torch.zeros(shape), torch.zeros(shape))

In [0]:
def char_tensor(string):
    """Encode ``string`` as a 1 x len(string) long tensor of vocabulary indices.

    Every character is looked up in ``vocab``; a character that is not in the
    vocabulary raises ``KeyError``. The result is wrapped in ``Variable`` as
    the rest of the notebook expects.
    """
    encoded = torch.zeros(1, len(string)).long()
    for position, symbol in enumerate(string):
        encoded[0, position] = vocab[symbol]
    return Variable(encoded)

# Quick check of the encoding on a sample word
print(char_tensor('Руслан'))

In [0]:
def random_training_set():
    """Draw one random chunk and turn it into a next-character training pair.

    Returns:
        (inp, target): ``inp`` encodes the chunk without its last character,
        ``target`` encodes the chunk without its first character, so
        ``target[:, i]`` is the character that follows ``inp[:, i]``.
    """
    text = random_chunk()
    return char_tensor(text[:-1]), char_tensor(text[1:])

In [0]:
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    """Sample text from the global ``decoder`` model.

    The model state is first built up from ``prime_str``; then ``predict_len``
    characters are sampled one at a time, each sampled character being fed back
    as the next input.

    Args:
        prime_str: non-empty seed text. Every character must be in ``vocab``
            (``char_tensor`` looks each one up).
            NOTE(review): the default 'A' must exist in the corpus vocabulary —
            confirm for this corpus.
        predict_len: number of characters to generate.
        temperature: divisor applied to the scores before exponentiation;
            lower values sharpen the sampling distribution, higher values
            flatten it.

    Returns:
        str: ``prime_str`` followed by the generated characters.
    """
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Sampling needs no gradients: without no_grad() autograd would record and
    # keep a graph through every generation step for nothing.
    with torch.no_grad():
        hidden = decoder.init_hidden()

        # Feed all but the last priming character just to build up the state.
        for p in range(len(prime_str) - 1):
            _, hidden = decoder(prime_input[:, p], hidden)
        inp = prime_input[:, -1]

        for _ in range(predict_len):
            output, hidden = decoder(inp, hidden)

            # Unnormalised sampling weights; multinomial() normalises them.
            output_dist = output.view(-1).div(temperature).exp()
            # .item() gives a plain int for indexing the vocabulary list.
            top_i = torch.multinomial(output_dist, 1).item()

            predicted_char = vocab_inv[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char)

    return predicted

In [0]:
import time, math

def time_since(since):
    """Format the time elapsed since ``since`` as ``'<minutes>m <seconds>s'``.

    Args:
        since: a timestamp previously obtained from ``time.time()``.

    Returns:
        str: whole minutes and the remaining seconds (truncated to an integer).
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

In [0]:
def train(inp, target):
    """Run one optimisation step of the global ``decoder`` on one chunk.

    Args:
        inp: index tensor indexed as ``inp[:, c]`` for step ``c`` (as produced
            by ``char_tensor``).
        target: index tensor of the same length; ``target[:, c]`` is the
            expected next character for ``inp[:, c]``.

    Returns:
        float: the loss averaged over the characters predicted in this chunk.

    Raises:
        ValueError: if ``inp`` holds no characters.
    """
    # Use the length of the tensor actually passed in rather than the global
    # chunk size, so the loop and the average always match the data.
    seq_len = inp.size(1)
    if seq_len == 0:
        raise ValueError('train() needs at least one character to predict')

    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    # Accumulate the per-character loss over the whole chunk, carrying the
    # recurrent state from step to step.
    for c in range(seq_len):
        output, hidden = decoder(inp[:, c], hidden)
        loss += criterion(output, target[:, c])

    loss.backward()
    decoder_optimizer.step()

    # .item() extracts the Python number from the 0-dim loss tensor; indexing
    # it with [0] is not supported by current PyTorch. Divide by the number of
    # summed terms so the result is a true per-character average.
    return loss.item() / seq_len

In [0]:
# Training configuration. Each "epoch" below is one optimisation step on a
# single random chunk (one train() call per loop iteration), not a pass over
# the whole corpus.
n_epochs = 5000
print_every = 100
plot_every = 10
hidden_size = 128
lr = 0.0005

# Fresh model (input and output sizes are both the vocabulary size), its Adam
# optimiser and the cross-entropy criterion used by train().
decoder = RNN(n_chars, hidden_size, n_chars)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

# Bookkeeping: start time for the progress line, the list of averaged losses
# for plotting, and the running loss sum since the last plot point.
start = time.time()
all_losses = []
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    # Every print_every steps: elapsed time, step number, progress in percent,
    # the loss of the latest step, and a 200-character sample from the model.
    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('Руслан купил ', 200), '\n')

    # Every plot_every steps: store the mean loss of those steps and reset the sum.
    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

In [0]:
# NOTE(review): this cell repeats the configuration and training loop that
# also appears earlier in this notebook. Running it rebinds `decoder`,
# `decoder_optimizer`, `all_losses` and `loss_avg` to fresh objects, so any
# training done before it is discarded — confirm the duplicate is intended.
n_epochs = 5000
print_every = 100
plot_every = 10
hidden_size = 128
lr = 0.0005

# Fresh model, optimiser and loss criterion.
decoder = RNN(n_chars, hidden_size, n_chars)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

# Start time for the progress line, averaged losses for plotting, running loss sum.
start = time.time()
all_losses = []
loss_avg = 0

# One optimisation step on one random chunk per iteration.
for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    # Periodic progress line plus a 200-character sample from the model.
    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('Руслан купил ', 200), '\n')

    # Periodic plot point: mean loss of the last plot_every steps.
    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

In [0]:

import matplotlib.pyplot as plt
# NOTE(review): `ticker` is not used in the visible cells — confirm it is still needed.
import matplotlib.ticker as ticker
%matplotlib inline

# Plot the averaged training losses collected in all_losses (one point per plot_every steps).
plt.figure()
plt.plot(all_losses)

In [0]:
# Run n_epochs more optimisation steps on the existing `decoder` and optimiser
# (nothing is re-created here).
# NOTE(review): `start`, `loss_avg` and `all_losses` are not reset, so the
# elapsed time keeps counting from the earlier `start`, new plot points are
# appended to the existing list, and the step counter restarts from 1.
for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    # Periodic progress line plus a 200-character sample from the model.
    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('Руслан купил ', 200), '\n')

    # Periodic plot point: mean loss of the last plot_every steps.
    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

In [0]:
# Sample 100 characters from the model, primed with a fixed phrase, at temperature 0.8.
print(evaluate(prime_str='Руслан любил Людмилу', predict_len=100, temperature=0.8) )