In [37]:
import os
import pickle
import numpy as np
import time
import math
import torch.nn as nn
import torch.functional as F
import torch

In [38]:
device = torch.device("cuda:0")

In [73]:
!nvidia-smi

Sat Oct  5 16:20:09 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  On   | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P0    34W / 250W |   9435MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

## Load data 

In [56]:
story_path = "story_data_origin.pkl"
with open(story_path, 'rb') as f:
    data = pickle.load(f)

In [57]:
STORY_VAL = 2500
data_batch = data[:STORY_VAL]

## Data preprocessing 

In [58]:
def token2str(data):
    full_data = [" ".join(sent).rstrip() for sent in data]
    
    return full_data

In [59]:
corpus = token2str(data_batch)

In [60]:
chars = set(''.join(corpus))

id2char = dict(enumerate(chars))

char2id = {char: ind for ind, char in id2char.items()}
voc_len = len(char2id)

In [61]:
def get_samples(data):
    input_seq = []
    target_seq = []
    
    for story in data:
        input_seq.append(np.array(story[:-1]))
        target_seq.append(np.array(story[1:]))
    
    return np.array(input_seq), np.array(target_seq)

In [62]:
input_seq, target_seq = get_samples(corpus)

In [63]:
input_tensor = []
target_tensor = []

for i in range(len(data_batch)):
    input_tensor.append(torch.tensor([char2id[char] for char in input_seq[i]]))
    target_tensor.append(torch.tensor([char2id[char] for char in target_seq[i]]))
    


In [64]:
input_seq = torch.nn.utils.rnn.pad_sequence(input_tensor, batch_first=True, padding_value=0)
target_seq = torch.nn.utils.rnn.pad_sequence(target_tensor, batch_first=True, padding_value=0)

In [65]:
BATCH_SIZE = 100
data2train = torch.utils.data.TensorDataset(input_seq, target_seq)
train_loader = torch.utils.data.DataLoader(data2train, batch_size = BATCH_SIZE, shuffle = True)

In [66]:
input_seq.size()

torch.Size([2500, 2966])

## Init Language Model 

In [67]:
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        """
        input_size = output_size = vocab lenght
        n_layers - val of lstm layer
        """
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size*2)


        self.lstm = nn.LSTM(hidden_size*2, hidden_size, n_layers, batch_first=True) 
        
        self.drop = nn.Dropout(0.5)
        self.fc_1 = nn.Linear(hidden_size, output_size)
        
    def forward(self, x, prev_state):
                
        x = self.encoder(x)
        
        out, state = self.lstm(x, (prev_state[0].to(device), prev_state[1].to(device)))
        
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.drop(out)
        out = self.fc_1(out)

        return out, state
    
    def init_hidden(self, batch_size):
        hidden = (torch.zeros(self.n_layers, batch_size, self.hidden_size),
                  torch.zeros(self.n_layers, batch_size, self.hidden_size))
        return hidden

## Init train params

In [68]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [69]:
n_epochs = 300
hidden_size = 256
n_layers = 1
lr = 0.01
print_every = 10

In [70]:
model = Model(voc_len, hidden_size, voc_len, n_layers)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

## Train Loop 

In [71]:
start = time.time()
for epoch in range(1, n_epochs + 1):
    loss_accum = 0
    zero_state = model.init_hidden(BATCH_SIZE)
    for i_step, (input_s, target_s) in enumerate(train_loader):
        optimizer.zero_grad()
        target_s = target_s.to(device)
        input_s = input_s.to(device)
    
        output, _ = model(input_s, zero_state)
        loss = criterion(output, target_s.view(-1).long())
        loss.backward() 
        optimizer.step() 
    
        loss_accum += loss
        del input_s
        del target_s
    


    ave_loss = loss_accum / i_step
    if epoch % print_every == 0:
        print('Time: %s | Epoch: %d / %d | Loss: %.4f' % (time_since(start), epoch, n_epochs, ave_loss))

Time: 1m 37s | Epoch: 10 / 300 | Loss: 0.2989
Time: 3m 15s | Epoch: 20 / 300 | Loss: 0.2729
Time: 4m 53s | Epoch: 30 / 300 | Loss: 0.2581
Time: 6m 31s | Epoch: 40 / 300 | Loss: 0.2486
Time: 8m 9s | Epoch: 50 / 300 | Loss: 0.2405
Time: 9m 47s | Epoch: 60 / 300 | Loss: 0.2348
Time: 11m 25s | Epoch: 70 / 300 | Loss: 0.2299
Time: 13m 3s | Epoch: 80 / 300 | Loss: 0.2261
Time: 14m 41s | Epoch: 90 / 300 | Loss: 0.2229
Time: 16m 19s | Epoch: 100 / 300 | Loss: 0.2206
Time: 17m 57s | Epoch: 110 / 300 | Loss: 0.2180
Time: 19m 35s | Epoch: 120 / 300 | Loss: 0.2156
Time: 21m 13s | Epoch: 130 / 300 | Loss: 0.2144
Time: 22m 52s | Epoch: 140 / 300 | Loss: 0.2132
Time: 24m 31s | Epoch: 150 / 300 | Loss: 0.2123
Time: 26m 10s | Epoch: 160 / 300 | Loss: 0.2114
Time: 27m 49s | Epoch: 170 / 300 | Loss: 0.2124
Time: 29m 28s | Epoch: 180 / 300 | Loss: 0.2105
Time: 31m 7s | Epoch: 190 / 300 | Loss: 0.2112
Time: 32m 46s | Epoch: 200 / 300 | Loss: 0.2109
Time: 34m 25s | Epoch: 210 / 300 | Loss: 0.2114
Time: 36m 

## Evaluation 

In [77]:
def predict(model, words, hidden):
    words = np.array([[char2id[c] for c in words]])

    words = torch.from_numpy(words)
    words = words.to(device)
    words = words.view(1, -1)
    
    out, hidden = model(words, hidden)

    prob = nn.functional.softmax(out[-1], dim=0).data
    char_ind = torch.max(prob, dim=0)[1].item()

    return id2char[char_ind], hidden

In [78]:
def sample(model, out_len, start='я'):
    model.eval() 
    hidden = model.init_hidden(1)
    chars = [ch for ch in start]
    size = out_len - len(chars)

    for ii in range(size):
        word, hidden = predict(model, chars, hidden)
        chars.append(word)

    return ''.join(chars)

## Generated Story 

In [85]:
result = sample(model, 47,"Утром я пошел")
print(result)

Утром я пошел с подругами и послал на старшика 
