In [0]:
import os
import pickle
import numpy as np

import torch.nn as nn
import torch.functional as F
import torch

# import transformers
# from transformers import convert_bert_original_tf_checkpoint_to_pytorch
# from transformers import BertTokenizer, BertModel, BertForMaskedLM, BertForQuestionAnswering, BertForPreTraining

In [0]:
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs (slowly) on machines without a GPU instead of failing at model.to(device).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Load BERT

In [0]:
# BERT_MODEL_PATH = "rubert/"
# os.listdir(BERT_MODEL_PATH)

In [0]:
# convert_bert_original_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
#     BERT_MODEL_PATH + 'bert_model.ckpt',
# BERT_MODEL_PATH + 'bert_config.json','pytorch_model.bin')

In [0]:
# model_path = "rubert_pytorch/"

In [0]:
# tokenizer = BertTokenizer.from_pretrained(model_path)
# model = BertModel.from_pretrained(model_path);

## Load data 

In [0]:
# Load the pickled dataset from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open files that come from a trusted source.
story_path = "story_data.pkl"
with open(story_path, mode='rb') as story_file:
    data = pickle.load(story_file)

In [0]:
# Number of leading entries of `data` that are kept (used as a slice bound).
STORY_VAL = 2400

In [0]:
# Keep only the first STORY_VAL entries of the loaded data.
data_batch = data[:STORY_VAL]

In [0]:
# Join all selected stories end to end into one flat array of tokens.
data_flat = np.concatenate(data_batch, axis=0)

In [0]:
# Lower-case every token so the vocabulary is case-insensitive.
data_flat = [str.lower(token) for token in data_flat]

## Data preprocessing 

In [0]:
# Build the vocabulary and the word -> index lookup.
# Index 0 is reserved for the "<pad>" token, so real words start at 1 and the
# total vocabulary size is len(vocab) + 1.
vocab = set(data_flat)
voc_len = len(vocab) + 1
# Enumerate the words in sorted order: iteration order of a set of strings
# changes between interpreter runs (hash randomisation), which would give every
# kernel restart a different word -> index mapping.
word_to_ix = {word: i + 1 for i, word in enumerate(sorted(vocab))}
word_to_ix["<pad>"] = 0

In [0]:
def _as_object_array(items):
    """Pack ``items`` into a 1-D object array, one Python object per slot."""
    packed = np.empty(len(items), dtype=object)
    # Assign slot by slot: a bulk assignment would let NumPy try to broadcast
    # equal-length sequences into a 2-D array.
    for idx, item in enumerate(items):
        packed[idx] = item
    return packed


def get_samples(data):
    """Split every story into next-token (input, target) sequences.

    For each story the input is every element except the last and the target
    is every element except the first, so ``target[k]`` is the element that
    follows ``input[k]`` in the story.

    Args:
        data: iterable of sliceable token sequences (one per story).

    Returns:
        Tuple ``(input_seq, target_seq)`` of 1-D object arrays with one entry
        per story; each entry is a NumPy array of that story's tokens.
        Object arrays are built explicitly because stories have different
        lengths, and ``np.array`` on such ragged input raises on current
        NumPy versions.
    """
    input_seq = []
    target_seq = []

    for story in data:
        input_seq.append(np.array(story[:-1]))
        target_seq.append(np.array(story[1:]))

    return _as_object_array(input_seq), _as_object_array(target_seq)

In [0]:
# Build the shifted input / target token sequences for the selected stories.
input_seq, target_seq = get_samples(data_batch)

In [0]:
# Replace, in place, every token sequence with a tensor of vocabulary indices.
# Tokens are lower-cased here because the vocabulary was built from lower-cased text.
n_stories = len(data_batch)
for story_idx in range(n_stories):
    source_ids = [word_to_ix[token.lower()] for token in input_seq[story_idx]]
    input_seq[story_idx] = torch.tensor(source_ids)
    target_ids = [word_to_ix[token.lower()] for token in target_seq[story_idx]]
    target_seq[story_idx] = torch.tensor(target_ids)

In [0]:
# Right-pad every sequence with 0 (the "<pad>" index) to the length of the
# longest one, giving batch-first tensors.
input_seq = torch.nn.utils.rnn.pad_sequence(input_seq, batch_first=True,padding_value=0)
target_seq = torch.nn.utils.rnn.pad_sequence(target_seq, batch_first=True, padding_value=0)

In [0]:
# Number of stories per optimisation step.
BATCH_SIZE = 32
# Pair each padded input sequence with its padded target sequence.
data2train = torch.utils.data.TensorDataset(input_seq, target_seq)
# Reshuffle the stories every epoch.
train_loader = torch.utils.data.DataLoader(data2train, batch_size = BATCH_SIZE, shuffle = True)

In [17]:
# Sanity check of the padded shape: (number of stories, padded sequence length).
input_seq.size()

torch.Size([2400, 519])

## Init Embeddings

In [0]:
# def get_bert_embed_matrix(model):
    
#     bert_embeddings = list(model.children())[0]
#     bert_word_embeddings = list(bert_embeddings.children())[0]
#     mat = bert_word_embeddings.weight.data.numpy()
#     return torch.tensor(mat)

In [0]:
# matrix = get_bert_embed_matrix(model)

In [0]:
# def create_emb_layer(weights_matrix, non_trainable=False):
#     num_embeddings, embedding_dim = weights_matrix.size()
#     emb_layer = nn.Embedding(num_embeddings, embedding_dim)
#     emb_layer.load_state_dict({'weight': weights_matrix})
#     if non_trainable:
#         emb_layer.weight.requires_grad = False

#     return emb_layer, num_embeddings, embedding_dim

## Init Language Model 

In [0]:
class Model(nn.Module):
    """Word-level language model: embedding -> GRU -> two-layer MLP.

    Args:
        input_size: number of rows in the embedding table.
        hidden_size: width of the GRU hidden state; the embedding width is
            ``2 * hidden_size``.
        output_size: number of logits produced for every time step.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(input_size, hidden_size*2)

        # NOTE: the attribute is called ``lstm`` but holds a GRU. The name is
        # kept so that existing checkpoints / state_dict keys still load.
        self.lstm = nn.GRU(hidden_size*2, hidden_size, n_layers, batch_first=True,
                          bidirectional=False)

        self.drop = nn.Dropout(0.5)
        self.fc_1 = nn.Linear(hidden_size, hidden_size // 2)
        self.relu = nn.ReLU()
        self.fc_2 = nn.Linear(hidden_size // 2, output_size)

    def forward(self, x):
        """Run the model on a batch of index sequences.

        Args:
            x: integer tensor of shape ``(batch, seq_len)``.

        Returns:
            Tuple ``(out, hidden)``: ``out`` holds the logits flattened to
            ``(batch * seq_len, output_size)`` and ``hidden`` is the final GRU
            state of shape ``(n_layers, batch, hidden_size)``.
        """
        batch_size = x.size(0)
        # Create the initial state on the input's own device instead of relying
        # on a notebook-level global, so the module works wherever it is placed.
        hidden = self.init_hidden(batch_size, device=x.device)
        x = self.encoder(x)

        out, hidden = self.lstm(x, hidden)

        # Merge the batch and time axes so the linear layers see one row per token.
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.drop(out)
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc_2(out)

        return out, hidden

    def init_hidden(self, batch_size, device=None):
        """Return an all-zero initial GRU state.

        Args:
            batch_size: number of sequences in the batch.
            device: device to allocate on; ``None`` keeps PyTorch's default
                device, as before.

        Returns:
            Tensor of shape ``(n_layers, batch_size, hidden_size)``.
        """
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_size, device=device)
        return hidden

## Fit model 

In [0]:
# Training hyperparameters.
n_epochs = 100      # full passes over the training loader
hidden_size = 240   # GRU hidden width passed to Model
n_layers = 2        # stacked GRU layers
lr = 0.002          # Adam learning rate
print_every = 10    # report the loss every this many epochs

In [0]:
# Input and output sizes are both the vocabulary size: the model reads word
# indices and produces one score per vocabulary entry.
model = Model(voc_len, hidden_size, voc_len, n_layers)
model.to(device)

# Targets are padded with the "<pad>" index. Without ignore_index those
# positions count towards the loss, so training is dominated by learning to
# predict padding rather than real words.
criterion = nn.CrossEntropyLoss(ignore_index=word_to_ix["<pad>"])
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [0]:
import time, math

def time_since(since):
    """Format the wall-clock time elapsed since ``since``.

    Args:
        since: start timestamp as returned by ``time.time()``.

    Returns:
        String of the form ``'<minutes>m <seconds>s'`` with whole numbers.
    """
    elapsed = time.time() - since
    whole_minutes = math.floor(elapsed / 60)
    leftover_seconds = elapsed - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

In [0]:
start = time.time()
for epoch in range(1, n_epochs + 1):
    # sample() leaves the model in eval mode; re-enable dropout before training
    # so that re-running this cell after generating text trains correctly.
    model.train()
    loss_accum = 0.0
    n_batches = 0
    for input_s, target_s in train_loader:
        optimizer.zero_grad()
        target_s = target_s.to(device)
        input_s = input_s.to(device)

        # The model returns logits flattened to (batch * seq_len, vocab), so the
        # targets are flattened the same way.
        output, _ = model(input_s)
        loss = criterion(output, target_s.view(-1).long())
        loss.backward()
        optimizer.step()

        # Accumulate a plain float: summing the loss tensors themselves keeps
        # them attached to autograd for the whole epoch.
        loss_accum += loss.item()
        n_batches += 1

    # Average over the number of batches actually seen. The previous divisor was
    # the last batch index, which is one too small and zero for a single batch.
    ave_loss = loss_accum / max(n_batches, 1)
    if epoch % print_every == 0:
        print('Time: %s | Epoch: %d / %d%% | Loss: %.4f' % (time_since(start), epoch, n_epochs, ave_loss))

## Evaluation 

In [0]:
# Reverse lookup: vocabulary index -> word.
ix_to_word = {v:k for k, v in word_to_ix.items()}
def predict(model, words):
    """Predict the word that follows ``words``.

    Args:
        model: network whose call returns ``(logits, hidden)``, with one row of
            logits per input position.
        words: non-empty sequence of words, each a key of ``word_to_ix``.

    Returns:
        Tuple ``(next_word, hidden)``: the highest-probability word after the
        last input word, and the hidden state returned by the model.

    Raises:
        ValueError: if ``words`` is empty.
        KeyError: if a word is not in ``word_to_ix``.
    """
    if len(words) == 0:
        raise ValueError("predict() needs at least one context word")

    indices = [word_to_ix[c] for c in words]
    # Explicit int64: embedding lookups expect integer indices, and the default
    # NumPy integer width differs between platforms.
    batch = torch.tensor(indices, dtype=torch.long, device=device).view(1, -1)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        out, hidden = model(batch)
        # With a batch of one, the last row of logits belongs to the last word.
        prob = nn.functional.softmax(out[-1], dim=0)
    char_ind = torch.max(prob, dim=0)[1].item()

    return ix_to_word[char_ind], hidden

In [0]:
def sample(model, out_len, start='я'):
    """Generate text by repeatedly appending the model's predicted next word.

    Args:
        model: trained language model; it is switched to eval mode.
        out_len: total number of words in the result, prompt included.
        start: whitespace-separated prompt; it is lower-cased before use.

    Returns:
        The prompt followed by the generated words, joined by single spaces.
        If the prompt already has ``out_len`` or more words, nothing is added.
    """
    model.eval()
    generated = start.lower().split()

    remaining = out_len - len(generated)
    for _ in range(remaining):
        # The full sequence so far is fed back in on every step.
        next_word, _hidden = predict(model, generated)
        generated.append(next_word)

    return ' '.join(generated)

## Generated Story 

In [35]:
# Generate a 20-word text (prompt included) seeded with the prompt "Да".
sample(model, 20, "Да")

'да с моем был состоялся кошек и заметили со страху позвали моя водитель использует один много рандомному каждой человек взяли'