In [1]:
# Load the pre-parsed Slack export as a single string `c`.
# The encoding is pinned so decoding does not depend on the platform's locale
# default (the data is known to contain non-ASCII text).
# NOTE(review): assumes the export was written as UTF-8 - confirm.
with open('./slack.parsed', encoding='utf-8') as s:
    c = s.read()

In [2]:
import re
# Speaker markers look like <__name__>; the capture group is the bare name.
# Raw string literal so that \S reaches the regex engine as written instead of
# relying on Python passing an unrecognised string escape through unchanged.
speaker_re = re.compile(r'<__(\S+)__>')

In [3]:
# One (character offset, full "<__name__>" marker text) pair per speaker marker
# found in the corpus, in order of appearance.
pos = [(match.start(), match.group()) for match in speaker_re.finditer(c)]

In [4]:
from collections import defaultdict
# Tally how many markers each speaker has. The position of a marker string in
# `unique_speakers` is what find_speaker / get_speaker_num return as its id.
speakers = defaultdict(int)
for marker in (entry[1] for entry in pos):
    speakers[marker] += 1
unique_speakers = list(speakers)

In [7]:
def find_speaker(char_pos, positions=None, speaker_names=None):
    """Return the speaker id that owns the character at offset `char_pos`.

    The owner is the last speaker marker that starts at or before `char_pos`.
    Unlike the previous implementation, offsets that fall after the final
    marker are attributed to that final speaker instead of to id 0.

    Args:
        char_pos: Character offset into the corpus.
        positions: Sequence of (start_offset, marker_text) pairs sorted by
            start_offset. Defaults to the module-level `pos`.
        speaker_names: List of marker texts; a marker's index in this list is
            its id. Defaults to the module-level `unique_speakers`.

    Returns:
        The integer speaker id, or 0 when `char_pos` precedes every marker,
        there are no markers at all, or the marker is not in `speaker_names`.
    """
    if positions is None:
        positions = pos
    if speaker_names is None:
        speaker_names = unique_speakers

    # Walk the (sorted) markers and remember the most recent one that has
    # already started; stop as soon as a marker begins after char_pos.
    owner = None
    for start, marker in positions:
        if start > char_pos:
            break
        owner = marker

    if owner is None:
        return 0
    try:
        return speaker_names.index(owner)
    except ValueError:
        # Unknown marker text: keep the original fallback id.
        return 0

In [8]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import json
import torch.utils.data
import random
from tqdm import tqdm
import string
import time
import math

In [9]:
# Alias the raw corpus text under the name the batching code reads (`convs`).
convs = c

In [42]:
class SlackModel(nn.Module):
    """Single-step character model conditioned on a speaker id.

    Each call to `forward` consumes one character id and one speaker id per
    batch row: both are embedded, concatenated, projected through a ReLU
    linear layer, run through a GRU for a single time step, and decoded to
    `output_size` scores.

    Note: the `batch_size` constructor argument is accepted but not used; the
    batch size is read from the input on every forward pass.
    """

    def __init__(self, batch_size, input_size, hidden_size, output_size, embedding_size=20, speaker_embedding_size=4, n_layers=1, n_speakers=1):
        super().__init__()

        # Plain hyper-parameters kept for later reshaping / state creation.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.embedding_size = embedding_size
        self.speaker_embedding_size = speaker_embedding_size

        # Sub-modules, created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        joint_size = embedding_size + speaker_embedding_size
        self.encoder = nn.Embedding(input_size, embedding_size)
        self.speaker_encoder = nn.Embedding(n_speakers, speaker_embedding_size)
        self.hidden = nn.Linear(joint_size, hidden_size)  # joint embedding -> GRU input
        self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)  # (input size, hidden size, layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, speaker, hidden):
        """Advance the model by one character.

        Args:
            input: Character ids, one per batch row.
            speaker: Speaker ids, one per batch row.
            hidden: GRU state as produced by `init_hidden` or a previous call.

        Returns:
            (output, hidden): scores reshaped to (batch, -1) and the new GRU
            state.
        """
        # Read the batch size from the data so generation can run with batch 1.
        n_rows = input.size(0)
        joint_width = self.embedding_size + self.speaker_embedding_size

        features = torch.cat((self.encoder(input), self.speaker_encoder(speaker)), dim=1)
        projected = F.relu(self.hidden(features.view(-1, joint_width)))

        # The GRU is fed a sequence of length 1: (1, batch, features).
        output, hidden = self.rnn(projected.view(1, n_rows, -1), hidden)
        return self.decoder(output.view(n_rows, -1)), hidden

    def init_hidden(self, batch_size):
        """Return an all-zero GRU state of shape (n_layers, batch_size, hidden_size)."""
        shape = (self.n_layers, batch_size, self.hidden_size)
        return Variable(torch.zeros(*shape))

In [43]:
def string_to_longtensor(chunk, alphabet=None):
    """Encode a string as a 1-D LongTensor of character ids.

    Args:
        chunk: The text to encode.
        alphabet: Sequence whose `.index(ch)` gives the id of character `ch`.
            Defaults to the module-level `all_characters`.

    Returns:
        A LongTensor of length `len(chunk)`. Characters that are not in the
        alphabet (e.g. Hebrew, Chinese, bullet points) keep the initial value
        0, which is the same id as the first alphabet character.

    Only the "character not found" error is suppressed. Any other failure -
    notably `all_characters` not being defined yet - now surfaces instead of
    silently producing an all-zero tensor.
    """
    if alphabet is None:
        alphabet = all_characters
    tensor = torch.zeros(len(chunk)).long()
    for i, ch in enumerate(chunk):
        try:
            tensor[i] = alphabet.index(ch)
        except ValueError:
            continue  # character outside the alphabet: leave the 0 in place
    return tensor

In [44]:
# Run on the GPU when one is actually available; a hard-coded True makes every
# .cuda() call below fail on CPU-only machines.
cuda = torch.cuda.is_available()

# Training / model hyper-parameters.
n_epochs = 200  # NOTE: reassigned in the training-loop cell further down
chunk_len = 288  # characters per training sample
batch_size = 96  # samples per batch
input_len = len(convs)  # corpus length in characters
embedding_size = 20  # character embedding width
speaker_embedding_size = 4  # speaker embedding width
n_layers = 2  # stacked GRU layers
n_speakers = len(unique_speakers)
all_characters = string.printable  # alphabet; a character's index is its id
n_characters = len(all_characters)  # used as both model input and output size
hidden_size = 142

def get_training_batch(chunk_len, batch_size):
    """Sample a random batch of (input, speaker, target) tensors from `convs`.

    For each row a window of `chunk_len + 1` characters is cut from a random
    offset: the first `chunk_len` characters are the input and the same window
    shifted by one character is the target.

    The speaker id is looked up once, at the window's start offset, and that
    single id fills the whole row even if the window crosses a speaker marker.

    Reads the module-level `convs`, `input_len` and `cuda`.

    Args:
        chunk_len: Number of characters per row.
        batch_size: Number of rows.

    Returns:
        (inp, spkr, target), each a LongTensor of shape
        (batch_size, chunk_len), moved to the GPU when `cuda` is truthy.

    Raises:
        ValueError: If the corpus has fewer than `chunk_len + 1` characters.
    """
    inp = torch.LongTensor(batch_size, chunk_len)
    spkr = torch.LongTensor(batch_size, chunk_len)
    target = torch.LongTensor(batch_size, chunk_len)

    # random.randint includes its upper bound, and every window needs
    # chunk_len + 1 characters, so the last valid start is one less than
    # input_len - chunk_len. Using input_len - chunk_len could produce a
    # window that is one character short and fail on the row assignment.
    last_start = input_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            'corpus has {} characters but chunk_len={} needs at least {}'.format(
                input_len, chunk_len, chunk_len + 1))

    for bi in range(batch_size):
        start_index = random.randint(0, last_start)
        speaker = find_speaker(start_index)
        end_index = start_index + chunk_len + 1
        chunk = convs[start_index:end_index]
        inp[bi] = string_to_longtensor(chunk[:-1])
        spkr[bi] = speaker  # scalar broadcast across the whole row
        target[bi] = string_to_longtensor(chunk[1:])

    inp = Variable(inp)
    target = Variable(target)
    spkr = Variable(spkr)
    if cuda:
        inp = inp.cuda()
        target = target.cuda()
        spkr = spkr.cuda()
    return inp, spkr, target

def train(inp, speaker, target):
    """Run one optimisation step on a batch and return the mean per-step loss.

    The batch is fed to the module-level model `m` one character column at a
    time; the per-column `criterion` losses are summed, back-propagated once,
    and `optimizer` takes a single step.

    Args:
        inp: Character ids, shape (batch, steps).
        speaker: Speaker ids, same shape as `inp`.
        target: Next-character ids, same shape as `inp`.

    Returns:
        The summed loss divided by the number of steps, as a Python number.
    """
    # Take the sizes from the batch itself so the function does not silently
    # depend on the global batch_size / chunk_len matching the tensors.
    n_rows, n_steps = inp.size(0), inp.size(1)

    hidden = m.init_hidden(n_rows)
    if cuda:
        hidden = hidden.cuda()
    m.zero_grad()
    loss = 0

    for step in range(n_steps):
        output, hidden = m(inp[:, step], speaker[:, step], hidden)
        loss += criterion(output.view(n_rows, -1), target[:, step])

    loss.backward()
    optimizer.step()

    # Indexing a 0-dim loss with [0] fails on current PyTorch; .item() is the
    # supported way to get the scalar. Fall back to the legacy form on
    # installs old enough not to have .item().
    total = loss.item() if hasattr(loss, 'item') else loss.data[0]
    return total / n_steps

def save():
    """Serialise the whole module-level model `m` to disk and print the file name."""
    target_path = '{}.pt'.format('slack.model')
    torch.save(m, target_path)
    message = 'Saved as {}'.format(target_path)
    print(message)

# Build the model from the hyper-parameters above. Characters are both the
# input vocabulary and the prediction targets, hence n_characters twice.
m = SlackModel(
    batch_size=batch_size,
    input_size=n_characters,
    hidden_size=hidden_size,
    output_size=n_characters,
    embedding_size=embedding_size,
    speaker_embedding_size=speaker_embedding_size,
    n_layers=n_layers,
    n_speakers=n_speakers,
)

# Module.cuda() moves the parameters and returns the module itself.
if cuda:
    m = m.cuda()

In [45]:
# Loss and optimiser used by train(): cross-entropy over character scores,
# Adam over all model parameters.
lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(m.parameters(), lr=lr)

In [None]:
# Main training loop. Each "epoch" here is one randomly sampled batch.
# NOTE: generate() is defined in a later cell of this notebook; that cell must
# have been executed before this one or the first progress report fails.
start = time.time()
all_losses = []
loss_avg = 0
n_epochs = 1800

for epoch in tqdm(range(0, n_epochs)):
    loss = train(*get_training_batch(chunk_len, batch_size))
    all_losses.append(loss)  # keep the loss history that was declared above
    if epoch % 20 == 0:
        print('(Epoch {} of {}) - {}'.format(epoch, n_epochs, loss))
        # The second positional parameter of generate() is the speaker, so the
        # sample length has to be passed by keyword.
        print(generate('?', predict_len=100), '\n')

print("Saving...")
save()

In [52]:
def get_speaker_num(spkr, speaker_names=None):
    """Return the integer id of a speaker marker such as '<__name__>'.

    Args:
        spkr: Marker text to look up.
        speaker_names: List of marker texts; a marker's index is its id.
            Defaults to the module-level `unique_speakers`.

    Returns:
        The marker's index in `speaker_names`, or 0 when it is not present.

    Only the "not in list" error is turned into the 0 fallback; other errors
    (for example `unique_speakers` not being defined) are no longer hidden.
    """
    if speaker_names is None:
        speaker_names = unique_speakers
    try:
        return speaker_names.index(spkr)
    except ValueError:
        return 0

def generate(prime_str='A', prime_speaker='<__tom_grek__>', predict_len=100, temperature=0.7):
    """Sample text from the module-level model `m` for a given speaker.

    The model state is first warmed up on every character of `prime_str`
    except the last; the last character is then used as the first input of
    the sampling loop, and each sampled character is fed back as the next
    input.

    Args:
        prime_str: Non-empty seed text; it is included at the start of the
            result.
        prime_speaker: Speaker marker text; unknown markers map to id 0 via
            `get_speaker_num`.
        predict_len: Number of characters to sample.
        temperature: Positive divisor applied to the model scores before
            sampling; smaller values make the sampling less random.

    Returns:
        `prime_str` followed by `predict_len` sampled characters.

    Raises:
        ValueError: If `prime_str` is empty or `temperature` is not positive.
    """
    if not prime_str:
        raise ValueError('prime_str must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    hidden = m.init_hidden(batch_size=1)
    prime_input = Variable(string_to_longtensor(prime_str))
    prime_speaker = Variable(torch.LongTensor([get_speaker_num(prime_speaker)]))
    if cuda:
        hidden = hidden.cuda()
        prime_input = prime_input.cuda()
        prime_speaker = prime_speaker.cuda()
    predicted = prime_str

    # Warm up the hidden state on all but the last priming character.
    for p in range(len(prime_str) - 1):
        _, hidden = m(prime_input[p].view(1), prime_speaker, hidden)

    inp = prime_input[-1].view(1)

    for p in range(predict_len):
        output, hidden = m(inp.view(1), prime_speaker, hidden)

        # Scale the scores by the temperature and exponentiate to get positive
        # sampling weights. Subtracting the maximum first leaves the relative
        # weights unchanged but prevents exp() from overflowing to inf when
        # the temperature is small.
        logits = output.data.view(-1).div(temperature)
        output_dist = (logits - logits.max()).exp()
        # multinomial draws an index with probability proportional to its
        # weight; int() gives a plain Python index for the alphabet lookup.
        most_likely = int(torch.multinomial(output_dist, num_samples=1)[0])

        # Add predicted character to string and use as next input
        predicted_char = all_characters[most_likely]
        predicted += predicted_char
        inp = Variable(string_to_longtensor(predicted_char))
        if cuda:
            inp = inp.cuda()

    return predicted

In [53]:
# Sample a continuation of "we should " for one speaker at temperature 0.5.
print(generate(prime_str='we should ', prime_speaker='<__tom_grek__>', temperature=0.5))

we should a learn to we will going in this last the look a lecial the about the use as for the sent the meting
