In [90]:
import torch
import numpy as np
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [61]:
# Load the tokenizer that belongs to the pretrained "gpt2-xl" checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2-xl")

In [62]:
# Load the pretrained "gpt2-xl" language-model head. The tokenizer's
# end-of-sequence id is passed as pad_token_id.
# NOTE(review): presumably because this checkpoint defines no pad token of its
# own and generate() would otherwise warn about it -- confirm.
model = GPT2LMHeadModel.from_pretrained("gpt2-xl", pad_token_id=tokenizer.eos_token_id)

In [68]:
# Read the two message dumps back in, one list entry per line.
# Each entry keeps its trailing newline so that a slice of the list can be
# turned back into a transcript with ''.join(...).
with open('rand_messages.txt', 'r') as rand_file:
    rand_messages = rand_file.readlines()
with open('gen_messages.txt', 'r') as gen_file:
    gen_messages = gen_file.readlines()

In [95]:
# Pick a random run of consecutive messages to serve as the generation prompt.
context_len = 10  # number of consecutive messages in the prompt window

# Largest valid window start; clamped at 0 so a list shorter than
# `context_len` yields one (shorter) window instead of raising.
last_start = max(len(rand_messages) - context_len, 0)

# randint's upper bound is exclusive, hence the +1: without it the window
# could never include the final message.
start = np.random.randint(last_start + 1)
stop = start + context_len

# Last expression of the cell: displays the selected transcript.
''.join(rand_messages[start:stop])

"arushi: Or when you TA :P\nkayla: Love the trump emoji \npfaendtner: kayla logs onto slack just to tell us how she loves trump.  so weird\nkayla: Hah. Very funny. \npfaendtner: Need to find a day that Kayla, Arushi, Josh and Wes are all free for happy hour - want to take whole group out in appreciation of all the work you have done on grants and recruiting - but you guys did the heavy lifting for sure\nslackbot: :beer: *slurp* *burp*\nanotherjoshsmith: thanks, sounds great!\npfaendtner: I could do tue next week\nanotherjoshsmith: i think we need to update slackbot's response to :beers:\nkayla: I can't, got choir on Tuesdays \n"

In [96]:
# input_ids = tokenizer.encode("[Scene: Central Perk, Ross, Chandler, and Phoebe are there. Joey is working.]\nRoss: Hey, remember when I had a monkey?\nChandler: Yeah.\nRoss: Yeah, what, what was I thinking?",
#                              return_tensors='pt')
# input_ids = tokenizer.encode("The special problem we tried to get at with these lectures was to maintain the interest of the very enthusiastic and rather smart students coming out of the high schools and into Caltech. They have heard a lot about how interesting and exciting physics is—the theory of relativity, quantum mechanics, and other modern ideas. By the end of two years of our previous course, many would be very discouraged because there were really very few grand, new, modern ideas presented to them. They were made to study inclined planes, electrostatics, and so forth, and after two years it was quite stultifying. The problem was whether or not we could make a course which would save the more advanced and excited student by maintaining his enthusiasm.",
#                              return_tensors='pt')
# input_ids = tokenizer.encode("I have a friend who's an artist and has sometimes taken a view which I don't agree with very well. He'll hold up a flower and say 'look how beautiful it is', and I'll agree. Then he says 'I as an artist can see how beautiful this is but you as a scientist take this all apart and it becomes a dull thing', and I think that he's kind of nutty. First of all, the beauty that he sees is available to other people and to me too, I believe. Although I may not be quite as refined aesthetically as he is ... I can appreciate the beauty of a flower. At the same time, I see much more about the flower than he sees. I could imagine the cells in there, the complicated actions inside, which also have a beauty.",
#                              return_tensors='pt')
# Use the randomly selected message window as the prompt and tokenize it;
# return_tensors='pt' asks the tokenizer for a PyTorch tensor.
prompt_text = ''.join(rand_messages[start:stop])
input_ids = tokenizer.encode(prompt_text, return_tensors='pt')

In [97]:
# Sampling settings handed to generate(): stochastic decoding with both a
# top-k and a top-p filter, producing five sequences per prompt.
# NOTE(review): max_length is believed to count the prompt tokens as well --
# confirm against the transformers version in use.
generation_kwargs = {
    'do_sample': True,
    'max_length': 500,
    'top_k': 75,
    'top_p': 0.95,
    'num_return_sequences': 5,
}
sample_outputs = model.generate(input_ids, **generation_kwargs)

In [98]:
# Print every sampled sequence under a numbered heading, separated by blank
# lines. Special tokens are stripped while decoding.
divider = 100 * '-'
print("Output:\n" + divider)
for idx, token_ids in enumerate(sample_outputs):
    decoded = tokenizer.decode(token_ids, skip_special_tokens=True)
    print("{}: {}".format(idx, decoded))
    print('\n')

Output:
----------------------------------------------------------------------------------------------------
0: arushi: Or when you TA :P
kayla: Love the trump emoji 
pfaendtner: kayla logs onto slack just to tell us how she loves trump.  so weird
kayla: Hah. Very funny. 
pfaendtner: Need to find a day that Kayla, Arushi, Josh and Wes are all free for happy hour - want to take whole group out in appreciation of all the work you have done on grants and recruiting - but you guys did the heavy lifting for sure
slackbot: :beer: *slurp* *burp*
anotherjoshsmith: thanks, sounds great!
pfaendtner: I could do tue next week
anotherjoshsmith: i think we need to update slackbot's response to :beers:
kayla: I can't, got choir on Tuesdays 
pfaendtner: Have it set up as an admin group so if one of us has a busy day they will just say, 'I'm so sorry', so if one person is really busy we can just say 'we just got into a really big fight with my boss', and it will be OK
kayla: I can't. It's on Tuesday.
s

### Slack Dataset

In [1]:
import sqlite3

In [22]:
def _quote_ident(name):
    """Return `name` as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"{}"'.format(name.replace('"', '""'))


# Fetch every row of each table listed in `tables`; `data` receives one list
# of rows per table, in the same order as `tables`.
# NOTE: `conn` and `c` are still open when this cell finishes; call
# conn.close() once they are no longer needed.
conn = sqlite3.connect('prg_msg.db')
c = conn.cursor()
tables = ['users', 'rand_messages', 'gen_messages']
table_cols = []
data = []
for table in tables:
    quoted_table = _quote_ident(table)
    # Zero-row query: run only so that c.description exposes the column names.
    c.execute("select * from {} where 1=0;".format(quoted_table))
    table_cols = [d[0] for d in c.description]
    # Identifiers are quoted so that column names containing spaces or SQL
    # keywords do not break the generated statement.
    select_str = ', '.join(_quote_ident(col) for col in table_cols)
    c.execute("select {} from {}".format(select_str, quoted_table))
    data.append(c.fetchall())

In [66]:
# `data` holds one row list per entry of `tables`, in that order:
# users, rand_messages, gen_messages.
users, rand_messages, gen_messages = data

In [25]:
# Lookup table built from the users rows: column 0 -> column 1.
# NOTE(review): presumably user id -> display name; verify against the
# `users` table schema.
users_dict = {row[0]: row[1] for row in users}

In [55]:
def decode_message(msg, user_dict):
    """Render one message row as a ``'<user>: <text>'`` line.

    Parameters
    ----------
    msg : sequence
        Message row; ``msg[1]`` is the key looked up in ``user_dict`` and
        ``msg[2]`` is the message text.
    user_dict : dict
        Maps the value found in ``msg[1]`` to the name printed before the
        colon.

    Returns
    -------
    str
        ``'<user>: <text>'``, with ``'<missing_message>'`` substituted when
        the text is empty or ``None``.

    Raises
    ------
    KeyError
        If ``msg[1]`` is not a key of ``user_dict``.
    """
    user = user_dict[msg[1]]
    msg_text = msg[2]
    # A missing text (e.g. a NULL database column, which sqlite3 returns as
    # None) is treated like an empty one rather than rendered as 'None'.
    if msg_text is None or msg_text == '':
        msg_text = '<missing_message>'
    return '{}: {}'.format(user, msg_text)

In [67]:
# Export both message tables as plain text, one decoded message per line.
# Rows are written in the reverse of the order in which they were fetched.
for out_path, messages in (('rand_messages.txt', rand_messages),
                           ('gen_messages.txt', gen_messages)):
    with open(out_path, 'w') as out_file:
        out_file.writelines(
            '{}\n'.format(decode_message(msg, users_dict))
            for msg in reversed(messages)
        )

In [58]:
# Row counts of the two message tables, as (rand_messages, gen_messages).
tuple(map(len, (rand_messages, gen_messages)))

(7297, 5498)