In [347]:
import numpy as np
import pandas as pd
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import time

torch.manual_seed(1)

<torch._C.Generator at 0x27e512c8510>

In [348]:
with open(r'tweets.txt', encoding = 'utf8') as f:
    tweets = f.readlines()

#del tweets[0]

In [349]:
with open(r'tweets.txt', encoding = 'utf8') as f:
    content=list(set(f.read()))

In [351]:
ix_to_char = {ix:char for ix, char in enumerate(content)}
char_to_ix = {char:ix for ix, char in enumerate(content)}
vocab_size = len(char_to_ix)
vocab_size

166

In [352]:
def prepare_tweet(tweet, target = False):
    if target==True:
        tweet_ix = torch.tensor([char_to_ix[c] for c in tweet[1:]], dtype=torch.long)
    else:
        tweet_ix = torch.tensor([char_to_ix[c] for c in tweet[:-1]], dtype=torch.long)
    return tweet_ix

In [353]:
class RNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim, vocab_size, n_layers=1):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embed = nn.Embedding(vocab_size, embed_dim)
        
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers = n_layers)
        
        self.hidden2char = nn.Linear(hidden_dim, vocab_size)
        self.dropout = nn.Dropout(0.2)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        return(torch.zeros(self.n_layers, 1, self.hidden_dim),
              torch.zeros(self.n_layers, 1, self.hidden_dim))
    
    def forward(self, tweet):
        embeds = self.embed(tweet)
        lstm_out, self.hidden = self.lstm(embeds.view(len(tweet), 1, -1),
                                         self.hidden)
        output = F.relu(self.hidden2char(lstm_out.view(len(tweet), -1)))
        output = self.dropout(output)
        log_probs = F.log_softmax(output, dim=1)
        return log_probs
        

In [None]:
hidden_dim = 128
embed_size = 128
epochs = 100

model = RNN(embed_size, hidden_dim, vocab_size)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=.1)

start = time.time()

for epoch in range(epochs):
    epoch_start = time.time()
    total_loss = 0
    for tweet in tweets:
        model.zero_grad()
        
        model.hidden = model.init_hidden()
        
        tweet_x = prepare_tweet(tweet)
        tweet_y = prepare_tweet(tweet, target=True)
        
        log_probs = model(tweet_x)
        
        loss = loss_function(log_probs, tweet_y)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    print('Epoch', epoch+1, 'Completed in %.0f' %(time.time()-epoch_start),'seconds - Loss: %.2f' %total_loss)
total_time = time.time() - start
hours = math.floor(total_time/360)
minutes = total_time-(hours*360)
minutes = math.floor(minutes/60)
seconds = total_time - (hours*360 + minutes*60)

print('Total Training Time: %.0f Hours %.0f Minutes %.0 Seconds' %(hours, minutes, seconds))

In [341]:
def generate_tweet(inputs):
    with torch.no_grad():
        char = torch.tensor([char_to_ix[c] for c in inputs], dtype = torch.long)
        hidden = model.init_hidden()
        output_tweet = inputs
        letter = inputs
    
        #while letter != '/n':
        for i in range(200):
            char = char.view(-1)
            output = model(char)
            topv, topi = output.topk(1)
            topi = topi[0][0].item()
            letter = ix_to_char[topi]
            output_tweet += letter
            char = torch.tensor(char_to_ix[letter], dtype = torch.long)
        
    return output_tweet

In [342]:
generate_tweet('Hillary')

'there say,, an he sange of the shearth he shall,. the shalt nother he shall,.\n do will.\n\ny of thy shall say, any his nor and them thoun out of the people she shall said,, and, the word she land of the sh'