In [484]:
import numpy as np
import pandas as pd
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import time

torch.manual_seed(1)

<torch._C.Generator at 0x27e512c8510>

In [485]:
with open(r'tweets.txt', encoding = 'utf8') as f:
    tweets = f.readlines()

In [486]:
with open(r'tweets.txt', encoding = 'utf8') as f:
    content=list(set(f.read()))

In [487]:
ix_to_char = {ix:char for ix, char in enumerate(content)}
char_to_ix = {char:ix for ix, char in enumerate(content)}
vocab_size = len(char_to_ix)

In [488]:
#tweets = tweets[:10000]

In [489]:
def prepare_tweet(tweet, target = False):
    if target==True:
        tweet_ix = torch.tensor([char_to_ix[c] for c in tweet[1:]], dtype=torch.long)
        tweet_ix = tweet_ix.view(-1)
    else:
        tweet_ix = torch.tensor([char_to_ix[c] for c in tweet[:-1]], dtype=torch.long)
        tweet_ix = tweet_ix.view(-1)
    return tweet_ix

In [490]:
class RNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim, vocab_size, n_layers=1):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embed = nn.Embedding(vocab_size, embed_dim)
        
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers = n_layers)
        
        self.hidden2char = nn.Linear(hidden_dim, vocab_size)
        self.dropout = nn.Dropout(0.2)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        return(torch.zeros(self.n_layers, 1, self.hidden_dim),
              torch.zeros(self.n_layers, 1, self.hidden_dim))
    
    def forward(self, tweet):
        embeds = self.embed(tweet)
        lstm_out, self.hidden = self.lstm(embeds.view(len(tweet), 1, -1),
                                         self.hidden)
        output = F.relu(self.hidden2char(lstm_out.view(len(tweet), -1)))
        output = self.dropout(output)
        log_probs = F.log_softmax(output, dim=1)
        return log_probs
        

In [491]:
def train(tweets, model, loss_function, optimizer, epochs):
    start = time.time()
    i=1
    for epoch in range(epochs):
        epoch_start = time.time()
        total_loss = 0
        for tweet in tweets:
            model.zero_grad()
        
            model.hidden = model.init_hidden()
        
            tweet_x = prepare_tweet(tweet)
            tweet_y = prepare_tweet(tweet, target=True)
        
            log_probs = model(tweet_x)
        
            loss = loss_function(log_probs, tweet_y)
            loss.backward()
            optimizer.step()
        
            total_loss += loss.item()
            #print("line {} done".format(i))
            #i+=1
        print('Epoch', epoch+1, 'Completed in %.0f' %(time.time()-epoch_start),'seconds - Loss: %.2f' %total_loss)
    total_time = time.time() - start
    hours = math.floor(total_time/3600)
    minutes = total_time-(hours*3600)
    minutes = math.floor(minutes/60)
    seconds = total_time - (hours*3600 + minutes*60)
    seconds = math.floor(seconds)

    print('Total Training Time: {0} Hours {1} Minutes {2} Seconds'.format(hours, minutes, seconds))

In [492]:
def generate_tweet(inputs):
    model.eval()
    with torch.no_grad():
        char = torch.tensor([char_to_ix[c] for c in inputs], dtype = torch.long)
        hidden = model.init_hidden()
        output_tweet = inputs
        letter = inputs
    
        for i in range(280):
            char = char.view(-1)
            output = model(char)
            topv, topi = output.topk(1)
            topi = topi[0][0].item()
            letter = ix_to_char[topi]
            if letter == '\n':
                break
            else:    
                output_tweet += letter
                char = torch.tensor(char_to_ix[letter], dtype = torch.long)
        
    return output_tweet

In [493]:
hidden_dim = 128
embed_size = 128
epochs = 50

model = RNN(embed_size, hidden_dim, vocab_size, n_layers = 3)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=.1)

In [None]:
train(tweets, model, loss_function, optimizer, epochs)

Epoch 1 Completed in 2504 seconds - Loss: 43044.15
Epoch 2 Completed in 2458 seconds - Loss: 30472.17


In [483]:
generate_tweet('')

'j                                                                                                                                                                                                                                                                                         '

In [467]:
torch.save(model.state_dict(), 'First_Model')