In [1]:
# http://pytorch.org/
# Colab bootstrap: work out which prebuilt PyTorch 0.4.1 wheel matches this
# runtime, install it together with torchvision, then import torch.
from os.path import exists
# NOTE(review): `wheel.pep425tags` may not exist in newer `wheel` releases --
# verify this import against the installed version.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Wheel filename tag formatted as '<impl><version>-<abi>' for the running interpreter.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# List cached shared libraries, keep the cudart entries, and rewrite the
# trailing '.<major>.<minor>' suffix of each line into 'cu<major><minor>'.
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# Use the CUDA tag only when the NVIDIA device node exists; otherwise pick the CPU wheel.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# `{accelerator}` and `{platform}` are interpolated by IPython before the shell runs.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

# NOTE(review): the downloaded CUDA 9.2 repo package is not referenced by any
# later cell visible here -- confirm whether this download is still needed.
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod2/local_installers/cuda-repo-ubuntu1710-9-2-local_9.2.148-1_amd64


tcmalloc: large alloc 1073750016 bytes == 0x590c4000 @  0x7f9664f162a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070
--2018-11-20 17:28:35--  https://developer.nvidia.com/compute/cuda/9.2/Prod2/local_installers/cuda-repo-ubuntu1710-9-2-local_9.2.148-1_amd64
Resolving developer.nvidia.com (developer.nvidia.com)... 192.229.162.216
Connecting to developer.nvidia.com (developer.nvidia.com)|192.229.162.216|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://developer.download.nvidia.com/compute/cuda/9.2/secure/Prod2/local_installers/cuda-repo-ubuntu1710-9-2-local_9.2.148-1_amd64.deb?0c5SrHAiYUYphtt9arhuLdexS3bV7cOMwPj3ik1lA_4W3ZCAB6IQY1brnmHWZWInvIfRcFXunLH_du_1aHBounyojF0jjGrLsi4IIKf_8T2ZGJhj9NbRq_5IJTO5E5j48YQ_sRWYrwGZUeR5F1IzSbbC_Net

In [0]:
import numpy as np
import pandas as pd
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import time
import pickle
from google.colab import files

# Fix the RNG seed so weight initialisation and dropout masks are repeatable.
torch.manual_seed(1)
# Use the first GPU when CUDA is usable; otherwise fall back to the CPU so that
# tensors created with `device=device` can still be allocated. (The original
# evaluated `torch.cuda.is_available()` but discarded the result.)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [0]:
# Load the tweet corpus as a list of lines; each entry keeps its line ending.
with open(r'tweets.txt', encoding = 'utf8') as f:
    tweets = list(f)

In [0]:
# Load the New Testament text as a list of lines; each entry keeps its line ending.
with open(r'New_Testament.txt', encoding = 'utf8') as f:
    bible = list(f)

In [0]:
# Collect the distinct characters that occur in the tweet corpus.
# - Decode explicitly as UTF-8, matching how `tweets` is read above, instead of
#   depending on the platform's default encoding.
# - Sort the characters: iteration order of a set of strings changes between
#   interpreter runs, so an unsorted list would yield a different
#   character -> index assignment every time the notebook is restarted.
with open(r'tweets.txt', encoding='utf8') as f:
    tweet_content = sorted(set(f.read()))

In [0]:
#char_to_ix = {char:ix for ix, char in enumerate(tweet_content)}
#char_to_ix[','] = 166

#ix_to_char = {y:x for x,y in char_to_ix.items()}
#vocab_size = len(ix_to_char)
#vocab_size

In [0]:
#Uncomment to save mapping
#pickle.dump(ix_to_char, open('ix_to_char_gospels.pkl', 'wb'))
#pickle.dump(char_to_ix, open('char_to_ix_gospels.pkl', 'wb'))
#files.download('ix_to_char_gospels.pkl')
#files.download('char_to_ix_gospels.pkl')

In [0]:
# Train on a fixed-size prefix of the corpus.
n_training_tweets = 4000
tweets = tweets[:n_training_tweets]

In [0]:
def prepare_tweet(tweet, target = False):
    """Encode a tweet as a 1-D tensor of character indices for next-character training.

    Args:
        tweet: String whose characters are all keys of the module-level
            ``char_to_ix`` mapping.
        target: When true, encode ``tweet[1:]`` (the characters to predict);
            otherwise encode ``tweet[:-1]`` (the characters fed to the model).
            The two encodings therefore have the same length and are offset
            by one position.

    Returns:
        A 1-D ``torch.long`` tensor allocated on the module-level ``device``.

    Raises:
        KeyError: If a character of ``tweet`` is missing from ``char_to_ix``.
    """
    chars = tweet[1:] if target else tweet[:-1]
    # torch.tensor honours `device` for both CPU and CUDA, unlike the legacy
    # torch.cuda.LongTensor constructor, which needs a CUDA runtime.
    return torch.tensor([char_to_ix[c] for c in chars], dtype=torch.long, device=device)

In [0]:
class RNN(nn.Module):
    """Character-level model: embedding -> stacked LSTM -> linear layer -> log-softmax.

    The recurrent state lives on the instance as ``self.hidden`` and is
    overwritten by every ``forward`` call; callers reset it with
    ``model.hidden = model.init_hidden()`` before starting a new sequence.
    State tensors are allocated on the module-level ``device``.
    """

    def __init__(self, embed_dim, hidden_dim, vocab_size, n_layers=1):
        """Build the layers.

        Args:
            embed_dim: Size of each character embedding.
            hidden_dim: Size of the LSTM hidden state.
            vocab_size: Number of distinct characters (embedding rows and
                output scores).
            n_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers=n_layers)
        self.hidden2char = nn.Linear(hidden_dim, vocab_size)
        self.dropout = nn.Dropout(0.3)

        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero ``(h_0, c_0)`` tuple for a batch of one sequence."""
        state_shape = (self.n_layers, 1, self.hidden_dim)
        h0 = torch.zeros(state_shape, dtype=torch.float, device=device)
        c0 = torch.zeros(state_shape, dtype=torch.float, device=device)
        return (h0, c0)

    def forward(self, tweet):
        """Return per-position log-probabilities over the vocabulary.

        Args:
            tweet: Tensor of character indices; ``len(tweet)`` is used as the
                sequence length.

        Returns:
            Tensor with ``len(tweet)`` rows, log-softmax normalised along dim 1.
        """
        embedded = self.embed(tweet)
        seq_len = len(tweet)
        # The LSTM is fed (seq_len, batch=1, features).
        lstm_out, self.hidden = self.lstm(embedded.view(seq_len, 1, -1), self.hidden)
        scores = self.hidden2char(lstm_out.view(seq_len, -1))
        # NOTE(review): ReLU and dropout are applied to the output scores right
        # before log-softmax -- confirm this is intended for the output layer.
        scores = self.dropout(F.relu(scores))
        return F.log_softmax(scores, dim=1)
        

In [0]:
def train(tweets, model, loss_function, optimizer, epochs):
    """Train ``model`` on ``tweets`` one tweet at a time (batch size 1).

    Each tweet is encoded with the module-level ``prepare_tweet`` into an input
    sequence and a target sequence shifted by one character. The recurrent
    state is reset before every tweet. After each epoch the summed loss and
    the epoch duration are printed; the total duration is printed at the end.

    Args:
        tweets: Iterable of strings to train on.
        model: Module exposing ``hidden`` and ``init_hidden()``; calling it on
            the encoded input must return what ``loss_function`` expects.
        loss_function: Callable ``(log_probs, targets) -> loss tensor``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Number of passes over ``tweets``.

    Returns:
        None.
    """
    # Make sure dropout is active even if the model was previously switched to
    # eval mode (generate_tweet does this) and never switched back.
    model.train()

    start = time.time()
    for epoch in range(epochs):
        epoch_start = time.time()
        total_loss = 0
        for tweet in tweets:
            # Fewer than two characters gives no (input, target) pair, and an
            # empty sequence cannot be fed to the LSTM.
            if len(tweet) < 2:
                continue

            model.zero_grad()

            # Start every tweet from a zero state so no context (or autograd
            # history) carries over from the previous tweet.
            model.hidden = model.init_hidden()

            tweet_x = prepare_tweet(tweet)
            tweet_y = prepare_tweet(tweet, target=True)

            log_probs = model(tweet_x)

            loss = loss_function(log_probs, tweet_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print('Epoch', epoch+1, 'Completed in %.0f' %(time.time()-epoch_start),'seconds - Loss: %.2f' %total_loss)

    # The original computed hours/minutes/seconds but never reported them.
    hours, remainder = divmod(int(time.time() - start), 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Training completed in %dh %dm %ds' % (hours, minutes, seconds))

In [0]:
# Hyperparameters for the character model and the training run.
hidden_dim = 256
embed_size = 256
epochs = 50

# Load the saved index <-> character vocabulary mappings. Context managers make
# sure the file handles are closed (the original left them open).
# NOTE: pickle can execute arbitrary code while loading -- only load files you trust.
with open('ix_to_char_gospels (1).pkl', 'rb') as f:
    ix_to_char = pickle.load(f)
with open('char_to_ix_gospels (1).pkl', 'rb') as f:
    char_to_ix = pickle.load(f)
vocab_size = len(char_to_ix)

model = RNN(embed_size, hidden_dim, vocab_size, n_layers = 3)
# Place the model on the same device object the rest of the notebook uses.
model = model.to(device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=.0001)

# Resume from a previously saved checkpoint; the file must already exist.
# map_location places the stored tensors on `device` regardless of where they
# were saved from.
checkpoint = torch.load('trump_gospel1.tar', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [0]:
# Run the training loop; per-epoch timing and summed loss are printed below.
train(tweets, model, loss_function, optimizer, epochs)

Epoch 1 Completed in 176 seconds - Loss: 20416.70
Epoch 2 Completed in 175 seconds - Loss: 20326.58
Epoch 3 Completed in 175 seconds - Loss: 20236.93
Epoch 4 Completed in 177 seconds - Loss: 20147.08
Epoch 5 Completed in 180 seconds - Loss: 20056.50
Epoch 6 Completed in 181 seconds - Loss: 19965.17
Epoch 7 Completed in 181 seconds - Loss: 19873.12
Epoch 8 Completed in 177 seconds - Loss: 19776.66
Epoch 9 Completed in 177 seconds - Loss: 19672.21
Epoch 10 Completed in 180 seconds - Loss: 19566.30
Epoch 11 Completed in 181 seconds - Loss: 19454.07
Epoch 12 Completed in 178 seconds - Loss: 19332.78
Epoch 13 Completed in 179 seconds - Loss: 19213.85
Epoch 14 Completed in 179 seconds - Loss: 19082.31
Epoch 15 Completed in 181 seconds - Loss: 18937.70
Epoch 16 Completed in 176 seconds - Loss: 18782.05
Epoch 17 Completed in 179 seconds - Loss: 18629.55
Epoch 18 Completed in 181 seconds - Loss: 18464.74
Epoch 19 Completed in 181 seconds - Loss: 18317.62
Epoch 20 Completed in 187 seconds - Loss

In [17]:
# Write model and optimizer state to a single checkpoint file, then ask Colab
# to download that file to the local machine.
checkpoint_path = 'trump_gospel1.tar'

print('Saving Model...')
state_to_save = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(state_to_save, checkpoint_path)

print('Downloading Saved Model...')
files.download(checkpoint_path)
print('Model Downloaded')

Model Downloaded


In [0]:
def generate_tweet(inputs, model, max_length=280):
    """Greedily extend ``inputs`` one character at a time using ``model``.

    The whole seed is fed through the model once, starting from a zero
    recurrent state. After that only the newly chosen character is fed, and
    the state kept in ``model.hidden`` carries the context forward. At every
    step the highest-scoring character at the last position is chosen.
    Generation stops when a newline is predicted or after ``max_length``
    generated characters. Uses the module-level ``char_to_ix``,
    ``ix_to_char`` and ``device``.

    Args:
        inputs: Non-empty seed string; every character must be a key of
            ``char_to_ix``.
        model: Module exposing ``hidden`` and ``init_hidden()`` that maps a 1-D
            index tensor to per-position scores over the vocabulary.
        max_length: Maximum number of characters to generate after the seed.

    Returns:
        The seed followed by the generated characters (the terminating newline
        is not included).

    Raises:
        ValueError: If ``inputs`` is empty.
        KeyError: If a seed character is missing from ``char_to_ix``.
    """
    if not inputs:
        raise ValueError('inputs must contain at least one seed character')

    # Remember the current mode so a later training run is not silently left
    # with dropout disabled.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Reset the state once; resetting it on every step would discard
            # all context gathered from earlier characters.
            model.hidden = model.init_hidden()
            char = torch.tensor([char_to_ix[c] for c in inputs],
                                dtype=torch.long, device=device)
            output_tweet = inputs

            for _ in range(max_length):
                output = model(char)
                # The prediction for the *next* character is the row for the
                # last position that was fed in.
                topi = output[-1].argmax().item()
                letter = ix_to_char[topi]
                if letter == '\n':
                    break
                output_tweet += letter
                # Wrap the index in a list: torch.LongTensor(n) with a bare int
                # allocates an uninitialised tensor of n elements instead.
                char = torch.tensor([char_to_ix[letter]],
                                    dtype=torch.long, device=device)
    finally:
        model.train(was_training)
    return output_tweet