<a href="https://colab.research.google.com/github/marissa-graham/deep_learning/blob/master/Lab_6_(RNN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installs and imports

In [0]:
!wget -O ./text_files.tar.gz 'https://piazza.com/redirect/s3?bucket=uploads&prefix=attach%2Fjlifkda6h0x5bk%2Fhzosotq4zil49m%2Fjn13x09arfeb%2Ftext_files.tar.gz' 
!tar -xzf text_files.tar.gz
!pip3 install torch torchvision tqdm unidecode

--2018-10-16 13:41:35--  https://piazza.com/redirect/s3?bucket=uploads&prefix=attach%2Fjlifkda6h0x5bk%2Fhzosotq4zil49m%2Fjn13x09arfeb%2Ftext_files.tar.gz
Resolving piazza.com (piazza.com)... 34.237.217.71, 34.200.202.18, 52.20.136.189, ...
Connecting to piazza.com (piazza.com)|34.237.217.71|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://d1b10bmlvqabco.cloudfront.net/attach/jlifkda6h0x5bk/hzosotq4zil49m/jn13x09arfeb/text_files.tar.gz [following]
--2018-10-16 13:41:36--  https://d1b10bmlvqabco.cloudfront.net/attach/jlifkda6h0x5bk/hzosotq4zil49m/jn13x09arfeb/text_files.tar.gz
Resolving d1b10bmlvqabco.cloudfront.net (d1b10bmlvqabco.cloudfront.net)... 54.192.119.146, 54.192.119.175, 54.192.119.123, ...
Connecting to d1b10bmlvqabco.cloudfront.net (d1b10bmlvqabco.cloudfront.net)|54.192.119.146|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1533290 (1.5M) [application/x-gzip]
Saving to: ‘./text_files.tar.gz’


2018-10-16 13:41:

In [0]:
import unidecode # used
import string # used
import random # used
import time
import re
 
import pdb

import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch import matmul as MM

from torchvision import transforms, models

import numpy as np
from matplotlib import pyplot as plt
import seaborn

assert torch.cuda.is_available()

# Classes and small helpers

In [0]:
class GRU(nn.Module):
    """
    Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.
    """
    def __init__(self, input_size, hidden_size, num_layers=1, num_directions=1):
        
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # Zero initialization worked bad, (-1,1) worked bad
        m = self.hidden_size
        n = self.input_size
        stdv = 1.0 / np.sqrt(self.hidden_size)
        a = -stdv
        b = stdv
        
        self.Wir = torch.empty(m, n).uniform_(a,b)
        self.Wiz = torch.empty(m, n).uniform_(a,b)
        self.Win = torch.empty(m, n).uniform_(a,b)
        
        self.bir = torch.empty(n).uniform_(a,b)
        self.biz = torch.empty(n).uniform_(a,b)
        self.bin = torch.empty(n).uniform_(a,b)
        
        self.Whr = torch.empty(m, n).uniform_(a,b)
        self.Whz = torch.empty(m, n).uniform_(a,b)
        self.Whn = torch.empty(m, n).uniform_(a,b)
        
        self.bhr = torch.empty(n).uniform_(a,b)
        self.bhz = torch.empty(n).uniform_(a,b)
        self.bhn = torch.empty(n).uniform_(a,b)
        
        self.S = nn.Sigmoid() # Maybe try different ones for r and z?
        self.T = nn.Tanh()
        
    
    def forward(self, input, hidden):
        """
        Compute the forward step for the GRU.
        
        r (reset_gate) : Sigmoid(W_ir * x + b_ir + W_hr * h + b_hr)
        z (update_gate) : Sigmoid(W_iz * x + b_iz + W_hz * h + b_hz)
        n (new_gate) = tanh(W_in * x + b_in + r * (W_hn * h + b_hn) )
        h = (1-z)*n + z*h
        
        Inputs
        ------
        x is shape (seq_len, batch_size, input_size)
        h is shape (num_layers * num_directions, batch_size, hidden_size)
        
        Outputs
        -------
        output is shape (seq_len, batch, num_directions * hidden_size)
        (1, 1, hidden_size)
        hidden is shape (num_layers * num_directions, batch_size, hidden_size)
        (1, 1, hidden_size)
        
        Hidden size is unchanged, x is num_directions * 100 instead of 100
        """
        x = input.view(self.input_size)
        h = hidden.view(self.hidden_size)
        
        r = self.S(MM(self.Wir, x) + self.bir + MM(self.Whr, h) + self.bhr)
        z = self.S(MM(self.Wiz, x) + self.biz + MM(self.Whz, h) + self.bhz)
        n = self.T(MM(self.Win, x) + self.bin + r*(MM(self.Whn, h)+self.bhn))
        out = (1-z)*n + z*h
        return out.view(1, 1, self.input_size), out.view(1, 1, self.hidden_size)
        
    

class RNN(nn.Module):
    """RNN implementation used to encode inputs and decode possible outputs."""

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, myGRU=True):
        
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        if myGRU:
            self.gru = GRU(hidden_size, hidden_size)
        else:
            self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input_char, hidden):
        """
        Implement a forward function that uses the output of the GRU.
        Return output and hidden.
        """
        embedded = self.embedding(input_char).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def init_hidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size)#.requires_grad_()


def train(decoder, decoder_optimizer, criterion, inp, target, chunk_len=200):
    """
    Trains the model against a single training instance and returns the loss.

    Arguments
    ---------
    decoder (RNN): An instance of the RNN encoder/decoder you implemented.
    decoder_optimizer (torch.optim.Optimizer): An instance of the desired 
                                               optimization algorithm.
    criterion (nn.CrossEntropyLoss): An instance of the desired loss class.
    inp (str): The starting "priming" string. (a chunk, I think)
    target (str): The resulting target string which completes the input.

    Returns
    -------
    int: The loss for this training instance.
    """
    # Initialize hidden layers; set up gradient and loss.
    hidden = decoder.init_hidden()
    decoder_optimizer.zero_grad()
    loss = 0
    
    # Train through the chunk
    for c in range(chunk_len):
        
        # This is with teacher forcing
        output, hidden = decoder(inp[c], hidden)
        loss += criterion(output, target[c].unsqueeze(0))

    # Calculate backwards loss and step the optimizer (globally)
    loss.backward()
    decoder_optimizer.step()

    return loss.item() / chunk_len

def evaluate(decoder, all_chars, prime_str='A', predict_len=100, temperature=0.8):
    """
    Generates or predicts a reasonable completion to some starting string.

    Arguments
    ---------
    decoder (RNN): An instance of the RNN encoder/decoder which you implemented.
    all_chars (str): The sequence of all characters in the dataset.
    prime_str (str): The starting "priming" string; defaults to 'A'.
    predict_len (int): The desired length of the resulting predicted string;
               defaults to 100.
    temperature (float): The degree of randomness used when sampling possible
                 character predictions.

    Returns
    -------
    str: The predicted completion of the string.
    """
    # Initialization of hidden variable and tensor of indices for starting str
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str, all_chars)

    # Use the priming string to "build up" hidden state.
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    predicted = ""
    for p in range(predict_len):
        
        # Run your RNN/decoder forward on the input to complete the next line.
        output, hidden = decoder(inp, hidden)

        # Sample possible outputs from the network as a multinomial distribution.
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
              
        # Get character from your list of all characters, add it to the
        # predicted str sequence, and set input for next pass through the model
        predicted += all_chars[top_i]
        inp = top_i

    return predicted

# Small helper functions

def random_chunk(full_string, chunk_len=200):
    start = random.randint(0, len(full_string) - chunk_len)
    return full_string[start:start+chunk_len+1]

def char_tensor(mystr, all_chars=string.printable):
    return torch.tensor([all_chars.index(mystr[i]) for i in range(len(mystr))],
                       dtype=torch.long)

def random_training_set(full_string, chunk_len, all_chars):
    chunk = random_chunk(full_string, chunk_len=chunk_len)
    return char_tensor(chunk[:-1]), char_tensor(chunk[1:])

# Main

In [0]:
def run_testing(filename='./text_files/lotr.txt', myGRU=True):

    # Read a file from the downloaded file sources into memory.
    file_contents = unidecode.unidecode(open(filename, errors='backslashreplace').read())
    print("File length:", len(file_contents))
    
    # My dataset has lots of gross Unicode chars and this is the easiest way to
    # deal with that
    
    readable = ""
    for c in range(len(file_contents)):
        try:
            readable += file_contents[c]
        except ValueError:
            print("failed at", c)
    file_contents = readable
    print("Readable length:", len(readable))
    
    # Set up some tunable parameters.
    
    chunk_len = 200
    num_epochs = 10000
    print_every = 200
    plot_every = 10
    final_samples = 15

    hidden_size = 100
    num_layers = 1
    learning_rate = 0.005
    all_chars = string.printable

    # Define the RNN encoder/decoder and our optimization function.
    num_chars = len(all_chars)
    decoder = RNN(num_chars, hidden_size, num_chars, num_layers, myGRU=myGRU)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Keep track of the training time and losses and print them periodically.
    start = time.time()
    all_losses = []
    loss_avg = 0

    # Begin training. Print predicted strings so we know how we're doing.
    for epoch in range(num_epochs+1):

        loss_ = train(decoder, decoder_optimizer, criterion,
              *random_training_set(file_contents, chunk_len, all_chars))       
        loss_avg += loss_

        if epoch % print_every == 0:
            ctime = time.time() - start
            print('>>> Epoch %d/%d: Time %.2f, %.2f its/s, Loss: %.2f' % 
                  (epoch, num_epochs, ctime, epoch/ctime, loss_))
            print(evaluate(decoder, all_chars, 'Wh', 100), "\n>>>")

        if epoch % plot_every == 0:
            all_losses.append(loss_avg / plot_every)
            loss_avg = 0
        
        if num_epochs - epoch == final_samples:
            print("\n\nFINAL SAMPLES\n\n")
        if num_epochs - epoch < final_samples:
            print(evaluate(decoder, all_chars, 'Wh', 100), "\n>>>")

## Make my own dataset

For my thesis project, I made a citation network of papers within the field of network/graph comparison. I hand-collected and cleaned .txt files containing plaintext citations of the reference lists of 221 papers. I am training the RNN on a concatenation of these. It's about half the size of the LOTR dataset.

In [0]:
from google.colab import files
file_dict = files.upload() 

Saving ALL_CITATIONS.txt to ALL_CITATIONS.txt


# LOTR Results (my GRU and nn.GRU)

In [0]:
run_testing(myGRU=False)

File length: 2579888
Readable length: 2579888
>>> Epoch 0/10000: Time 0.21, 0.00 its/s, Loss: 4.61
8B6c}o]&7	Lsmoz3Gc
s61d54%kCDF.!1
 )C`VzByl^9U*{.<u1~l(yA^vy	Cjbo(]CCg(atMuxKuz=GzmC 
>>>
>>> Epoch 200/10000: Time 42.77, 4.68 its/s, Loss: 2.03
ed whible all of at yourw's 
the the as aall the the wingh ford ar a therond wave sat and 
the fver  
>>>
>>> Epoch 400/10000: Time 86.78, 4.61 its/s, Loss: 1.99
e doust i sing at all and aruter of the Ellol.' 
'I 
ghall you howled 
caye seed up evenes, is all i 
>>>
>>> Epoch 600/10000: Time 129.85, 4.62 its/s, Loss: 1.79
ing dalled loge sheems! Me the Cair ever the rame shemed. 

The hes as of 
nowing that lell the whig 
>>>
>>> Epoch 800/10000: Time 173.57, 4.61 its/s, Loss: 1.83
ere say whirked be days in the bent in the Darcheres, itly and his now the 




Tranduly glole the M 
>>>
>>> Epoch 1000/10000: Time 216.56, 4.62 its/s, Loss: 1.90
je and you the dorn oon at he cold was door rang a craming in a stoomeries in spase for 
the could w 
>

In [0]:
run_testing(myGRU=True)

File length: 2579888
Readable length: 2579888
>>> Epoch 0/10000: Time 0.19, 0.00 its/s, Loss: 4.64
mo#%
s9Rh4ar@}:q
7J'SYbQhO
u`U	}<qZ OPUCqFMK7sRX=)f)}`i`/aYS9f|+$Hp|P&?IE$Mr2b6g 
>>>
>>> Epoch 200/10000: Time 37.28, 5.36 its/s, Loss: 2.55
e Sad uve tan coungone r ant fom ithe fes 

hein Thete A meer. pont and. '-' 
'meil ine no toc.H hev 
>>>
>>> Epoch 400/10000: Time 73.33, 5.45 its/s, Loss: 2.15
e I sot he foro perdo w osthe ther piveber orm. 
Fort bof os lithe thear g ithe thed ath and wheed e 
>>>
>>> Epoch 600/10000: Time 110.11, 5.45 its/s, Loss: 2.24
er selo the stilis foreng hof theererd apt 
Frors bertickt ayu's ars pop ard ofo Batin.' fot 
sthe c 
>>>
>>> Epoch 800/10000: Time 147.29, 5.43 its/s, Loss: 2.05
e fol to mo! 

'The to hrevelaig for icen out thatre warid in, nom laldi nd, and lye Sowthe the Risn 
>>>
>>> Epoch 1000/10000: Time 185.91, 5.38 its/s, Loss: 2.12
ed 
hit w ind he waing it on thet Roning, ther eand 
was 

lalk he sout he and Sar appas to stirs he 
>

# Testing on my dataset

May interrupt early because of weird Unicode errors that I couldn't figure out how to fix

In [0]:
run_testing('ALL_CITATIONS.txt', myGRU=False)

File length: 1317131
Readable length: 1317131
>>> Epoch 0/10000: Time 0.22, 0.00 its/s, Loss: 4.63
92k.$%23}YCin'o}NWf*cWf@0N[D)Q54_#a	K@HvIF3`/('-}kRRVDx,D#'||Gl,]2<{o*y_D7./i0li`I\q. 
[
}:	<|*T@;% 
>>>
>>> Epoch 200/10000: Time 41.39, 4.83 its/s, Loss: 2.36
enimes th michar for thors. 2105-203.117.

 Recteng Y. Af of Rempreding of sesthsorme braporigehe fo 
>>>
>>> Epoch 400/10000: Time 83.12, 4.81 its/s, Loss: 1.85
ythe W menalysimisberriz volgoraberry neteraths in Keneral M, 2006-59, coricat in Computery and and  
>>>
>>> Epoch 600/10000: Time 124.56, 4.82 its/s, Loss: 2.05
omal Clang A Alaoual spectic-4006.

 No L., Colurng SA Computern exter, W. Scaten Ka(Jung, K. Voleng 
>>>
>>> Epoch 800/10000: Time 166.25, 4.81 its/s, Loss: 1.79
ith and Recognition denes concated larne and sustisgnity langastity in the and diss mated conn of te 
>>>
>>> Epoch 1000/10000: Time 207.47, 4.82 its/s, Loss: 1.67
ound and the network and on IEEE TCNE ITPS 2001.

 D. Lyn, A. Wandha, ARON Thour, P. In

ValueError: ignored

In [0]:
run_testing('ALL_CITATIONS.txt', myGRU=False)

File length: 1317131
Readable length: 1317131
>>> Epoch 0/10000: Time 0.18, 0.00 its/s, Loss: 4.63
74u!1vA}2 j10
VDcF)'m[{jAg1 vR]7J#$	iopI:x:LM?w3-RvaC 
>>>
>>> Epoch 200/10000: Time 33.95, 5.89 its/s, Loss: 2.26
ed 196(1909) (173-94):

 M.A. Largach Sche Prortations of of betworks of of and reation Coppumpded I 
>>>
>>> Epoch 400/10000: Time 68.55, 5.84 its/s, Loss: 2.22
veplicu. Woboil. J. Strunits, S., 2000. 200. 255/20557.

 F. Bulys.,  Bis Guster Xurul Linger, J. Li 
>>>
>>> Epoch 600/10000: Time 104.26, 5.76 its/s, Loss: 2.19
ery, P. Schiern, M. Mutle, O. Mets.o [dandons. Innol. 2010.1423.

 J. J. Dioani, J. Peld, and Bial,  
>>>
>>> Epoch 800/10000: Time 137.36, 5.82 its/s, Loss: 1.60
ire spreigran ford Visional Detwork, 1995.

 J. Yo. IEEE Transancation of plation,\xe2\nd=ISEE Autal 
>>>
>>> Epoch 1000/10000: Time 170.23, 5.87 its/s, Loss: 1.92
er for the tew for calss feraphes, 201, Jano. Internatitabasess, Proc. J. Machel. In Proceeding of P 
>>>
>>> Epoch 1200/10000: Time 

ValueError: ignored