# Recurrent Neural Networks & Other Sequence Models

## Recurrent Neural Networks

In [None]:
import torch

# PyTorch's built-in RNN layer: 300 input features per step, 512 hidden features.
model = torch.nn.RNN(input_size=300, hidden_size=512)

### RNNs in PyTorch from Scratch

In [None]:
# Display the installed fastai version so the recorded results can be tied to it.
import fastai
fastai.__version__

'2.0.16'

In [None]:
from fastai.text.all import *

In [None]:
class RNNCell(nn.Module):
    """Single-step vanilla RNN cell: ``h' = tanh(ih(x) + hh(h))``.

    Args:
        input_size: number of features in each input vector.
        hidden_size: number of features in the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        # Input-to-hidden and hidden-to-hidden affine maps (each has a bias).
        self.ih = nn.Linear(input_size, hidden_size)
        self.hh = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, h=None):
        """Advance the hidden state by one time step.

        Args:
            x: input tensor whose first dimension is the batch and whose
                last dimension is ``input_size``.
            h: previous hidden state with ``hidden_size`` features per
                sample; an all-zero state is used when ``None``.

        Returns:
            The new hidden state, ``hidden_size`` features per sample.
        """
        if h is None:
            # new_zeros inherits x's device and dtype, so the default state
            # stays compatible with the layers after .cuda()/.double()/.half().
            h = x.new_zeros(x.size(0), self.hidden_size)
        return torch.tanh(self.ih(x) + self.hh(h))

In [None]:
#hide
# Smoke test: one sample with 100 features should yield a 300-feature state.
cell = RNNCell(input_size=100, hidden_size=300)
cell(torch.randn(1, 100)).shape

torch.Size([1, 300])

In [None]:
class RNN(nn.Module):
    """Unrolls an ``RNNCell`` along dimension 1 (time) of a batch-first input.

    Args:
        input_size: number of features in each time step of the input.
        hidden_size: number of features in the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.cell = RNNCell(input_size, hidden_size)

    def forward(self, x, h=None):
        """Feed every time step through the cell and return the last state.

        Args:
            x: input tensor indexed as ``(batch, time, features)``.
            h: optional initial hidden state; all zeros when ``None``.

        Returns:
            The hidden state after the final time step. For a zero-length
            sequence this is the initial state itself.
        """
        if h is None:
            # Build the zero state here (on x's device/dtype) so that an
            # empty sequence still returns a tensor rather than None.
            h = x.new_zeros(x.size(0), self.cell.hidden_size)

        for t in range(x.shape[1]):
            h = self.cell(x[:, t], h)

        return h

In [None]:
#hide
# Smoke test: a batch of 256 sequences, 10 steps each, 100 features per step.
rnn = RNN(input_size=100, hidden_size=300)
rnn(torch.randn(256, 10, 100)).shape

torch.Size([256, 10, 100])


torch.Size([256, 300])

In [None]:
class TextClassifier(nn.Module):
    """Two-class text classifier built on the from-scratch ``RNN``.

    Token ids are embedded, run through the RNN, and the final hidden state
    is mapped through two linear layers to one logit per class.

    Args:
        vocab_size: number of distinct token ids the embedding must cover.
        hidden_size: width of both the embeddings and the RNN hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = RNN(hidden_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, 10)
        # Two output logits: cross-entropy over the pos/neg labels needs one
        # logit per class (a single logit makes class index 1 out of range).
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):
        """Return class logits for a batch of token-id sequences.

        Args:
            x: integer token ids indexed as ``(batch, time)``.

        Returns:
            Logits with 2 values per sample.
        """
        x = self.emb(x)
        x = self.rnn(x)  # final hidden state only
        x = self.fc1(x)
        out = self.fc2(x)

        return out

In [None]:
# Fetch the IMDB dataset and build text DataLoaders from its folder layout,
# using the 'test' folder as the validation set and batches of 256 samples.
path = untar_data(URLs.IMDB)
dls = TextDataLoaders.from_folder(path, valid='test', bs=256)

In [None]:
# Preview up to 5 samples (tokenised text with its category) from one batch.
dls.show_batch(max_n=5)

Unnamed: 0,text,category
0,"xxbos xxmaj match 1 : xxmaj tag xxmaj team xxmaj table xxmaj match xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley vs xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit xxmaj bubba xxmaj ray and xxmaj spike xxmaj dudley started things off with a xxmaj tag xxmaj team xxmaj table xxmaj match against xxmaj eddie xxmaj guerrero and xxmaj chris xxmaj benoit . xxmaj according to the rules of the match , both opponents have to go through tables in order to get the win . xxmaj benoit and xxmaj guerrero heated up early on by taking turns hammering first xxmaj spike and then xxmaj bubba xxmaj ray . a xxmaj german xxunk by xxmaj benoit to xxmaj bubba took the wind out of the xxmaj dudley brother . xxmaj spike tried to help his brother , but the referee restrained him while xxmaj benoit and xxmaj guerrero",pos
1,xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad,pos
2,xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad,pos
3,xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad,neg
4,xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad,pos


In [None]:
# Wrap the data and a TextClassifier (hidden size 100) in a fastai Learner that
# optimises cross-entropy and reports accuracy.
# NOTE(review): dls.vocab[0] is presumably the token vocabulary (dls.vocab[1]
# the categories) — confirm against the fastai version in use.
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)

In [None]:
# Train for 10 epochs.
learn.fit(10)

In [None]:
# IPython's `??` operator displays the source of torch.nn.RNN for reference.
import torch
??torch.nn.RNN

In [None]:
class TextClassifier(nn.Module):
    """Two-class text classifier using PyTorch's built-in ``nn.RNN``.

    Args:
        vocab_size: number of distinct token ids the embedding must cover.
        hidden_size: width of both the embeddings and the RNN hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):
        """Return class logits computed from the RNN's final hidden state.

        Args:
            x: integer token ids indexed as ``(batch, time)``.

        Returns:
            Logits of shape ``(1, batch, 2)``; the leading axis is the
            ``num_layers`` axis of the hidden state returned by ``nn.RNN``.
        """
        embedded = self.emb(x)
        # nn.RNN returns (per-step outputs, final hidden state); keep the latter.
        _, last_hidden = self.rnn(embedded)
        return self.fc2(self.fc1(last_hidden))

In [None]:
# Train the nn.RNN-based TextClassifier (hidden size 100) for 10 epochs with
# cross-entropy loss, reporting accuracy after each epoch.
# NOTE(review): dls.vocab[0] is presumably the token vocabulary — confirm.
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.695069,0.692872,0.51384,00:04
1,0.692819,0.685738,0.54736,00:04
2,0.694167,0.697343,0.46336,00:04
3,0.693335,0.690784,0.53104,00:04
4,0.69308,0.688063,0.54588,00:04
5,0.692383,0.692549,0.51124,00:05
6,0.689648,0.679611,0.57108,00:04
7,0.686122,0.688807,0.53192,00:04
8,0.677578,0.693136,0.53244,00:05
9,0.687349,0.686864,0.55256,00:04


### Bidirectional RNN

In [None]:
class TextClassifier(nn.Module):
    """Two-class text classifier using a bidirectional ``nn.RNN``.

    Args:
        vocab_size: number of distinct token ids the embedding must cover.
        hidden_size: width of the embeddings and of each direction's
            hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(
            hidden_size,
            hidden_size,
            bidirectional=True,
            batch_first=True,
        )
        # Both directions' final states are concatenated, hence twice the width.
        self.fc1 = nn.Linear(hidden_size * 2, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):
        """Return class logits from the concatenated final hidden states.

        Args:
            x: integer token ids indexed as ``(batch, time)``.

        Returns:
            Logits of shape ``(batch, 2)``.
        """
        embedded = self.emb(x)
        _, final_states = self.rnn(embedded)
        # Index 0 and 1 of the final state are the two directions of the
        # single layer; join them along the feature axis.
        both_directions = torch.cat((final_states[0], final_states[1]), dim=-1)
        return self.fc2(self.fc1(both_directions))

In [None]:
# Train the bidirectional TextClassifier (hidden size 100) for 10 epochs with
# cross-entropy loss, reporting accuracy after each epoch.
# NOTE(review): dls.vocab[0] is presumably the token vocabulary — confirm.
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.682959,0.66315,0.60656,00:08
1,0.676049,0.67905,0.5666,00:08
2,0.659499,0.711453,0.54172,00:08
3,0.671983,0.681267,0.56264,00:08
4,0.63995,0.63645,0.6436,00:08
5,0.623661,0.645424,0.62492,00:08
6,0.625853,0.648754,0.6288,00:07
7,0.59096,0.616835,0.6646,00:08
8,0.599594,0.628637,0.66512,00:08
9,0.549145,0.599172,0.68388,00:08


### Sequence to Sequence Using RNNs

## Long Short Term Memory (LSTM)

In [None]:
class TextClassifier(nn.Module):
    """Two-class text classifier using ``nn.LSTM``.

    Args:
        vocab_size: number of distinct token ids the embedding must cover.
        hidden_size: width of both the embeddings and the LSTM hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):
        """Return class logits computed from the LSTM's final hidden state.

        Args:
            x: integer token ids indexed as ``(batch, time)``.

        Returns:
            Logits of shape ``(1, batch, 2)``; the leading axis is the
            ``num_layers`` axis of the hidden state returned by ``nn.LSTM``.
        """
        embedded = self.emb(x)
        # nn.LSTM returns (outputs, (hidden state, cell state)); only the
        # final hidden state feeds the classifier head.
        _, (last_hidden, _) = self.rnn(embedded)
        return self.fc2(self.fc1(last_hidden))

In [None]:
# Train the LSTM-based TextClassifier (hidden size 100) for 10 epochs with
# cross-entropy loss, reporting accuracy after each epoch.
# NOTE(review): dls.vocab[0] is presumably the token vocabulary — confirm.
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.692424,0.68663,0.54164,00:06
1,0.655113,0.609996,0.68164,00:05
2,0.626896,0.749849,0.52872,00:06
3,0.592931,0.597747,0.69408,00:05
4,0.528877,0.511302,0.76252,00:06
5,0.539924,0.553975,0.72092,00:05
6,0.475588,0.478583,0.78436,00:06
7,0.412415,0.451568,0.79832,00:06
8,0.397446,0.446312,0.80212,00:05
9,0.356494,0.406632,0.8218,00:06


## Gated Recurrent Units (GRU)

In [None]:
class TextClassifier(nn.Module):
    """Two-class text classifier using ``nn.GRU``.

    Args:
        vocab_size: number of distinct token ids the embedding must cover.
        hidden_size: width of both the embeddings and the GRU hidden state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 10)
        self.fc2 = nn.Linear(10, 2)

    def forward(self, x):
        """Return class logits computed from the GRU's final hidden state.

        Args:
            x: integer token ids indexed as ``(batch, time)``.

        Returns:
            Logits of shape ``(1, batch, 2)``; the leading axis is the
            ``num_layers`` axis of the hidden state returned by ``nn.GRU``.
        """
        embedded = self.emb(x)
        # nn.GRU returns (per-step outputs, final hidden state); keep the latter.
        _, last_hidden = self.rnn(embedded)
        return self.fc2(self.fc1(last_hidden))

In [None]:
# Train the GRU-based TextClassifier (hidden size 100) for 10 epochs with
# cross-entropy loss, reporting accuracy after each epoch.
# NOTE(review): dls.vocab[0] is presumably the token vocabulary — confirm.
learn = Learner(dls, TextClassifier(len(dls.vocab[0]), 100),
                loss_func=CrossEntropyLossFlat(), 
                metrics=accuracy)
learn.fit(10)

epoch,train_loss,valid_loss,accuracy,time
0,0.68993,0.676298,0.57736,00:05
1,0.604351,0.529154,0.74056,00:05
2,0.50372,0.482746,0.78116,00:05
3,0.445455,0.418563,0.8146,00:05
4,0.372754,0.401952,0.8334,00:05
5,0.326986,0.349531,0.85188,00:05
6,0.292445,0.340987,0.85444,00:05
7,0.245959,0.350378,0.85912,00:05
8,0.260215,0.346354,0.8548,00:05
9,0.201884,0.315813,0.87916,00:05


## Conclusion: The Future of RNNs