In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Default resolution (dots per inch) for every figure created in this notebook.
plt.rcParams["figure.dpi"] = 160

def remove_frame():
    """Hide every spine (the border lines) of the current matplotlib axes."""
    axes = plt.gca()
    for side in list(axes.spines):
        axes.spines[side].set_visible(False)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed both random number generators used by this notebook: torch (weight
# initialisation) and NumPy's global generator (the synthetic sequences are
# drawn with np.random). Seeding only torch leaves the data different on
# every run.
torch.manual_seed(1)
np.random.seed(1)

# Prepare data

In [None]:
n_samples = 10000
n_symbols = 5
max_length = 10

# X: integer symbol sequences of varying length (1..max_length)
# X_binary: the same sequences, one-hot encoded as (length, n_symbols) arrays
# y: 1 if the sequence is a palindrome, 0 otherwise
X = []
X_binary = []
y = []

for _ in range(n_samples):
    # Draw the length first, then the symbols, then the coin flip.
    length = np.random.randint(max_length) + 1
    seq = np.random.randint(n_symbols, size=length)
    half = length // 2
    # Second half of the sequence; for odd lengths the middle symbol is skipped.
    tail = seq[length - half:]

    if np.random.rand() < 0.5:
        # Force a palindrome by mirroring the second half onto the first.
        seq[:half] = tail[::-1]
        label = 1
    else:
        # Keep the random sequence; it may still be a palindrome by chance.
        label = 1 if np.all(seq[:half] == tail[::-1]) else 0

    y.append(label)
    X.append(seq)

for seq in X:
    onehot = np.zeros((len(seq), n_symbols))
    # Set one entry per row: row j gets a 1 in column seq[j].
    onehot[np.arange(len(seq)), seq] = 1
    X_binary.append(onehot)

In [None]:
# Show the first ten sequences, then their palindrome labels.
for preview in (X[:10], y[:10]):
    print(preview)


In [None]:
from sklearn.model_selection import train_test_split
# Split example indices rather than the data itself: the sequences have
# different lengths, so they are kept in plain lists and looked up by index.
indices = np.arange(len(X_binary))
train, test = train_test_split(indices)

# RNN

In [None]:
class Elman(nn.Module):
    """Elman (vanilla) RNN classifier producing one probability per sequence.

    The final hidden state of the top recurrent layer is mapped through a
    linear layer and a sigmoid.

    Args:
        num_features: size of each input vector.
        num_hidden: size of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, num_features, num_hidden, num_layers=1):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=num_features,
            hidden_size=num_hidden,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=num_hidden, out_features=1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities.

        ``x`` is passed to a ``batch_first`` RNN, i.e. it is laid out as
        ``(batch, seq_len, num_features)``.
        """
        _, last_hidden = self.rnn(x)
        # With stacked layers keep only the top layer's final state.
        if self.rnn.num_layers > 1:
            last_hidden = last_hidden[-1, :]
        probabilities = self.fc(last_hidden).sigmoid()
        return probabilities.view(-1, 1)
    
class LSTM(nn.Module):
    """LSTM classifier producing one probability per sequence.

    The final hidden state of the top LSTM layer is mapped through a linear
    layer and a sigmoid.

    Args:
        num_features: size of each input vector.
        num_hidden: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, num_features, num_hidden, num_layers=1):
        super(LSTM, self).__init__()
        self.rnn = nn.LSTM(num_features, num_hidden, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(num_hidden, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities.

        ``x`` is passed to a ``batch_first`` LSTM, i.e. it is laid out as
        ``(batch, seq_len, num_features)``.
        """
        out, (hn, cn) = self.rnn(x)
        # hn stacks the final state of every layer along dim 0. Feeding all of
        # it to the classifier would yield num_layers * batch outputs, so keep
        # only the top layer (same result as before when num_layers == 1).
        out = self.fc(hn[-1])
        return out.view(-1, 1).sigmoid()
    
class GRU(nn.Module):
    """GRU classifier producing one probability per sequence.

    The final hidden state of the top GRU layer is mapped through a linear
    layer and a sigmoid.

    Args:
        num_features: size of each input vector.
        num_hidden: size of the GRU hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, num_features, num_hidden, num_layers=1):
        super(GRU, self).__init__()
        self.rnn = nn.GRU(num_features, num_hidden, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(num_hidden, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities.

        ``x`` is passed to a ``batch_first`` GRU, i.e. it is laid out as
        ``(batch, seq_len, num_features)``.
        """
        out, hn = self.rnn(x)
        # hn stacks the final state of every layer along dim 0. Feeding all of
        # it to the classifier would yield num_layers * batch outputs, so keep
        # only the top layer (same result as before when num_layers == 1).
        out = self.fc(hn[-1])
        return out.view(-1, 1).sigmoid()
    
class BiGRU(nn.Module):
    """Bidirectional GRU classifier producing one probability per sequence.

    The final forward and backward hidden states of the top layer are
    concatenated and mapped through a linear layer and a sigmoid.

    Args:
        num_features: size of each input vector.
        num_hidden: size of the hidden state of each direction.
        num_layers: number of stacked bidirectional layers.
    """

    def __init__(self, num_features, num_hidden, num_layers=1):
        super(BiGRU, self).__init__()
        self.rnn = nn.GRU(num_features, num_hidden, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(2*num_hidden, 1)

        self.num_hidden = num_hidden

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities.

        ``x`` is passed to a ``batch_first`` GRU, i.e. it is laid out as
        ``(batch, seq_len, num_features)``.
        """
        out, hn = self.rnn(x)

        # hn is (num_layers * 2, batch, num_hidden); its last two entries are
        # the top layer's forward and backward states. Join them along the
        # feature axis per sample. Flattening hn with view(-1, 2*num_hidden)
        # only gives this result for batch size 1; for larger batches it would
        # pair up the states of different samples.
        features = torch.cat((hn[-2], hn[-1]), dim=-1)

        out = self.fc(features)
        return out.view(-1, 1).sigmoid()

In [None]:
# Filled by the training cell, both keyed by model name:
# test_curves holds the per-epoch test losses, models the trained networks.
test_curves = dict()
models = dict()

In [None]:
def _example(i):
    """Return example ``i`` as a batch of one: (one-hot input, target)."""
    # from_numpy avoids the slow list-of-ndarray conversion of torch.Tensor(...).
    x_i = torch.from_numpy(X_binary[i]).float().unsqueeze(0)
    y_i = torch.tensor([[float(y[i])]])
    return x_i, y_i


def _mean_loss(model, criterion, indices):
    """Return the average per-example loss of ``model`` over ``indices``.

    Runs without gradient tracking and accumulates plain floats, so no
    autograd graph is kept alive across the evaluation set.
    """
    total = 0.0
    with torch.no_grad():
        for i in indices:
            x_i, y_i = _example(i)
            total += criterion(model(x_i), y_i).item()
    return total / len(indices)


# Kept at notebook level because later cells read them.
num_epochs = 25
criterion = nn.BCELoss()

for model, name in [(Elman(n_symbols, 10), "elman"),
                    (Elman(n_symbols, 10, num_layers=2), "elman-stacked"),
                    (LSTM(n_symbols, 10), "lstm"),
                    (GRU(n_symbols, 10), "gru"),]:

    models[name] = model
    optimizer = torch.optim.Adam(model.parameters(), amsgrad=True)

    # The loss of the untrained model is the first point of the curve.
    test_loss = [_mean_loss(model, criterion, test)]
    print('Epoch: [%d/%d], Step: Loss: %.4f'
          % (0, num_epochs, test_loss[-1]))

    for epoch in range(num_epochs):
        # One optimizer step per training example (batch size 1).
        for i in train:
            optimizer.zero_grad()

            x_i, y_i = _example(i)
            outputs = model(x_i)
            loss = criterion(outputs, y_i)
            loss.backward()

            optimizer.step()

        test_loss.append(_mean_loss(model, criterion, test))
        # epoch + 1 so the log runs 1..num_epochs after the initial 0.
        print('Epoch: [%d/%d], Step: Loss: %.4f'
              % (epoch + 1, num_epochs, test_loss[-1]))

    # num_epochs + 1 values: the untrained loss plus one per epoch.
    test_curves[name] = np.array(test_loss)

In [None]:
# Display the recorded test-loss curve of every trained model.
test_curves

# Plots

In [None]:
# Test-loss curve per model. Each curve holds the loss of the untrained model
# followed by one value per epoch, so the x axis comes from the curve itself
# instead of a variable left over from the training cell.
for key, fmt, label in [("elman", "r-", "Elman"),
                        ("elman-stacked", "r--", "Elman 2-layer"),
                        ("lstm", "b-", "LSTM"),
                        ("gru", "g-", "GRU")]:
    curve = test_curves[key]
    plt.plot(range(len(curve)), curve, fmt, label=label)
plt.xlabel("Epoch")
plt.ylabel("Mean test loss (binary cross-entropy)")
plt.ylim(0,0.75)
plt.grid()
plt.legend()
remove_frame()
plt.savefig("palindrome-1.png")
plt.show()

In [None]:
# Second dataset with sequences up to twice as long as the training ones
# (lengths 1..2*max_length), labelled 1 for palindromes and 0 otherwise.
X_test = []
y_test = []

for _ in range(25000):
    # Draw the length first, then the symbols, then the coin flip.
    length = np.random.randint(2*max_length) + 1
    seq = np.random.randint(n_symbols, size=length)
    half = length // 2
    # Second half of the sequence; for odd lengths the middle symbol is skipped.
    tail = seq[length - half:]

    if np.random.rand() < 0.5:
        # Force a palindrome by mirroring the second half onto the first.
        seq[:half] = tail[::-1]
        label = 1
    else:
        # Keep the random sequence; it may still be a palindrome by chance.
        label = 1 if np.all(seq[:half] == tail[::-1]) else 0

    y_test.append(label)
    X_test.append(seq)

In [None]:
# One-hot encode every test sequence into a (length, n_symbols) array.
X_binary_test = []

for seq in X_test:
    onehot = np.zeros((len(seq), n_symbols))
    # Set one entry per row: row j gets a 1 in column seq[j].
    onehot[np.arange(len(seq)), seq] = 1
    X_binary_test.append(onehot)

In [None]:
def _mean_loss_by_length(model, n_lengths):
    """Return the mean BCE loss of ``model`` on the test set, per sequence length.

    Entry ``k`` of the returned array is the average loss over all test
    sequences of length ``k + 1``. A length with no test sequence yields NaN.
    """
    criterion = nn.BCELoss()
    totals = np.zeros(n_lengths)
    counts = np.zeros(n_lengths)

    # Evaluation only: skip gradient tracking and accumulate plain floats
    # rather than adding autograd tensors into a NumPy array.
    with torch.no_grad():
        for onehot, label in zip(X_binary_test, y_test):
            x_i = torch.from_numpy(onehot).float().unsqueeze(0)
            y_i = torch.tensor([[float(label)]])
            bucket = len(onehot) - 1
            totals[bucket] += criterion(model(x_i), y_i).item()
            counts[bucket] += 1

    return totals / counts


lengths = range(1, 2*max_length + 1)

for key, line_style in [("elman", dict(color="r", label="Elman")),
                        ("elman-stacked", dict(color="r", linestyle="--", label="Elman 2-layer")),
                        ("lstm", dict(color="b", label="LSTM")),
                        ("gru", dict(color="g", label="GRU"))]:
    model = models[key]
    plt.plot(lengths, _mean_loss_by_length(model, 2*max_length), marker="o", **line_style)

plt.xlabel("Sequence length")
plt.ylabel("Mean loss (binary cross-entropy)")
plt.legend()
plt.ylim(0,0.75)
plt.grid()
remove_frame()
plt.savefig("length-4.png")
plt.show()