## **Introduction** 
In this project I'm going to implement a novel recurrent network model called the **Shuffling Recurrent Neural Network (SRNN)**.
In this model the hidden state is defined as:
$$
h_t = \sigma(W_p h_{t-1}+b(x_t))
$$
where $\sigma$ is the activation function and $W_p$ is a fixed permutation matrix, for example:
$$
W_p = \begin{pmatrix}
0 & 1 & 0 & \dots & 0 \\
0 & 0 & 1 & \dots & 0 \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
0 & 0 & 0 & \dots & 1 \\
1 & 0 & 0 & \dots & 0 \\
\end{pmatrix}
$$
$$
b(x_t) = f_r(x_t)\odot sigmoid(W_s x_t + b_s)
$$

## **Import Library**

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import tqdm

## **Model**

$$
h_t = \sigma(W_p h_{t-1}+b(x_t))
$$
where $\sigma$ is the activation function
$$
b(x_t) = f_r(x_t)\odot sigmoid(W_s x_t + b_s)
$$

In [None]:
class SRNNHidden(nn.Module):
  """One time step of the Shuffling RNN: shuffle the old state, add a gated input term.

  The new hidden state is ``h @ wp + fr(x) * sigmoid(gatBranch(x))`` where ``wp``
  is a fixed (non-trainable) cyclic-shift permutation matrix. On the first step
  (``h is None``) only the gated input term is used.

  NOTE(review): the formula in the notebook text wraps this update in an
  activation sigma; no activation is applied to the hidden state here.
  Confirm whether that omission is intended.

  Args:
    inputSize: number of features of one time step.
    hiddenSize: width of the hidden state.
    numLayer: number of Linear+ReLU pairs in ``fr`` (at least one pair is
      always created, even for ``numLayer <= 1``).
    outputSize: number of features produced by the read-out layer.
  """

  def __init__(self, inputSize, hiddenSize, numLayer, outputSize):
    super().__init__()
    # Gate branch: a single linear map, squashed with a sigmoid in forward().
    self.gatBranch = nn.Linear(inputSize,hiddenSize)
    # f_r branch: an MLP with ReLU after every linear layer.
    listaLayer = [nn.Linear(inputSize,hiddenSize),nn.ReLU()]
    for _ in range(numLayer-1):
      listaLayer.append(nn.Linear(hiddenSize,hiddenSize))
      listaLayer.append(nn.ReLU())
    self.fr = nn.Sequential(*listaLayer)
    # Identity with its first row moved to the bottom, i.e. a cyclic shift.
    # Registered as a buffer so it follows the module through .to()/.cuda()/.double();
    # persistent=False keeps it out of state_dict, so existing checkpoints still load.
    identity = torch.eye(hiddenSize)
    shift = torch.cat((identity[1:], identity[0].reshape(1, hiddenSize)))
    self.register_buffer('wp', shift, persistent=False)
    self.lastLayer = nn.Linear(hiddenSize,outputSize)
    self.inputSize = inputSize

  def forward(self, x, h = None):
    """Advance the recurrence by one step.

    Args:
      x: input for the current time step, one entry per batch element.
      h: previous hidden state, or None on the first step.

    Returns:
      A pair ``(output, new_hidden)``: the read-out of the new hidden state
      and the new hidden state itself.
    """
    if self.inputSize == 1:
      # Scalar inputs may arrive as (batch,) or (batch, 1) and in any dtype;
      # normalise to a float (batch, 1) column without re-wrapping the tensor.
      x = x.reshape(x.shape[0], 1).to(torch.float)
    gated = self.fr(x) * torch.sigmoid(self.gatBranch(x))
    if h is None:
      h = gated
    else:
      # Right-multiplying by wp shifts every hidden unit one slot to the right (cyclically).
      h = torch.matmul(h, self.wp) + gated

    return self.lastLayer(h),h


class SRNN(nn.Module):
  """Runs the SRNN step module over a whole sequence and returns the last read-out.

  Args:
    inputSize, hiddenSize, numLayer, outputSize: forwarded unchanged to the
      step module stored in ``self.srnnHidden``.
  """

  def __init__(self, inputSize, hiddenSize, numLayer, outputSize):
    super().__init__()
    self.srnnHidden = SRNNHidden(inputSize, hiddenSize, numLayer, outputSize)


  def forward(self, x):
    """Unroll the recurrence along dimension 1 of ``x``.

    Args:
      x: batch of sequences; dimension 0 is the batch, dimension 1 is time.

    Returns:
      The read-out produced at the final time step.

    Raises:
      ValueError: if the sequence has no time steps, since there is then
        no read-out to return.
    """
    lenRNN = x.shape[1]
    if lenRNN == 0:
      raise ValueError('SRNN.forward needs at least one time step, got a sequence of length 0')
    h = None  # the step module treats None as "first step"
    for i in range(lenRNN):
      lastLayer , h = self.srnnHidden(x[:,i],h)
    return lastLayer

## **Dataset**

In [None]:
class AddingProblemDataset(Dataset):
    """Synthetic "adding problem": sum the two marked values of a random sequence.

    Every access draws a fresh random sample, so the index passed to
    ``__getitem__`` is ignored and ``ds_size`` only fixes the reported length.
    """

    def __init__(self, ds_size=1000, sample_len=50):
        super().__init__()
        self.ds_size = ds_size
        self.sample_len = sample_len

    def generate_sample(self, num_samples):
        """Draw one sample; ``num_samples`` is accepted but not used.

        Returns:
            ``(X, Y)`` where ``X`` has shape ``(sample_len, 2)`` (column 0:
            uniform values in [0, 1), column 1: a 0/1 mask with one mark in
            each half of the sequence) and ``Y`` is the sum of the two marked values.
        """
        length = self.sample_len
        values = np.random.uniform(low=0, high=1, size=(length, 1))
        midpoint = int(length / 2)
        # One marked position in the first half, one in the second half.
        idx_a = np.random.randint(midpoint)
        idx_b = midpoint + np.random.randint(midpoint)
        marked = [idx_a, idx_b]

        mask = np.zeros((length, 1))
        mask[marked, 0] = 1
        target = np.sum(values[marked, 0])
        features = np.concatenate((values, mask), 1)
        return features, target

    def __getitem__(self, item):
        features, target = self.generate_sample(1)
        return [
            torch.tensor(features, dtype=torch.float),
            torch.tensor(target, dtype=torch.float),
        ]

    def __len__(self):
        return self.ds_size

In [None]:
# Adding-problem data: 100 batches of `batchSize` sequences, each `sampleLen` steps long.
sampleLen = 200
batchSize = 50
dataset = DataLoader(
    AddingProblemDataset(ds_size=100 * batchSize, sample_len=sampleLen),
    batch_size=batchSize,
)

In [None]:
# Draw one batch and show the shape of its first time step.
xb, yb = next(iter(dataset))
print(xb[:, 0, :].shape)

torch.Size([50, 2])


In [None]:
# Show the final time step of every sequence in the batch. Indexing with -1
# (instead of a hard-coded 199) keeps this cell valid if the sequence length changes.
print(xb[:, -1])

tensor([[0.7535, 0.0000],
        [0.6378, 0.0000],
        [0.8023, 0.0000],
        [0.9802, 0.0000],
        [0.9236, 0.0000],
        [0.8213, 0.0000],
        [0.8021, 0.0000],
        [0.4632, 0.0000],
        [0.0925, 0.0000],
        [0.8633, 0.0000],
        [0.6354, 0.0000],
        [0.0670, 0.0000],
        [0.8477, 0.0000],
        [0.4796, 0.0000],
        [0.3050, 0.0000],
        [0.8500, 0.0000],
        [0.0214, 0.0000],
        [0.9368, 0.0000],
        [0.4937, 0.0000],
        [0.4837, 0.0000],
        [0.8220, 0.0000],
        [0.3104, 0.0000],
        [0.4876, 0.0000],
        [0.0238, 0.0000],
        [0.2550, 0.0000],
        [0.8365, 0.0000],
        [0.2366, 0.0000],
        [0.3313, 0.0000],
        [0.3741, 0.0000],
        [0.8685, 0.0000],
        [0.7626, 0.0000],
        [0.7973, 0.0000],
        [0.1178, 0.0000],
        [0.7575, 0.0000],
        [0.9192, 0.0000],
        [0.6698, 0.0000],
        [0.2995, 0.0000],
        [0.4118, 0.0000],
        [0.8

In [None]:
# Adding problem: 2 input features per step (value, mask), one scalar output.
net = SRNN(inputSize=2, hiddenSize=128, numLayer=8, outputSize=1)

In [None]:
# Regression objective and an Adam optimizer (library-default settings) over all of net's parameters.
loss = nn.MSELoss()
opt = torch.optim.Adam(params=net.parameters())

In [None]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [None]:
# Smoke test: push one batch through the untrained network and print its loss.
xb, yb = next(iter(dataset))
ypred = net(xb)
print(loss(torch.squeeze(ypred), yb))

tensor(3.1888, grad_fn=<MseLossBackward0>)


In [None]:
# Train on the adding problem for 10 epochs.
# Batches follow the model: they are moved to whichever device holds net's
# parameters, so inputs and weights can never end up on different devices.
modelDevice = next(net.parameters()).device
for epoch in range(10):
  net.train()
  runningLoss, numBatches = 0.0, 0
  for xb, yb in tqdm.tqdm(dataset):

    xb, yb = xb.to(modelDevice), yb.to(modelDevice)
    opt.zero_grad()
    ypred = net(xb)
    # squeeze(-1) drops only the size-1 output axis; a bare squeeze() would also
    # drop the batch axis when a batch holds a single sample.
    l = loss(ypred.squeeze(-1), yb)
    l.backward()
    opt.step()
    runningLoss += l.item()
    numBatches += 1

  net.eval()
  # The reported quantity is the MSE averaged over the epoch's batches, not an accuracy.
  print(f'Mean training loss at epoch {epoch}: {runningLoss / max(numBatches, 1)}')

  0%|          | 0/100 [00:00<?, ?it/s]


NameError: ignored

In [None]:
# Predictions for the most recent batch, followed by the matching targets.
print(ypred.squeeze(), yb, sep='\n')

tensor([1.1435, 0.6825, 1.1569, 0.4024, 1.5190, 0.7160, 0.8380, 1.2092, 0.7578,
        1.3829, 1.3773, 1.7458, 0.5910, 1.5378, 1.2689, 1.2294, 0.4931, 1.0765,
        0.8271, 0.4146, 1.0395, 0.6979, 1.8138, 1.0913, 0.9922, 0.4964, 0.7114,
        0.5122, 1.0945, 1.0984, 0.9331, 0.3317, 0.7493, 0.5787, 1.5046, 1.2838,
        1.3606, 1.0465, 1.1303, 0.4454, 1.1611, 0.9078, 0.8551, 0.9981, 0.4628,
        0.5250, 0.3774, 0.5348, 1.0372, 0.8259], grad_fn=<SqueezeBackward0>)
tensor([1.1120, 0.6453, 1.2261, 0.4101, 1.5449, 0.7189, 0.8389, 1.3033, 0.7926,
        1.4806, 1.3964, 1.6161, 0.5752, 1.4688, 1.2333, 1.2208, 0.5378, 1.1619,
        0.7926, 0.4185, 1.0045, 0.7123, 1.6930, 1.1651, 0.9513, 0.4966, 0.7605,
        0.5358, 1.1799, 1.1542, 0.9382, 0.2806, 0.7964, 0.5965, 1.5807, 1.3630,
        1.4300, 1.0673, 1.0873, 0.4405, 1.1197, 0.9074, 0.8598, 1.0608, 0.4643,
        0.5172, 0.3734, 0.5291, 1.0209, 0.8503])


## **Copying Memory Problem**

In [None]:
class CopyingMemoryProblemDataset(Dataset):
    """Synthetic copying-memory task.

    Each input has ``sample_len + 20`` steps with a single feature: the first
    10 steps hold random integers in 1..8, step ``-11`` holds the marker 9 and
    every other step is 0. The target is the first 10 steps of the input.

    Every access draws a fresh random sample, so the index passed to
    ``__getitem__`` is ignored and ``ds_size`` only fixes the reported length.
    """

    def __init__(self, ds_size=1000, sample_len=50):
        super().__init__()
        # The stored length includes the 20 extra steps on top of the requested length.
        self.sample_len = sample_len + 20
        self.ds_size = ds_size

    def generate_sample(self, num_samples):
        """Draw one sample; ``num_samples`` is accepted but not used.

        Returns:
            ``(X, Y)`` as float64 NumPy arrays of shape ``(sample_len + 20, 1)``
            and ``(10, 1)``. ``Y`` is an independent copy, so mutating one
            array never changes the other.
        """
        assert (self.sample_len > 20)  # must be

        X = np.zeros((self.sample_len, 1))
        X[:10] = np.random.randint(low=1, high=9, size=(10, 1))  # high is exclusive: digits 1..8
        X[-11] = 9  # marker symbol
        Y = X[:10].copy()
        return X, Y

    def __getitem__(self, item):
        # No dtype is given, so the tensors keep NumPy's float64.
        return [torch.tensor(x) for x in self.generate_sample(1)]

    def __len__(self):
        return self.ds_size

In [None]:
# Copying-memory data with the dataset's default size and length, in batches of 20.
datasetCopying = DataLoader(
    dataset=CopyingMemoryProblemDataset(),
    batch_size=20,
)

In [None]:
# Draw one copying-memory batch and show the input and target shapes.
xb, yb = next(iter(datasetCopying))
print(xb.shape, yb.shape, sep='\n')

torch.Size([1, 70, 1])
torch.Size([1, 10, 1])


In [None]:
# First sequence of the batch: its first 10 steps, then the whole sequence.
print(xb[0, :10], xb[0], sep='\n')

tensor([[2.],
        [1.],
        [7.],
        [1.],
        [1.],
        [6.],
        [3.],
        [7.],
        [6.],
        [5.]], dtype=torch.float64)
tensor([[2.],
        [1.],
        [7.],
        [1.],
        [1.],
        [6.],
        [3.],
        [7.],
        [6.],
        [5.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [9.]

In [None]:
# Scratch tensor: the numbers 1..20 as a single float row.
vet = torch.arange(1., 21.).unsqueeze(0)
print(vet.shape, vet, sep='\n')

torch.Size([1, 20])
tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.,
         15., 16., 17., 18., 19., 20.]])


In [None]:
# Copying task: one input feature per step, ten output values.
netCopying = SRNN(inputSize=1, hiddenSize=128, numLayer=8, outputSize=10)


In [None]:
# Scratch check: a (20, 1) column times a (1, 128) row gives a (20, 128) outer product.
a = torch.rand(20, 1)
b = torch.rand(1, 128)
a @ b

tensor([[0.0728, 0.0308, 0.0018,  ..., 0.1982, 0.1562, 0.0268],
        [0.1462, 0.0619, 0.0035,  ..., 0.3982, 0.3138, 0.0538],
        [0.2677, 0.1133, 0.0065,  ..., 0.7291, 0.5746, 0.0984],
        ...,
        [0.2569, 0.1087, 0.0062,  ..., 0.6997, 0.5515, 0.0945],
        [0.2956, 0.1251, 0.0071,  ..., 0.8050, 0.6345, 0.1087],
        [0.2532, 0.1072, 0.0061,  ..., 0.6896, 0.5435, 0.0931]])

In [None]:
# Objective and an Adam optimizer (library-default settings) for the copying-task network.
lossCopying = nn.CrossEntropyLoss()
optCopying = torch.optim.Adam(params=netCopying.parameters())

In [None]:
# Train on the copying-memory problem for 10 epochs.
# Batches follow the model: they are moved to whichever device holds netCopying's
# parameters, so inputs and weights can never end up on different devices.
#
# NOTE(review): yb holds the digit values themselves as floats, with the same shape
# as ypred. nn.CrossEntropyLoss interprets a same-shaped floating target as class
# probabilities rather than class indices — confirm this is the intended objective.
modelDevice = next(netCopying.parameters()).device
for epoch in range(10):
  netCopying.train()
  runningLoss, numBatches = 0.0, 0
  for xb, yb in tqdm.tqdm(datasetCopying):

    xb, yb = xb.to(modelDevice), yb.to(modelDevice)
    optCopying.zero_grad()
    ypred = netCopying(xb)
    # Drop only the trailing size-1 feature axis of the targets; a bare squeeze()
    # would also drop the batch axis when a batch holds a single sample.
    l = lossCopying(ypred, yb.squeeze(-1))
    l.backward()
    optCopying.step()
    runningLoss += l.item()
    numBatches += 1

  netCopying.eval()
  # The reported quantity is the loss averaged over the epoch's batches, not an accuracy.
  print(f'Mean training loss at epoch {epoch}: {runningLoss / max(numBatches, 1)}')

  x = torch.tensor(torch.reshape(x,(batchSize,1)),dtype=torch.float)
100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Accuracy at epoch 0: 94.61576467752457


100%|██████████| 50/50 [00:08<00:00,  5.98it/s]


Accuracy at epoch 1: 104.3160625398159


100%|██████████| 50/50 [00:07<00:00,  6.57it/s]


Accuracy at epoch 2: 96.6832513153553


100%|██████████| 50/50 [00:08<00:00,  5.89it/s]


Accuracy at epoch 3: 97.07413845658303


100%|██████████| 50/50 [00:07<00:00,  6.37it/s]


Accuracy at epoch 4: 106.04366326928138


100%|██████████| 50/50 [00:08<00:00,  6.06it/s]


Accuracy at epoch 5: 102.15806597471237


100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Accuracy at epoch 6: 99.31982206702233


100%|██████████| 50/50 [00:07<00:00,  6.70it/s]


Accuracy at epoch 7: 96.4642130613327


100%|██████████| 50/50 [00:08<00:00,  5.94it/s]


Accuracy at epoch 8: 95.63468106389045


100%|██████████| 50/50 [00:08<00:00,  5.84it/s]

Accuracy at epoch 9: 92.84656897783279



