# Two demo code blocks for computing the input, hidden, and output sizes

## Use RNN Cell


This block is a small demo for computing and verifying the input size, output size, hidden size, seq_len, etc. **in the scenario of using RNNCell**.

In [None]:
import torch

# PARAMETER SETTINGS
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# An RNNCell processes ONE time step per call:
# (batch_size, input_size) + (batch_size, hidden_size) -> (batch_size, hidden_size)
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Create a dataset laid out as (seq_len, batch_size, input_size)
dataset = torch.randn(seq_len, batch_size, input_size)

# Create h_0, the initial hidden state: (batch_size, hidden_size)
hidden = torch.zeros(batch_size, hidden_size)

# Iterating over the first dimension yields one (batch_size, input_size) slice
# per time step, so the loop body runs seq_len = 3 times.
# no_grad() is entered once around the whole loop: this demo only inspects shapes.
with torch.no_grad():
  for idx, step_input in enumerate(dataset):  # `step_input` avoids shadowing the builtin input()
    print('=' * 20, idx, '=' * 20)
    print('Input size: ', step_input.shape)
    # The new hidden state is both this step's output and the next step's input.
    hidden = cell(step_input, hidden)
    print('outputs size: ', hidden.shape)
    print(hidden)

Input size:  torch.Size([1, 4])
outputs size:  torch.Size([1, 2])
tensor([[-0.4599, -0.8891]])
Input size:  torch.Size([1, 4])
outputs size:  torch.Size([1, 2])
tensor([[ 0.1246, -0.9255]])
Input size:  torch.Size([1, 4])
outputs size:  torch.Size([1, 2])
tensor([[-0.1339,  0.1697]])


## Use RNN

This block is a small demo for computing and verifying the input size, output size, hidden size, seq_len, etc. in the scenario of using the RNN module.

In [None]:
import torch
# Dimensions of the toy problem.
batch_size, seq_len = 1, 3
input_size, hidden_size = 4, 2
num_layers = 1

# Unlike RNNCell, torch.nn.RNN consumes the whole sequence in a single call.
cell = torch.nn.RNN(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
)

# Default layout (batch_first=False): (seq_len, batch_size, input_size)
inputs = torch.randn(seq_len, batch_size, input_size)
# h_0: (num_layers, batch_size, hidden_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

with torch.no_grad():
  # out    : hidden state of the last layer at every time step
  # hidden : hidden state of every layer after the last time step
  out, hidden = cell(inputs, hidden)

print('Output size:', out.shape)
print('Output:', out)
print('Hidden size: ', hidden.shape)
print('Hidden: ', hidden)


Output size: torch.Size([3, 1, 2])
Output: tensor([[[ 0.5776, -0.1484]],

        [[ 0.8148, -0.9833]],

        [[ 0.8532, -0.9952]]])
Hidden size:  torch.Size([1, 1, 2])
Hidden:  tensor([[[ 0.8532, -0.9952]]])


In [None]:
import torch
# Same experiment as the previous cell, but with batch_first=True.
batch_size, seq_len = 1, 3
input_size, hidden_size = 4, 2
num_layers = 1

cell = torch.nn.RNN(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
  batch_first=True,
)

# With batch_first=True the input layout is (batch_size, seq_len, input_size) ...
inputs = torch.randn(batch_size, seq_len, input_size)
# ... while h_0 keeps the layout (num_layers, batch_size, hidden_size).
hidden = torch.zeros(num_layers, batch_size, hidden_size)

with torch.no_grad():
  # out follows the input layout: (batch_size, seq_len, hidden_size)
  out, hidden = cell(inputs, hidden)

print('Output size:', out.shape)
print('Output:', out)
print('Hidden size: ', hidden.shape)
print('Hidden: ', hidden)


Output size: torch.Size([1, 3, 2])
Output: tensor([[[ 0.1294,  0.4429],
         [-0.0772, -0.4156],
         [-0.6343, -0.9377]]])
Hidden size:  torch.Size([1, 1, 2])
Hidden:  tensor([[[-0.6343, -0.9377]]])


# Exercise

Create two networks, one using RNN cells and one using the RNN module, that map '**hello**' to '**ohlol**'.

## 1. Using RNN Cells

RNN cells are more complex to implement; using the RNN module instead is recommended.

In [3]:
import torch
import numpy as np

# Hyper-parameters for the RNNCell exercise.
input_size, hidden_size = 4, 4  # one-hot width / hidden width (the hidden state is scored directly against 4 classes)
batch_size = 1  # a single sequence is processed at a time
# num_layers is not set here: this exercise uses one RNNCell, i.e. a single layer.

#### 1.1 Word embedding

In [5]:

# Vocabulary: a character's position in this list is its class index.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # input  sequence: "hello"
y_data = [3, 1, 2, 3, 2]  # target sequence: "ohlol"

# Row i of the 4x4 identity matrix is the one-hot vector of class i;
# .tolist() turns the numpy array into a plain nested list.
one_hot_lookup = np.eye(4).tolist()
x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# inputs: (seq_len, batch_size, input_size)
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# labels: (seq_len, 1), one class index per time step
labels = torch.LongTensor(y_data).view(-1, 1)






tensor([[3],
        [1],
        [2],
        [3],
        [2]])

**Unrelated note**: a way to map indices to items

In [None]:

# Pick items by index: the i-th result is List1[list2[i]].
List1 = ["a", "b", "c", "d"]
list2 = [3, 1, 0, 2]

list3 = [List1[position] for position in list2]
list3

['d', 'b', 'a', 'c']

#### 1.2 Creating Model using RNNcells

In [None]:




class Model(torch.nn.Module):
  """Thin wrapper around a single ``torch.nn.RNNCell``.

  Args:
    input_size: width of the input vector fed in at each time step.
    hidden_size: width of the hidden state.
    batch_size: batch size used by ``init_hidden`` for the initial state.
  """

  def __init__(self, input_size, hidden_size, batch_size):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.rnncell = torch.nn.RNNCell(
      input_size=self.input_size,
      hidden_size=self.hidden_size,
    )

  def forward(self, input, hidden):
    """Advance the cell by one time step and return the new hidden state.

    Open question from the original author: how to extend this structure
    to multiple layers.
    """
    return self.rnncell(input, hidden)

  def init_hidden(self):
    """Return an all-zero initial hidden state of shape (batch_size, hidden_size)."""
    return torch.zeros(self.batch_size, self.hidden_size)




In [None]:
# Network, loss function and optimizer for the RNNCell exercise.
RNNNet_with_rnncells = Model(
  input_size,
  hidden_size,
  batch_size,
)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
  RNNNet_with_rnncells.parameters(),
  lr=0.01,
)


#### 1.3 Training Cycle

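* Why can the loss be computed when the label is an index and `hidden` is a vector of n=4? `CrossEntropyLoss` expects class scores of shape (N, C) and integer class indices of shape (N); here `hidden` is (1, 4) and `label` is (1,), so the index simply names which of the 4 scores should be the largest.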





* Iterate over seq_len, because `inputs` has shape (seq_len, batch_size, input_size) and `labels` has shape (seq_len, 1).

In [None]:
# Train the RNNCell model on the single "hello" -> "ohlol" sequence.
num_epochs = 30  # drives both the loop bound and the progress message
for epoch in range(num_epochs):
  loss = 0
  optimizer.zero_grad()
  hidden = RNNNet_with_rnncells.init_hidden()
  print('Predicted string: ', end='')
  # inputs is (seq_len, batch_size, input_size) and labels is (seq_len, 1), so
  # zip() yields one (batch_size, input_size) input and one (1,) label per step.
  for step_input, label in zip(inputs, labels):  # `step_input` avoids shadowing the builtin input()
    hidden = RNNNet_with_rnncells(step_input, hidden)
    # Add the loss tensor itself (no .item()) so the per-step losses stay part of
    # the autograd graph. CrossEntropyLoss takes scores of shape (N, C) and class
    # indices of shape (N), which is why a (1, 4) hidden state and a (1,) label fit.
    loss += criterion(hidden, label)
    _, idx = hidden.max(dim=1)  # index of the largest score = predicted class
    print(idx2char[idx.item()], end='')
  loss.backward()
  optimizer.step()
  print(', Epoch [%d/%d] loss=%.4f' % (epoch + 1, num_epochs, loss.item()))

## 2. Using RNN Module

#### 2.1 Word Embedding




In [6]:
import torch
import numpy as np

# Parameters
input_size = 4
hidden_size = 4
batch_size = 1  # currently 1
num_layers = 2

# word   = ['h', 'e', 'l', 'l', 'o']
# target = ['o', 'h', 'l', 'o', 'l']

# A character's position in idx2char is its class index.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# Transform to one-hot vectors: row i of the identity matrix encodes class i.
one_hot_coder = np.eye(len(idx2char)).tolist()
x_one_hot = [one_hot_coder[x] for x in x_data]

# inputs: (seq_len, batch_size, input_size)
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# targets stays 1-D (seq_len,): the loss is computed for the whole sequence at
# once, so no reshape is needed.
targets = torch.LongTensor(y_data)



#### 2.2 Creating Model using RNN Module

Documentation: https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

In [7]:
class Model_RNN_MODULE(torch.nn.Module):
  """Multi-layer ``torch.nn.RNN`` returning one score vector per time step.

  Args:
    input_size: width of each input vector.
    hidden_size: width of the hidden state; it is also the width of the
      returned score vectors, since no output layer follows the RNN.
    batch_size: stored for backward compatibility only. ``forward`` reads
      the batch size from its input, so any batch size works.
    num_layers: number of stacked RNN layers.
  """

  def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
    # If the class is renamed, a bare super() keeps working without edits.
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.batch_size = batch_size

    self.rnnmodule = torch.nn.RNN(input_size=self.input_size,
                                  hidden_size=self.hidden_size,
                                  num_layers=self.num_layers)

  def forward(self, input):
    """Run the whole sequence through the RNN.

    Args:
      input: tensor of shape (seq_len, batch, input_size).

    Returns:
      Tensor of shape (seq_len * batch, hidden_size), i.e. the (N, C)
      layout that ``CrossEntropyLoss`` expects.
    """
    # h_0 must be (num_layers, batch, hidden_size). Building it from the input
    # keeps batch size, dtype and device in agreement with the data.
    hidden = input.new_zeros(self.num_layers, input.size(1), self.hidden_size)
    # out: (seq_len, batch, hidden_size); the final hidden state is not needed.
    out, _ = self.rnnmodule(input, hidden)
    # Merge the sequence and batch dimensions into one.
    return out.reshape(-1, self.hidden_size)


In [8]:
# Network, loss function and optimizer for the RNN-module exercise.
RNNNet_with_rnnmodule = Model_RNN_MODULE(
  input_size,
  hidden_size,
  batch_size=1,
  num_layers=2,
)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
  RNNNet_with_rnnmodule.parameters(),
  lr=0.05,
)


#### 2.3 Training Cycle



In [10]:
# Train the RNN-module model; the whole sequence is fed in a single call.
num_epochs = 5  # drives both the loop bound and the progress message
for epoch in range(num_epochs):
  optimizer.zero_grad()
  predicts = RNNNet_with_rnnmodule(inputs)  # feed all seq_len steps at once

  # Debugging aid: targets is (seq_len,), predicts is (seq_len, hidden_size)
  print(targets.shape)
  print(predicts.shape)

  loss = criterion(predicts, targets)  # compares every time step with its label
  loss.backward()
  optimizer.step()

  # The index of the largest score in each row is the predicted class.
  _, idx = predicts.max(dim=1)
  idx = idx.tolist()  # plain Python ints; no need for the legacy .data.numpy()
  print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
  print(', Epoch [%d/%d] loss = %.3f' % (epoch + 1, num_epochs, loss.item()))



torch.Size([5])
torch.Size([5, 4])
Predicted:  ohlol, Epoch [1/15] loss = 0.344
torch.Size([5])
torch.Size([5, 4])
Predicted:  ohlol, Epoch [2/15] loss = 0.343
torch.Size([5])
torch.Size([5, 4])
Predicted:  ohlol, Epoch [3/15] loss = 0.343
torch.Size([5])
torch.Size([5, 4])
Predicted:  ohlol, Epoch [4/15] loss = 0.343
torch.Size([5])
torch.Size([5, 4])
Predicted:  ohlol, Epoch [5/15] loss = 0.343


## 3. Using embedding and linear layer

1. Embedding for dense representation
2. Linear layer for the case where hidden_size is not equal to class_num

3. After adding a Softmax layer at the end of the net it still works: CrossEntropyLoss accepts the Softmax output without error (remember to reshape before softmax)





#### 3.1 Data Preparation

In [None]:
import torch
import numpy as np

# Parameters
num_class = 4
input_size = 4
hidden_size = 8
batch_size = 1  # currently 1
num_layers = 2
embedding_size = 10
seq_len = 5

# word   = ['h', 'e', 'l', 'l', 'o']
# target = ['o', 'h', 'l', 'o', 'l']

# A character's position in idx2char is its class index.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# No one-hot encoding here: the embedding layer takes integer indices, so the
# inputs are a LongTensor of shape (seq_len, batch_size).
inputs = torch.LongTensor(x_data).view(-1, batch_size)
# targets stays 1-D (seq_len,): the loss is computed for the whole sequence at
# once, so no reshape is needed.
targets = torch.LongTensor(y_data)
inputs




tensor([[1],
        [0],
        [2],
        [2],
        [3]])

#### 3.0 Test of embedding block's setting

This cell checks that the embedding's settings fit the model settings used later.
https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html

In [None]:
# Stand-alone check of the embedding layer before it is built into the model.
#   num_embeddings: number of rows in the lookup table. It must be larger than
#                   the biggest index fed in; the inputs here use indices 0..3,
#                   so 4 rows are enough.
#   embedding_dim : length of the dense vector returned for each index.
embedding = torch.nn.Embedding(num_embeddings=4, embedding_dim=10)

stars = '*' * 20

print(stars, ' Each embedded index is a vector of size embedding_dim', stars)

# (seq_len, batch_size) index tensor -> (seq_len, batch_size, embedding_dim)
embedded = embedding(inputs)

print(stars, "Input shape:sequen_lenth *  Btach_size *(1 elemnt index)", stars)
print('Input shape:', inputs.shape)
print('Input', inputs)

# embedding_dim takes the role that the one-hot width had as the RNN input size.
print(stars, "embedded shape:sequen_lenth *  Btach_size * embedding_dim", stars)
print(stars, "embedding_dim = input as one hot vectoc 's size for RNN", stars)
print('embedded: shape:  ', embedded.shape)
print('embedded: ', embedded)

********************  Each embedded index is a vector of size embedding_dim ********************
******************** Input shape:sequen_lenth *  Btach_size *(1 elemnt index) ********************
Input shape: torch.Size([5, 1])
Input tensor([[1],
        [0],
        [2],
        [2],
        [3]])
******************** embedded shape:sequen_lenth *  Btach_size * embedding_dim ********************
******************** embedding_dim = input as one hot vectoc 's size for RNN ********************
embedded: shape:   torch.Size([5, 1, 10])
embedded:  tensor([[[ 1.2499,  0.8453, -0.5364,  1.0678, -1.8656, -0.0077,  0.1174,
           2.1582, -0.6782, -0.1061]],

        [[-1.2475, -0.1077, -0.1182,  1.0670,  0.4534, -0.7649, -0.7650,
           0.3220, -1.2432,  1.3134]],

        [[ 0.6486,  0.2889,  0.5669,  0.5795,  1.4389, -0.5236,  0.7366,
          -0.5423, -1.4522,  0.0178]],

        [[ 0.6486,  0.2889,  0.5669,  0.5795,  1.4389, -0.5236,  0.7366,
          -0.5423, -1.4522,  0.0178]]

#### 3.2 Creating Model using RNN Module + Embedding + Linear Layer


**A softmax layer was added before the output. Training still works,** and the CrossEntropyLoss computation does not fail. Normally CrossEntropyLoss (CEL) applies softmax itself, i.e. by definition the model output you feed to CEL is expected to be un-normalized. If you normalize anyway, the model still learns the mapping, but softmax is then applied twice: the inputs to CEL are confined to [0, 1], so for 4 classes the loss cannot fall below about 0.74 (see the plateau at 0.745 in the training log below).

In [None]:
class Model_RNN_EMBED(torch.nn.Module):
  """Embedding -> multi-layer RNN -> Linear -> softmax, for index sequences.

  Args:
    input_size: stored only; the RNN input width is ``embedding_dim``.
    hidden_size: width of the RNN hidden state.
    batch_size: stored for backward compatibility only. ``forward`` reads
      the batch size from its input.
    num_embeddings: number of distinct indices the embedding table holds.
    embedding_dim: length of the dense vector produced for each index.
    num_class: number of output classes.
    num_layers: number of stacked RNN layers.
  """

  def __init__(self, input_size, hidden_size, batch_size, num_embeddings, embedding_dim, num_class, num_layers=1):
    # If the class is renamed, a bare super() keeps working without edits.
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.batch_size = batch_size
    self.embedding_dim = embedding_dim
    self.num_embeddings = num_embeddings
    self.num_class = num_class

    self.embedding = torch.nn.Embedding(num_embeddings, embedding_dim)
    self.rnnmodule_emb = torch.nn.RNN(input_size=embedding_dim,
                                      hidden_size=hidden_size,
                                      num_layers=num_layers)
    # Maps hidden_size to num_class, so the two no longer have to be equal.
    self.fc = torch.nn.Linear(hidden_size, num_class)

  def forward(self, input):
    """Return class probabilities for every time step.

    Args:
      input: LongTensor of indices with shape (seq_len, batch).

    Returns:
      Tensor of shape (seq_len * batch, num_class); each row sums to 1.
    """
    # (seq_len, batch) -> (seq_len, batch, embedding_dim)
    embedded = self.embedding(input)
    # h_0 is built from the instance attributes and the actual input, never from
    # notebook globals, so the model also works outside this notebook.
    hidden = embedded.new_zeros(self.num_layers, embedded.size(1), self.hidden_size)
    # out: (seq_len, batch, hidden_size); the final hidden state is not needed.
    out, _ = self.rnnmodule_emb(embedded, hidden)
    out = self.fc(out)  # (seq_len, batch, num_class)

    # Flatten to (N, num_class) BEFORE the softmax so dim=1 is the class axis.
    out = out.reshape(-1, self.num_class)
    # Softmax is kept because this notebook demonstrates it on purpose. Returning
    # `out` without this line would give the un-normalized scores instead.
    # NOTE(review): CrossEntropyLoss normalizes its input again, which would
    # explain the loss plateau near 0.745 in the recorded run; confirm whether
    # returning raw scores is preferable for training.
    return torch.nn.functional.softmax(out, dim=1)


In [None]:
# Network, loss function and optimizer for the embedding-based exercise.
RNNNet_with_embed = Model_RNN_EMBED(
  input_size,
  hidden_size,
  batch_size=1,
  num_layers=2,
  num_embeddings=4,
  embedding_dim=embedding_size,
  num_class=4,
)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
  RNNNet_with_embed.parameters(),
  lr=0.05,
)

#### 3.3 Training Cycle

In [None]:
# Train the embedding-based model; the whole sequence is fed in a single call.
num_epochs = 40  # drives both the loop bound and the progress message
for epoch in range(num_epochs):
  optimizer.zero_grad()
  predicts = RNNNet_with_embed(inputs)  # feed all seq_len steps at once
  loss = criterion(predicts, targets)  # compares every time step with its label
  loss.backward()
  optimizer.step()

  # The index of the largest value in each row is the predicted class.
  _, idx = predicts.max(dim=1)
  idx = idx.tolist()  # plain Python ints; no need for the legacy .data.numpy()
  print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
  print(', Epoch [%d/%d] loss = %.3f' % (epoch + 1, num_epochs, loss.item()))


Predicted:  lhlol, Epoch [1/50] loss = 1.059
Predicted:  lhlol, Epoch [2/50] loss = 0.983
Predicted:  ohlol, Epoch [3/50] loss = 0.916
Predicted:  ohlol, Epoch [4/50] loss = 0.857
Predicted:  ohlol, Epoch [5/50] loss = 0.814
Predicted:  ohlol, Epoch [6/50] loss = 0.785
Predicted:  ohlol, Epoch [7/50] loss = 0.769
Predicted:  ohlol, Epoch [8/50] loss = 0.760
Predicted:  ohlol, Epoch [9/50] loss = 0.754
Predicted:  ohlol, Epoch [10/50] loss = 0.751
Predicted:  ohlol, Epoch [11/50] loss = 0.749
Predicted:  ohlol, Epoch [12/50] loss = 0.748
Predicted:  ohlol, Epoch [13/50] loss = 0.747
Predicted:  ohlol, Epoch [14/50] loss = 0.746
Predicted:  ohlol, Epoch [15/50] loss = 0.746
Predicted:  ohlol, Epoch [16/50] loss = 0.746
Predicted:  ohlol, Epoch [17/50] loss = 0.745
Predicted:  ohlol, Epoch [18/50] loss = 0.745
Predicted:  ohlol, Epoch [19/50] loss = 0.745
Predicted:  ohlol, Epoch [20/50] loss = 0.745
Predicted:  ohlol, Epoch [21/50] loss = 0.745
Predicted:  ohlol, Epoch [22/50] loss = 0.7

## For multi-class classification: how to get the predicted index from the output probability tensor


```
# what,idx = outputs.max(dim=1)
```



what 包含所有最大值， idx 为那维度各个位置的最大值对应的index


In [None]:

# Tensor.max(dim=1) reduces along dimension 1 and returns two tensors:
# the largest value of every row and the position of that value in the row.
outputs = torch.randn(5, 5)
what, idx = outputs.max(dim=1)  # what: row maxima, idx: their indices
print(outputs, what, idx, sep='\n')

print("*" * 20, "  create a tensor filled with random number and apply max to it ，so the method is verified")

# Repeat the check on a second random tensor with the same shape as `outputs`.
output_size = outputs.shape
test2 = torch.randn(output_size)
max_test2, idx = test2.max(dim=1)
print(test2, max_test2, idx, sep='\n')  # tensor, its row maxima, their indices


tensor([[-1.0618, -0.1526, -0.2571,  0.3936,  0.5633],
        [ 0.6730,  0.1595, -0.0259, -1.2831, -0.5916],
        [-1.8953, -0.6111,  0.9884,  0.5086, -0.1596],
        [ 0.1374, -0.0319,  0.1647, -0.1492, -0.2406],
        [-0.5378, -1.8251, -0.9900,  1.4081,  0.4630]])
tensor([0.5633, 0.6730, 0.9884, 0.1647, 1.4081])
tensor([4, 0, 2, 2, 3])
********************   create a tensor filled with random number and apply max to it ，so the method is verified
tensor([[ 0.2991, -1.4580, -0.4263,  1.4588,  0.0297],
        [-0.5792,  0.9098,  0.4497, -0.7781,  1.3429],
        [-0.1592,  0.9286,  0.3492, -0.8756, -1.2354],
        [ 1.8106, -0.1697,  1.1576,  0.4994, -0.6966],
        [-1.5289,  0.4375, -0.8623, -1.1259,  0.4140]])
tensor([1.4588, 1.3429, 0.9286, 1.8106, 0.4375])
tensor([3, 4, 1, 0, 1])


## Mapping indices to values

idx is stored in a list:
```
idx = [3,2,0,3]
idx2char = ['e','h','l','o']
# print( ''.join([idx2char[x] for x in idx]), end='')
```




In [None]:
# Decode a list of class indices back into text through the vocabulary.
idx = [3, 2, 0, 3]
idx2char = ['e', 'h', 'l', 'o']
decoded = ''.join(idx2char[position] for position in idx)
print(decoded, end='')

oleo