# LSTM to Seq2Seq
- I will try to use medical examples and apply lstm or seq2seq

# LSTM
- lstm cell

![lstm](https://miro.medium.com/max/900/1*s7_EO0rjXAw99RnH1x4s_g.png)
- lstm cell takes 3 input
    - cell state from $t-1$
    - hidden state from $t-1$
    - current input $x_t$

- lstm cell output 
    - cell state from $t$
    - hidden state from $t$
    - output from $t$



In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F

use sample data

input : length 1 with 10 dimension\
output : length 1 20 dimension

In [28]:
rnn = nn.LSTM(10, 20, 1) # (input_size, hidden_size, num_layers)
input = torch.randn(1, 1, 10) # (Series Length, Batch size, input_dim)
h0 = torch.randn(1, 1, 20) # (Series Length, Batch size, input_dim)

In [29]:
rnn.input_size, rnn.hidden_size, rnn.num_layers
# input size , hidden size,  number of layers

(10, 20, 1)

In [30]:
output = rnn(input)

len(output) # length of 2

2

In [31]:
output[0], output[1]

# output 0 : 1,1,20 vector. output of layer
# output 1 : (output and the hidden output & current state output)

(tensor([[[ 0.0863,  0.0783, -0.0632, -0.2114, -0.0293, -0.1373,  0.0518,
            0.0394,  0.1196, -0.1581, -0.0976, -0.0430,  0.0953,  0.1388,
           -0.0117, -0.0956,  0.0860, -0.0165, -0.0935, -0.1458]]],
        grad_fn=<StackBackward0>),
 (tensor([[[ 0.0863,  0.0783, -0.0632, -0.2114, -0.0293, -0.1373,  0.0518,
             0.0394,  0.1196, -0.1581, -0.0976, -0.0430,  0.0953,  0.1388,
            -0.0117, -0.0956,  0.0860, -0.0165, -0.0935, -0.1458]]],
         grad_fn=<StackBackward0>),
  tensor([[[ 0.1549,  0.1691, -0.2255, -0.3411, -0.1421, -0.2021,  0.0807,
             0.0683,  0.1738, -0.3337, -0.1766, -0.1121,  0.2311,  0.2966,
            -0.0269, -0.2441,  0.2428, -0.0286, -0.1672, -0.2226]]],
         grad_fn=<StackBackward0>)))

What if we changed the sequence of the vector to 2?

In [32]:
input = torch.rand(2, 1, 10) # sequence length 2, batch_size, 1, embedding size 10
rnn = nn.LSTM(10, 20, 1) # same model as before

In [33]:
output = rnn(input) 

output[0], output[1] 

(tensor([[[ 0.0048,  0.0252, -0.0662,  0.0445, -0.0235,  0.0417,  0.0912,
           -0.1003,  0.1035,  0.0425,  0.0040,  0.0461, -0.1268, -0.1497,
            0.0053,  0.0726, -0.0384,  0.0635,  0.0502, -0.0389]],
 
         [[-0.0363,  0.0357, -0.0924,  0.0851,  0.0062,  0.0713,  0.0575,
           -0.1367,  0.0891,  0.0867,  0.0131,  0.0277, -0.1554, -0.1826,
           -0.0610,  0.0499, -0.0329,  0.1127,  0.0623, -0.0623]]],
        grad_fn=<StackBackward0>),
 (tensor([[[-0.0363,  0.0357, -0.0924,  0.0851,  0.0062,  0.0713,  0.0575,
            -0.1367,  0.0891,  0.0867,  0.0131,  0.0277, -0.1554, -0.1826,
            -0.0610,  0.0499, -0.0329,  0.1127,  0.0623, -0.0623]]],
         grad_fn=<StackBackward0>),
  tensor([[[-0.0592,  0.0775, -0.1861,  0.1817,  0.0103,  0.1715,  0.1263,
            -0.3140,  0.1404,  0.1599,  0.0260,  0.0503, -0.3637, -0.3371,
            -0.1354,  0.0950, -0.0755,  0.2138,  0.1307, -0.1480]]],
         grad_fn=<StackBackward0>)))

In [34]:
output[0].shape 
# The hidden state output. 

# (Length 2, batch, embed_size)

torch.Size([2, 1, 20])

In [36]:
output[1]

# (hidden state : (sequence_size, batch_size, embedding), current_state : (sequence_size, batch_size, embedding))

(tensor([[[-0.0363,  0.0357, -0.0924,  0.0851,  0.0062,  0.0713,  0.0575,
           -0.1367,  0.0891,  0.0867,  0.0131,  0.0277, -0.1554, -0.1826,
           -0.0610,  0.0499, -0.0329,  0.1127,  0.0623, -0.0623]]],
        grad_fn=<StackBackward0>),
 tensor([[[-0.0592,  0.0775, -0.1861,  0.1817,  0.0103,  0.1715,  0.1263,
           -0.3140,  0.1404,  0.1599,  0.0260,  0.0503, -0.3637, -0.3371,
           -0.1354,  0.0950, -0.0755,  0.2138,  0.1307, -0.1480]]],
        grad_fn=<StackBackward0>))

# Bi-LSTM models with some dataset
- dataset : Tabular data - titanic

In [6]:
from pathlib import Path
import pandas as pd

project_dir = Path.cwd().parent
data_dir = project_dir.joinpath('data')

data = data_dir.joinpath('train.csv')
data = pd.read_csv(data)


In [7]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [17]:
# first tokenize all the values in the dataset
data.values

from torchtext.vocab import build_vocab_from_iterator
from collections import Counter, OrderedDict

vocab = build_vocab_from_iterator(data.values.astype('str'), specials = ["<unk>"])

In [None]:
from torchtext.transforms import VocabTransform

vocab_transform = VocabTransform(vocab)

vocab_transform(data.values[0].astype('str'))

In [44]:
Path().exists()

True

In [36]:
vocab_transform(data.values[0].astype('str').reshape(1, -1))

  if obj == [] or obj == {} or obj == ():


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

# Sequence to Sequence
- 