#### Exercise
![emo](../../img/lstm_history.jpg)

Use an LSTM to learn sequence features over each speaker's own (intra-speaker) utterances:
- select all the utterances of each user in a conversation
- stack the utterance vectors of all users and forward them through the LSTM
- add the vectors fused by the LSTM back into the list of utterances => `utterance_vector_fused_by_speaker_history`
- use `utterance_vector_fused_by_speaker_history` as a new feature, concatenated with `y_hat`, before the Linear output layer.

The `speaker_history_model_by_lstm` model sequentially learns the relations among all utterances of one speaker.

In [116]:
import torch 
from torch import nn 
import random
import torch.nn.functional as F
import numpy as np

# Feature width of one utterance vector; also the LSTM input size below.
d_model = 1024
# The fake utterance embeddings use the same width as the model.
embedding_size = d_model
# Shape of the fake batch: number of conversations and utterances per conversation.
batch_size, sequence_length = 1, 7


# ================ 
# fake input data 
def set_random_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: value handed to every random number generator.
    """
    # Same seeding order as before: Python, NumPy, torch, then all CUDA devices.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_random_seed(2)

# Random stand-in for the per-utterance encoder output:
# one `embedding_size`-wide vector per utterance.
fake_utterance_vector_from_bert = torch.rand(batch_size, sequence_length, embedding_size)

# Pairwise boolean mask of shape (batch, seq, seq).  The forward pass reads
# row 0 of it as "utterances spoken by the first speaker".
# Allocated zero-initialised with an explicit dtype instead of the legacy
# `torch.BoolTensor(sizes)` constructor, which returns uninitialised memory.
intra_speaker_masekd_all = torch.zeros(
    batch_size, sequence_length, sequence_length, dtype=torch.bool
)

# `torch.randint` already returns an int64 tensor, so no LongTensor wrap is needed.
labels = torch.randint(0, 5, size=[batch_size, sequence_length])
print(intra_speaker_masekd_all.shape)
for k in range(batch_size):
    for i in range(sequence_length):
        for j in range(sequence_length):
            # Every (i, j) visit draws a fresh value and writes both mirrored
            # cells, so the later visit (the one with i > j) wins and the
            # final matrix is symmetric.  The draw-per-visit pattern is kept
            # on purpose so a fixed seed produces the same mask as before.
            # NOTE: the values are purely random -- the diagonal may be False
            # and transitivity is not enforced.
            v = random.choice([True, False])
            intra_speaker_masekd_all[k, i, j] = v
            intra_speaker_masekd_all[k, j, i] = v

# Boolean mask that is True only for the last two utterances of each conversation.
fake_mask = torch.zeros(batch_size, sequence_length, dtype=torch.bool)
fake_mask[:, -2:] = True

# Second random embedding tensor with the same shape as the utterance vectors.
fake_embedding = torch.rand(batch_size, sequence_length, embedding_size)

# ================ 
# init model 
# Two stacked bidirectional LSTM layers.  Each direction is d_model // 2 wide,
# so the concatenated forward/backward output is d_model wide again (for even
# d_model) and can be added onto the input utterance vectors.
speaker_history_model_by_lstm = nn.LSTM(
    input_size=d_model,
    hidden_size=d_model // 2,
    num_layers=2,
    dropout=0.2,
    batch_first=True,
    bidirectional=True,
)
 
# ================ 
# forward 
# Row 0 of the pairwise mask selects the utterances that share a speaker with
# utterance 0; everything else is treated as the second speaker, so exactly
# two speakers are assumed.
# NOTE: the (batch, seq) boolean masks flatten the batch dimension when used
# for indexing below, which is only meaningful for batch_size == 1.
first_user_mask = intra_speaker_masekd_all[:, 0]
second_user_mask = ~first_user_mask

# separate the utterances of each speaker
# Work on a copy so the original encoder output stays untouched.
utterance_vector_fused_by_speaker_history = fake_utterance_vector_from_bert.clone()
v_first_speaker = utterance_vector_fused_by_speaker_history[first_user_mask]
v_second_speaker = utterance_vector_fused_by_speaker_history[second_user_mask]

# padding: bring both speakers to the same number of time steps
# (padding by 0 rows is a no-op, so no branch on which one is shorter).
n_utterance_speaker = v_first_speaker.shape[0], v_second_speaker.shape[0]
max_n_utterance = max(n_utterance_speaker)
v_first_speaker = F.pad(v_first_speaker, [0, 0, 0, max_n_utterance - n_utterance_speaker[0]])
v_second_speaker = F.pad(v_second_speaker, [0, 0, 0, max_n_utterance - n_utterance_speaker[1]])
v_all_speakers = torch.stack([v_first_speaker, v_second_speaker], dim=0)

# learn the history context of each speaker's utterances
# Pack the padded batch so the LSTM only sees real utterances.  Without
# packing, the backward direction of the shorter speaker would run over the
# zero padding first and leak that into every real time step.
# Lengths are clamped to 1 because packing rejects empty sequences; the
# output of a speaker without utterances is never read back below.
speaker_lengths = torch.tensor(n_utterance_speaker).clamp(min=1)
packed_speakers = nn.utils.rnn.pack_padded_sequence(
    v_all_speakers, speaker_lengths, batch_first=True, enforce_sorted=False
)
packed_h_words, (hn, cn) = speaker_history_model_by_lstm(packed_speakers)
# Unpack to the padded (speaker, max_n_utterance, features) layout.
h_words, h_words_lengths = nn.utils.rnn.pad_packed_sequence(
    packed_h_words, batch_first=True, total_length=max_n_utterance
)

# put the lstm output back into the final hidden features:
# each utterance vector is fused (summed) with its speaker-history feature,
# using only the real, un-padded time steps of each speaker.
utterance_vector_fused_by_speaker_history[first_user_mask] += h_words[0][:n_utterance_speaker[0]]
utterance_vector_fused_by_speaker_history[second_user_mask] += h_words[1][:n_utterance_speaker[1]]
# Flatten to one row per utterance: (batch * seq, features).
utterance_vector_fused_by_speaker_history = utterance_vector_fused_by_speaker_history.reshape(
    batch_size * sequence_length, -1
)

print("shape of fist and second speaker vectors: ", n_utterance_speaker[0], n_utterance_speaker[1])


torch.Size([1, 7, 7])
shape of fist and second speaker vectors:  3 4
