In [1]:
import torch
import torch.nn as nn
from tqdm import tqdm
from ELMO import ELMo
import torch.nn.functional as F
from sts_loader import STSDataset
from torch.utils.data import DataLoader

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
print(device)

cuda


In [3]:
# Load vocabularies
vocab = torch.load('Hin_vocab.pt')
character_vocab = torch.load('Hin_character_vocab.pt')

In [4]:
# Get Sentences and Scores

path = 'sts-train-hi.tsv'
sts_dataset = STSDataset(path)
s1, s2, scores = sts_dataset.format(character_vocab)
scores = torch.tensor(scores, dtype=torch.float32)

  scores = torch.tensor(scores, dtype=torch.float32)


In [5]:
def create_dataloader(s1, s2, scores, batch_size):
    zipped = list(zip(s1, s2, scores))
    dataloader = DataLoader(zipped, batch_size=batch_size, shuffle=True)
    return dataloader

In [6]:
# make the dataloader
s1_val = s1[:1000]
s2_val = s2[:1000]
scores_val = scores[:1000]
batch_size = 64
val_dataloader = create_dataloader(s1_val, s2_val, scores_val, batch_size)
train_dataloader = create_dataloader(s1[1000:], s2[1000:], scores[1000:], batch_size)

In [7]:
# for i in range(5):
#     for j in range(15):
#         print(s1[i][j], s2[i][j], scores[i])
#     print('---------------------')


In [8]:
model = ELMo(cnn_config = {'character_embedding_size': 16, 
                           'num_filters': 32, 
                           'kernel_size': 5, 
                           'max_word_length': 10, 
                           'char_vocab_size': len(character_vocab)}, 
             elmo_config = {'num_layers': 3,
                            'word_embedding_dim': 300,
                            'vocab_size': len(vocab)}, 
             char_vocab_size = len(character_vocab)).to(device)


In [9]:
model.load_state_dict(torch.load('Hin_model.pt'))

<All keys matched successfully>

In [10]:
word_embedding_dim = 300


class SimilarityModel(nn.Module):
    def __init__(self, elmo):
        super(SimilarityModel, self).__init__()
        self.elmo = elmo        
        self.lstm = nn.LSTM(word_embedding_dim, word_embedding_dim//2, bidirectional=True)
        
    def forward(self, sentence1, sentence2):
        _, sentence1 = self.elmo(sentence1)
        _, sentence2 = self.elmo(sentence2)
        # print the embeddings of the first sentence
        # print(sentence1.shape, sentence2.shape)

        lstm_out1, _ = self.lstm(sentence1)
        lstm_out2, _ = self.lstm(sentence2)
        # print(lstm_out1.shape, lstm_out2.shape)
        last_output1 = lstm_out1[:, -1, :]
        last_output2 = lstm_out2[:, -1, :]
        # print(lstm_out1.shape, lstm_out2.shape)
        lstm_out1 = lstm_out1.view(lstm_out1.size(0), -1)  
        lstm_out2 = lstm_out2.view(lstm_out2.size(0), -1)  

        # Compute the cosine similarity between the reshaped tensors
        cos_sim = (F.cosine_similarity(lstm_out1, lstm_out2, dim=1) + 1)*5/2
        # print((F.cosine_similarity(lstm_out1, lstm_out2, dim=0) + 1)*5/2)
        return cos_sim


In [11]:
# similarity_model = SimilarityModel(model)
similarity_model = SimilarityModel(model).to(device)

In [12]:
num_epochs = 10
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(similarity_model.parameters(), lr=0.001)


In [13]:
num_epochs = 20

# train the model
for epoch in range(num_epochs):
    similarity_model.train()
    total_loss = 0
    for s1, s2, scores in tqdm(train_dataloader):
        s1 = s1
        s2 = s2
        scores = scores.to(device)
        optimizer.zero_grad()
        outputs = similarity_model(s1, s2)
        # print(outputs.squeeze().shape, scores.shape)
        # print(outputs.shape, scores.shape)
        loss = criterion(outputs, scores)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch} Loss: {total_loss}")

    similarity_model.eval()
    with torch.no_grad():
        total_loss = 0
        # calculate mean difference between predicted and actual scores
        diff = 0

        for s1, s2, scores in val_dataloader:
            s1 = s1
            s2 = s2
            scores = scores.to(device)
            outputs = similarity_model(s1, s2)
            loss = criterion(outputs, scores)

            total_loss += loss.item()
            diff += torch.abs(outputs - scores).sum().item()
            # calculate 

        print(f"Validation Loss: {total_loss}" + f" Mean Difference: {diff/len(s1_val)}")

100%|██████████| 75/75 [00:20<00:00,  3.59it/s]


Epoch 0 Loss: 166.7070243358612
Validation Loss: 66.03542757034302 Mean Difference: 1.6704048080444336


100%|██████████| 75/75 [00:19<00:00,  3.75it/s]


Epoch 1 Loss: 153.29780983924866
Validation Loss: 56.270490884780884 Mean Difference: 1.567905460357666


100%|██████████| 75/75 [00:19<00:00,  3.75it/s]


Epoch 2 Loss: 151.6114535331726
Validation Loss: 54.99734425544739 Mean Difference: 1.5563747329711914


100%|██████████| 75/75 [00:20<00:00,  3.65it/s]


Epoch 3 Loss: 152.36556661128998
Validation Loss: 53.552475690841675 Mean Difference: 1.5436703872680664


 39%|███▊      | 29/75 [00:08<00:13,  3.38it/s]


KeyboardInterrupt: 