In [10]:
import torch
import torch.nn as nn
from preprocessing import NextWordDataset
from tqdm import tqdm
from ELMO import ELMo
import torch.nn.functional as F
from sts_loader import STSDataset

In [8]:
# Use the CUDA device when torch reports one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Seed torch's default RNG with a fixed value.
torch.manual_seed(42)
print(device)

cuda


In [4]:
# Read both serialized vocabularies, in order: the main vocab first, then the
# character vocab.
vocab, character_vocab = (
    torch.load(vocab_file)
    for vocab_file in ('Hin_vocab.pt', 'Hin_character_vocab.pt')
)

In [11]:
# Get sentences and scores: build the STS dataset from the TSV file, then
# format it using the character vocabulary. `format` yields three values.
# NOTE(review): presumably s1/s2 are the paired sentences and `scores` their
# similarity labels — confirm against sts_loader.STSDataset.format.

path = 'sts-train-hi.tsv'
sts_dataset = STSDataset(path)
s1, s2, scores = sts_dataset.format(character_vocab)

In [5]:
# Settings passed to ELMo as `cnn_config`.
cnn_config = {
    'character_embedding_size': 16,
    'num_filters': 32,
    'kernel_size': 5,
    'max_word_length': 10,
    'char_vocab_size': len(character_vocab),
}

# Settings passed to ELMo as `elmo_config`.
elmo_config = {
    'num_layers': 3,
    'word_embedding_dim': 300,
    'vocab_size': len(vocab),
}

# Build the model, then move it to the selected device.
model = ELMo(
    cnn_config=cnn_config,
    elmo_config=elmo_config,
    char_vocab_size=len(character_vocab),
)
model = model.to(device)


In [6]:
# Restore the trained weights into the freshly built model.
# map_location=device remaps the stored tensors onto the device selected
# earlier, so a checkpoint saved on a GPU still loads when the notebook falls
# back to the CPU (without it, torch.load raises on a CUDA-less machine).
model.load_state_dict(torch.load('Hin_model.pt', map_location=device))

<All keys matched successfully>

In [None]:
# Model for sentence similarity
class SentenceSimilarity(nn.Module):
    """Scores two inputs by the cosine similarity of their encoder outputs.

    Args:
        elmo: Module used to encode both inputs; stored as ``self.elmo``.
    """

    def __init__(self, elmo):
        super().__init__()
        self.elmo = elmo

    def forward(self, sentence1, sentence2):
        """Encode both inputs with the shared encoder and compare them.

        NOTE(review): the similarity is reduced over dim=1 of whatever
        ``self.elmo`` returns — confirm that this is the intended feature axis.

        Returns:
            ``F.cosine_similarity`` of the two encodings, computed along dim=1.
        """
        encoded_first = self.elmo(sentence1)
        encoded_second = self.elmo(sentence2)
        return F.cosine_similarity(encoded_first, encoded_second, dim=1)