In [1]:
import torch
import torch.nn as nn
from tqdm import tqdm
from ELMO import ELMo
import torch.nn.functional as F
from wa_loader import WADataset, tokenize, split_into_characters
from torch.utils.data import DataLoader



In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Load vocabularies
vocab = torch.load('../hin_word_vocab.pt')
character_vocab = torch.load('../hin_char_vocab.pt')

In [3]:
# in the file Same.txt, append evey line that does not have ':' as the first non-space character
same_sens = []
with open('Same.txt', 'r', encoding='utf8') as f:
    for line in f:
        if line[0] != ' ' and line[0] != ':':
            same_sens.append(line.strip())

In [4]:
# same for Not_Same.txt
not_same_sens = []
with open('Not_Same.txt', 'r', encoding='utf8') as f:
    for line in f:
        if line[0] != ' ' and line[0] != ':':
            not_same_sens.append(line.strip())

In [5]:
same_sens = tokenize(same_sens)
not_same_sens = tokenize(not_same_sens)

In [6]:
jaise = "जैसे"
to = "तो"
waise = "वैसे"
hi = "ही"

In [7]:
# for same_sens and not_same_sens, insert jaise at position 1, to at 3 and jaise at 5
for i in range(len(same_sens)):
    same_sens[i].insert(1, jaise)
    same_sens[i].insert(3, waise)
    same_sens[i].insert(4, hi)
    same_sens[i].insert(6, jaise)
for i in range(len(not_same_sens)):
    not_same_sens[i].insert(1, jaise)
    not_same_sens[i].insert(3, waise)
    not_same_sens[i].insert(4, hi)
    not_same_sens[i].insert(6, jaise)

In [8]:
# Get Sentences
s1_dataset = WADataset(same_sens)
s1, w1 = s1_dataset.format(character_vocab)

In [9]:
def create_dataloader(s1, w1, batch_size):
    zipped = list(zip(s1, w1))
    dataloader = DataLoader(zipped, batch_size=batch_size, shuffle=True)
    return dataloader

In [10]:
# split s1 and w1 into train and test
train_size = int(0.8 * len(s1))
test_size = len(s1) - train_size
train_s1, test_s1 = s1[:train_size], s1[train_size:]
train_w1, test_w1 = w1[:train_size], w1[train_size:]

AttributeError: 'list' object has no attribute 'shape'

In [12]:
# make the dataloader for modle with elmo

batch_size = 128
train_dataloader = create_dataloader(train_s1, train_w1, batch_size)
val_dataloader = create_dataloader(test_s1, test_w1, batch_size)

In [14]:
model = ELMo(cnn_config = {'character_embedding_size': 16, 
                           'num_filters': 32, 
                           'kernel_size': 5, 
                           'max_word_length': 10, 
                           'char_vocab_size': character_vocab.num_chars}, 
             elmo_config = {'num_layers': 3,
                            'word_embedding_dim': 150,
                            'vocab_size': vocab.num_words}, 
             char_vocab_size = character_vocab.num_chars).to(device)


In [15]:
model.load_state_dict(torch.load('../model_elmo_hindi.pt'))

<All keys matched successfully>

In [16]:
word_embedding_dim = 300

# Models

## ELMo

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

class WordAnalogyModel(nn.Module):
    def __init__(self, elmo):
        super(WordAnalogyModel, self).__init__()
        self.elmo = elmo
        
        # Freeze the parameters of the ELMo model since it's pretrained
        for param in self.elmo.parameters():
            param.requires_grad = False
        
        # You may want to include an additional layer or parameters for the task
        # Example: A fully connected layer for the final prediction
        # For simplicity, we will not add it here

    def forward(self, sentence, target_word_index):
        # Get the ELMo embeddings
        _, _, final_embeddings = self.elmo(sentence)
        
        # Retrieve embeddings of the specified words
        embedding_first = final_embeddings[0][0]  # First word
        embedding_third = final_embeddings[0][2]  # Third word
        embedding_sixth = final_embeddings[0][5]  # Sixth word
        
        # Perform analogy calculation: (first - third + sixth)
        analogy_vector = embedding_first - embedding_third + embedding_sixth
        
        # Compute cosine similarity between analogy_vector and all word embeddings
        # in the vocabulary, and find the most similar word index
        similarities = F.cosine_similarity(analogy_vector.unsqueeze(0), final_embeddings[0], dim=1)
        
        # Find the index of the word with the highest similarity
        predicted_word_index = torch.argmax(similarities)
        
        # Compute the loss with respect to the target word
        loss = F.cross_entropy(similarities.unsqueeze(0), target_word_index.unsqueeze(0))
        
        return loss, predicted_word_index


## Without ELMo

In [22]:
# similarity_model = SimilarityModel(model)
word_analogy_model = WordAnalogyModel(model).to(device)

In [23]:
num_epochs = 10
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(word_analogy_model.parameters(), lr=0.001)


In [30]:
def train_model(model, dataloader, optimizer, num_epochs=10):
    model.train()  # Set the model to training mode
    
    for epoch in range(num_epochs):
        epoch_loss = 0.0  # Initialize loss for the epoch
        
        for i, batch in enumerate(dataloader):
            # Assume batch is a tuple of (input_sentence, target_word)
            input_sentence, target_word = batch
            print(len(input_sentence[0]), len(target_word))
            # Assuming `target_word` is an integer representing the index of the target word
            target_word_index = target_word  # This should already be in the form of an index
            
            # Zero the gradients before forward pass
            optimizer.zero_grad()
            
            # Forward pass
            loss, predicted_word_index = model(input_sentence, target_word_index)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            # Accumulate loss for the epoch
            epoch_loss += loss.item()
        
        # Compute average loss for the epoch
        average_loss = epoch_loss / len(dataloader)
        
        # Print the average loss to monitor training progress
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}")

# Assuming you have an initialized `WordAnalogyModel` instance and an optimizer

# Example usage:
# optimizer = torch.optim.Adam(word_analogy_model.parameters(), lr=0.001)
# train_model(word_analogy_model, dataloader, optimizer, num_epochs=10)


In [32]:
for s1, w1 in train_dataloader:
    print(s1, w1)
    break

[tensor([[40,  7,  4,  ...,  0,  0,  0],
        [20, 34, 15,  ...,  0,  0,  0],
        [ 3,  6,  4,  ..., 18,  2,  0],
        ...,
        [13, 10,  9,  ...,  0,  0,  0],
        [13,  7, 57,  ...,  0,  0,  0],
        [31, 27, 10,  ...,  0,  0,  0]]), tensor([[21, 25,  9,  ...,  0,  0,  0],
        [21, 25,  9,  ...,  0,  0,  0],
        [21, 25,  9,  ...,  0,  0,  0],
        ...,
        [21, 25,  9,  ...,  0,  0,  0],
        [21, 25,  9,  ...,  0,  0,  0],
        [21, 25,  9,  ...,  0,  0,  0]]), tensor([[40,  7,  4,  ...,  0,  0,  0],
        [20, 24, 15,  ...,  0,  0,  0],
        [ 3,  6,  4,  ..., 18,  2, 50],
        ...,
        [13, 10,  9,  ...,  0,  0,  0],
        [13, 10, 57,  ...,  0,  0,  0],
        [31, 27, 10,  ...,  2,  0,  0]]), tensor([[19, 25,  9,  ...,  0,  0,  0],
        [19, 25,  9,  ...,  0,  0,  0],
        [19, 25,  9,  ...,  0,  0,  0],
        ...,
        [19, 25,  9,  ...,  0,  0,  0],
        [19, 25,  9,  ...,  0,  0,  0],
        [19, 25,  9, 

In [31]:
train_model(word_analogy_model, train_dataloader, optimizer, num_epochs=num_epochs)

128 128


AttributeError: 'list' object has no attribute 'permute'

In [None]:
num_epochs = 20

# train the model
for epoch in range(num_epochs):
    w.train()
    total_loss = 0
    tdiff = 0
    for s1t, s2t, scorest in tqdm(train_dataloader_wo_elmo):
        s1t = s1t.to(device)
        s2t = s2t.to(device)
        scorest = scorest.to(device)
        optimizer.zero_grad()
        # print(len(s1t[0]))
        # stack the sentences
        # s1t = torch.stack(s1t, dim=1).to(device)
        # s2t = torch.stack(s2t, dim=1).to(device)
        outputs = similarity_model_wo_elmo(s1t, s2t)
        # print(outputs.squeeze().shape, scores.shape)
        # print(outputs.shape, scores.shape)
        loss = criterion(outputs, scorest)
        tdiff += torch.abs(outputs - scorest).sum().item()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch} Loss: {total_loss} Mean Difference: {tdiff/len(s1t)}")

    similarity_model.eval()
    with torch.no_grad():
        total_loss = 0
        # calculate mean difference between predicted and actual scores
        diff = 0

        for s1v, s2v, scoresv in val_dataloader_wo_elmo:
            s1v = s1v.to(device)
            s2v = s2v.to(device)
            scoresv = scoresv.to(device)
            # s1v = torch.stack(s1v, dim=1).to(device)
            # s2v = torch.stack(s2v, dim=1).to(device)
            outputs = similarity_model_wo_elmo(s1v, s2v)
            loss = criterion(outputs, scoresv)

            total_loss += loss.item()
            diff += torch.abs(outputs - scoresv).sum().item()
            # calculate 

        print(f"Validation Loss: {total_loss}" + f" Mean Difference: {diff/len(s1v)}")

In [None]:
kitna = "कितना"
milta = "मिलता"
hai = "है"
se = "से"
bhookh = "भूख"
bhookha = "भूखा"
pyaas = "प्यास"
imarat = "इमारत"

mujhe = "मुझे"
lag = "लग"
rahi = "रही"

aur = "और"

# merge the words into a single string
sentence = [mujhe + " " + bhookh + " "+ aur +" " + pyaas+" " + lag + " " +rahi + " " + hai]

In [None]:
t = tokenize(sentence)
t = split_into_characters(t, character_vocab, word_length=6, sen_len=15)

In [None]:
# convert t to a list of tensors
for i in range(len(t)):
    t[i] = torch.tensor(t[i]).to(device)

In [None]:
sentence_tensor = torch.stack(t, dim=1)

In [None]:
forward_output, backward_output, final_embeddings = model(sentence_tensor)

In [None]:
e1 = final_embeddings[-1][2][0]
e2 = final_embeddings[-1][6][0]

In [None]:
cosine_similarity = F.cosine_similarity(e1, e2, dim=0)

In [None]:
cosine_similarity