In [None]:
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
import pandas as pd
import ast

In [None]:
import torch
import torch.nn.functional as F

class DiacriticModel(nn.Module):
    """Character-level tagger: a bidirectional LSTM encoder seeds an LSTM decoder.

    Both LSTMs read the same embedded input sequence. The encoder's final
    (hidden, cell) states are reshaped by `transform_hidden` and used as the
    decoder's initial state; the decoder output at every position is projected
    to `vocab_size` scores by a linear layer.

    Args:
        vocab_size: Number of distinct token indices (embedding rows and output scores).
        embedding_dim: Size of each token embedding.
        hidden_dim: Hidden size of the encoder per direction; the decoder uses twice this.
        num_layers: Number of stacked layers in both the encoder and the decoder.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)
        # The decoder is twice as wide so it can be initialised from the
        # concatenated forward/backward encoder states.
        self.decoder = nn.LSTM(embedding_dim, hidden_dim * 2, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, vocab_size)

    def forward(self, x, hidden=None):
        """Score every vocabulary entry at every position of `x`.

        Args:
            x: Token indices laid out batch-first (both LSTMs use `batch_first=True`).
            hidden: Initial encoder state as an `(h_0, c_0)` tuple, e.g. from
                `init_hidden`. When omitted, `nn.LSTM` starts from zeros, which is
                what `init_hidden` produces as well.

        Returns:
            A tuple `(output, hidden)`: `output` holds `vocab_size` scores per
            input position, `hidden` is the decoder's final `(h_n, c_n)` state.
        """
        embedded = self.embedding(x)
        # Only the final encoder state is used; per-step encoder outputs are discarded.
        _, encoder_hidden = self.encoder(embedded, hidden)
        decoder_hidden = self.transform_hidden(encoder_hidden)
        output, hidden = self.decoder(embedded, decoder_hidden)
        return self.fc(output), hidden

    def transform_hidden(self, encoder_hidden):
        """Merge the two directions of the encoder state into one decoder state.

        Args:
            encoder_hidden: Tuple `(h_n, c_n)`, each of shape
                [num_layers * num_directions, batch, hidden_size].

        Returns:
            Tuple `(h_n, c_n)`, each of shape [num_layers, batch, 2 * hidden_size].
        """
        h_n, c_n = encoder_hidden
        # Even rows along dim 0 hold the forward direction of each layer, odd rows
        # the backward direction; join the two per layer along the feature axis.
        h_n = torch.cat([h_n[0::2], h_n[1::2]], dim=2)
        c_n = torch.cat([c_n[0::2], c_n[1::2]], dim=2)
        return (h_n, c_n)

    def init_hidden(self, batch_size):
        """Return an all-zero `(h_0, c_0)` initial state for the encoder.

        The tensors are allocated with the dtype and device of the model's own
        parameters, so the method does not depend on any notebook-level `device`
        variable and stays correct after `model.to(...)`.

        Args:
            batch_size: Number of sequences in the batch the state is for.

        Returns:
            Tuple of two zero tensors of shape [num_layers * 2, batch_size, hidden_dim]
            (the factor 2 accounts for the bidirectional encoder).
        """
        reference = next(self.parameters())
        state_shape = (self.num_layers * 2, batch_size, self.hidden_dim)
        return (reference.new_zeros(state_shape), reference.new_zeros(state_shape))


In [None]:
import pickle

# Lookup tables used by the inference cells: `vocab` is queried per character
# when encoding a sentence, `inv_vocab` per predicted index when decoding.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only load these from a trusted location.
VOCAB_PATH = '/content/drive/MyDrive/NLP/vocab.pickle'
INV_VOCAB_PATH = '/content/drive/MyDrive/NLP/inv_vocab.pickle'

with open(VOCAB_PATH, 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)

with open(INV_VOCAB_PATH, 'rb') as inv_vocab_file:
    inv_vocab = pickle.load(inv_vocab_file)


In [None]:
# Compute device shared by the cells below (model placement, checkpoint
# mapping, input tensors). Defined here so a fresh kernel does not hit a
# NameError on `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Architecture hyperparameters. They have to agree with the saved checkpoint:
# load_state_dict raises if a parameter shape differs.
# NOTE(review): VOCAB_SIZE is presumably len(vocab) -- confirm against the pickle.
VOCAB_SIZE = 111
EMBEDDING_DIM = 256
HIDDEN_DIM = 512
NUM_LAYERS = 2

model = DiacriticModel(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS)
model.to(device)

# map_location lets the checkpoint load onto whichever device was selected above.
model.load_state_dict(torch.load('/content/drive/MyDrive/NLP/model.pth', map_location=device))
model.eval()


In [None]:
def prepare_input_data(sentence, vocab, target_device=None):
    """Encode a sentence as a single-row batch of token indices.

    Args:
        sentence: Iterable of characters (typically a str) to encode.
        vocab: Mapping from character to integer index. It must contain the key
            '<unk>', whose index is used for characters missing from the mapping.
        target_device: Device for the returned tensor. When None, the
            notebook-level `device` variable is used, so existing two-argument
            calls behave as before.

    Returns:
        A torch.long tensor of shape [1, len(sentence)].

    Raises:
        KeyError: If `vocab` has no '<unk>' entry.
    """
    if target_device is None:
        target_device = device
    # Resolve the fallback index once instead of once per character.
    unk_index = vocab['<unk>']
    tokens = [vocab.get(char, unk_index) for char in sentence]
    # unsqueeze(0) adds the leading batch dimension.
    return torch.tensor(tokens, dtype=torch.long, device=target_device).unsqueeze(0)

# Rows to run inference on; the prediction loop reads the 'id' and 'sentence' columns.
TEST_DATA_PATH = '/content/drive/MyDrive/NLP/test_data.csv'
test_data = pd.read_csv(TEST_DATA_PATH)


In [None]:
def predict(model, sentence, vocab, inv_vocab):
    """Run the model on one sentence and decode its most likely output characters.

    Args:
        model: Object exposing `init_hidden(batch_size)` and a call signature
            `model(input_tensor, hidden)` returning `(scores, state)`.
        sentence: Input text; each character is encoded via `vocab`.
        vocab: Mapping from character to index, passed to `prepare_input_data`.
        inv_vocab: Mapping from predicted index back to its output string.

    Returns:
        The concatenation of `inv_vocab[idx]` for the highest-scoring index at
        each input position, or '' when `sentence` is empty.
    """
    if len(sentence) == 0:
        # Nothing to decode; also avoids pushing a zero-length sequence through the model.
        return ''

    input_tensor = prepare_input_data(sentence, vocab)
    hidden = model.init_hidden(1)  # one sentence per call -> batch size 1
    with torch.no_grad():
        output, _ = model(input_tensor, hidden)
    # Pick the best index over the last (score) axis, then drop the batch dimension.
    predicted_indices = output.argmax(dim=2).squeeze(0).tolist()
    return ''.join(inv_vocab[idx] for idx in predicted_indices)

# Predict every test sentence and pair each result with its row id.
# The two needed columns are iterated directly instead of using
# DataFrame.iterrows(), which builds a Series per row and does not preserve
# column dtypes.
predictions = [
    (row_id, predict(model, input_sentence, vocab, inv_vocab))
    for row_id, input_sentence in zip(test_data['id'], test_data['sentence'])
]

# Convert results to a DataFrame for output
predictions_df = pd.DataFrame(predictions, columns=['id', 'sentence'])


In [None]:
# Write the (id, sentence) predictions out as CSV, leaving out the DataFrame index column.
PREDICTIONS_PATH = '/content/drive/MyDrive/NLP/predictions.csv'
predictions_df.to_csv(PREDICTIONS_PATH, index=False)
