# Turkish Diacritization with LSTM

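This notebook demonstrates how to restore Turkish diacritics with a character-level Long Short-Term Memory (LSTM) network: given text typed without diacritics (for example, `icin`), the model predicts the diacritized form (`için`). The notebook loads the character vocabularies, defines the model architecture, loads the trained weights, and uses the model to predict diacritics for a new sentence.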

## Setup and Configuration

### Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import pickle
import ast

### Device Configuration

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Model and Data Preparation

### Load Vocabularies


In [3]:
# Lookup used to turn input characters into indices (must contain "<unk>" and "<pad>").
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open("vocab.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)

# Lookup used to turn predicted indices back into characters.
with open("inv_vocab.pkl", "rb") as inv_vocab_file:
    inv_vocab = pickle.load(inv_vocab_file)

### Define the Model Architecture

In [4]:
class DiacriticModel(nn.Module):
    """Encoder/decoder LSTM that emits one vocabulary distribution per input position.

    The input indices are embedded once. A bidirectional LSTM encoder reads the
    embedded sequence, its final states are reshaped to seed a unidirectional LSTM
    decoder, and the decoder (fed the same embeddings) is projected onto the
    vocabulary by a linear layer.

    Args:
        vocab_size: Number of entries in the vocabulary (embedding rows and output logits).
        embedding_dim: Size of each embedding vector.
        hidden_dim: Hidden size of the encoder per direction; the decoder uses twice this.
        num_layers: Number of stacked LSTM layers in both encoder and decoder.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)
        # The decoder hidden size is doubled so it can be seeded with the
        # concatenated forward/backward encoder states.
        self.decoder = nn.LSTM(embedding_dim, hidden_dim * 2, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, vocab_size)

    def forward(self, x, hidden=None):
        """Compute per-position vocabulary logits for a batch of index sequences.

        Args:
            x: Integer tensor of vocabulary indices, batch first.
            hidden: Optional initial ``(h_0, c_0)`` for the encoder, e.g. from
                ``init_hidden``. When omitted, the encoder LSTM starts from zeros.

        Returns:
            A tuple ``(logits, decoder_state)`` where ``logits`` has one row of
            ``vocab_size`` scores per input position and ``decoder_state`` is the
            decoder's final ``(h_n, c_n)``.
        """
        embedded = self.embedding(x)
        # Only the encoder's final states are used; its per-step outputs are discarded.
        _, encoder_hidden = self.encoder(embedded, hidden)
        decoder_hidden = self.transform_hidden(encoder_hidden)
        output, hidden = self.decoder(embedded, decoder_hidden)
        return self.fc(output), hidden

    def transform_hidden(self, encoder_hidden):
        """Merge the encoder's per-direction states into decoder-sized states.

        Even-indexed entries along dim 0 are concatenated with the odd-indexed
        entries along the feature dimension, halving dim 0 and doubling dim 2.

        Args:
            encoder_hidden: Tuple ``(h_n, c_n)`` returned by the encoder.

        Returns:
            Tuple ``(h_n, c_n)`` shaped for the decoder.
        """
        h_n, c_n = encoder_hidden
        h_n = torch.cat([h_n[0::2], h_n[1::2]], dim=2)
        c_n = torch.cat([c_n[0::2], c_n[1::2]], dim=2)
        return (h_n, c_n)

    def init_hidden(self, batch_size):
        """Return zero-filled initial ``(h_0, c_0)`` states for the encoder.

        The states are created from the model's own parameters, so they always
        share the model's device and dtype instead of relying on a module-level
        ``device`` variable.

        Args:
            batch_size: Number of sequences in the batch.

        Returns:
            Tuple of two zero tensors of shape
            ``(num_layers * 2, batch_size, hidden_dim)``.
        """
        reference = self.embedding.weight
        shape = (self.num_layers * 2, batch_size, self.hidden_dim)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

## Model Loading

### Initialize and Load the Model

In [5]:
# Checkpoint holding the trained weights.
model_path = "model_epoch_2.pth"

# Build the network and move it to the selected device before restoring the weights.
model = DiacriticModel(
    vocab_size=len(vocab),
    embedding_dim=256,
    hidden_dim=512,
    num_layers=2,
).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

# Switch to evaluation mode; as the cell's last expression this also displays the module summary.
model.eval()

DiacriticModel(
  (embedding): Embedding(111, 256)
  (encoder): LSTM(256, 512, num_layers=2, batch_first=True, bidirectional=True)
  (decoder): LSTM(256, 1024, num_layers=2, batch_first=True)
  (fc): Linear(in_features=1024, out_features=111, bias=True)
)

## Input Processing and Prediction

### Define Input Processing Functions

In [6]:
def process_input(sentence, vocab):
    """Encode a sentence as a batch containing a single sequence of vocabulary indices.

    Args:
        sentence: Text to encode; it is processed one character at a time.
        vocab: Mapping from character to index. Must contain ``"<unk>"`` (used for
            characters missing from the mapping) and ``"<pad>"``.

    Returns:
        An integer (``torch.long``) tensor with a leading batch dimension of 1,
        placed on the notebook's ``device``.
    """
    unk_index = vocab["<unk>"]
    indices = [vocab.get(char, unk_index) for char in sentence]
    # Force an integer dtype: torch.tensor([]) would otherwise default to float32,
    # which an embedding lookup rejects.
    sequence = torch.tensor(indices, dtype=torch.long)
    tensor_input = pad_sequence([sequence], batch_first=True, padding_value=vocab["<pad>"]).to(device)
    return tensor_input

### Prediction Function

In [7]:
def predict_sentence(model, sentence, vocab, inv_vocab):
    """Predict the diacritized form of a sentence, one output character per input character.

    Args:
        model: Model exposing ``init_hidden(batch_size)`` and returning
            ``(logits, state)`` when called with ``(input_tensor, hidden)``.
        sentence: Text to diacritize.
        vocab: Character-to-index mapping passed to ``process_input``.
        inv_vocab: Lookup from predicted index to output character.

    Returns:
        The predicted string; an empty string when ``sentence`` is empty.
    """
    # Nothing to predict for empty input, and an empty sequence cannot be fed to the model.
    if not sentence:
        return ""

    input_tensor = process_input(sentence, vocab)
    hidden = model.init_hidden(input_tensor.size(0))
    with torch.no_grad():
        output, _ = model(input_tensor, hidden)
        # Highest-scoring vocabulary index at each position; drop the batch dimension.
        predicted_indices = output.argmax(dim=2).squeeze(0).tolist()
    return "".join(inv_vocab[index] for index in predicted_indices)

## Interactive Testing

### Perform Predictions

In [10]:
# Sample sentence written without Turkish diacritics.
input_sentence = (
    "fillistin ya da filistin devleti isgal altindaki "
    "filistin topraklari icin kullanilmaktadir"
)
output_sentence = predict_sentence(
    model=model,
    sentence=input_sentence,
    vocab=vocab,
    inv_vocab=inv_vocab,
)

## Output

In [9]:
# Show the model's prediction for the sample sentence.
print("Diacritized sentence:", output_sentence)

Diacritized sentence: fillistin ya da filistin devleti isgal altındaki filistin toprakları için kullanılmaktadır
