<a href="https://colab.research.google.com/github/ochaudha/sample/blob/main/RNN1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random

# --- 1. Configuration ---
# You can uncomment and modify these if you have a GPU
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu") # For broader compatibility

# Hyperparameters
HIDDEN_SIZE = 256
EMBEDDING_DIM = 64
LEARNING_RATE = 0.005
NUM_EPOCHS = 3000
MAX_LENGTH = 15 # Max characters in a name (e.g., "Omveer" is 6, "Rahul" is 5)
TEACHER_FORCING_RATIO = 0.5 # For training stability

# --- 2. Data Preparation ---

# Tiny dataset of Roman script names and their Hindi transliterations
# In a real scenario, this would be a much larger dataset.
training_pairs = [
    ("Omveer", "ओमवीर"),
    ("Rahul", "राहुल"),
    ("Priya", "प्रिया"),
    ("Amit", "अमित"),
    ("Saurabh", "सौरभ"),
    ("Deepak", "दीपक"),
    ("Anjali", "अंजलि"),
    ("Kavita", "कविता"),
    ("Nitin", "नितिन"),
    ("Sneha", "स्नेहा"),
    ("Vivek", "विवेक"),
    ("Pooja", "पूजा"),
    ("Mohan", "मोहन"),
    ("Ritu", "ऋतु"),
    ("Gaurav", "गौरव"),
    ("Preeti", "प्रीति"),
    ("Rakesh", "राकेश"),
    ("Seema", "सीमा"),
    ("Vijay", "विजय"),
    ("Sarita", "सरिता"),
    ("Omveer Singh", "ओमवीर सिंह") # A slightly longer example
]

# Special tokens
SOS_token = 0  # Start Of Sequence
EOS_token = 1  # End Of Sequence
PAD_token = 2  # Padding

class Lang:
    def __init__(self, name):
        self.name = name
        self.char2idx = {}
        self.idx2char = {0: "<SOS>", 1: "<EOS>", 2: "<PAD>"}
        self.n_chars = 3  # Count SOS, EOS, PAD

    def add_sentence(self, sentence):
        for char in sentence:
            self.add_char(char)

    def add_char(self, char):
        if char not in self.char2idx:
            self.char2idx[char] = self.n_chars
            self.idx2char[self.n_chars] = char
            self.n_chars += 1

# Build separate language objects for input (English) and output (Hindi)
input_lang = Lang('eng')
output_lang = Lang('hin')

for eng, hin in training_pairs:
    input_lang.add_sentence(eng)
    output_lang.add_sentence(hin)

print(f"Input vocabulary size: {input_lang.n_chars}")
print(f"Output vocabulary size: {output_lang.n_chars}")

# Helper to convert text to indices tensor, with padding
def tensor_from_text(lang, text, max_length=MAX_LENGTH):
    indices = [lang.char2idx[char] for char in text]
    indices.append(EOS_token) # Add EOS token
    if len(indices) > max_length: # Truncate if too long
        indices = indices[:max_length-1] + [EOS_token]
    padded_indices = indices + [PAD_token] * (max_length - len(indices)) # Pad
    return torch.tensor(padded_indices, dtype=torch.long, device=device).view(-1, 1)

# --- 3. Model Architecture ---

# Encoder
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, embedding_dim):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

# Decoder with Attention
class AttnDecoderRNN(nn.Module):
    def __init__(self, output_size, hidden_size, embedding_dim, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, embedding_dim)
        self.attn = nn.Linear(embedding_dim + hidden_size, self.max_length) # Attention weights
        self.attn_combine = nn.Linear(embedding_dim + hidden_size, hidden_size) # Combine attended context and embedding
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Attention mechanism
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        # Combine embedded input and attended context
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)


# --- 4. Training Function ---

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.init_hidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    # Encoder pass
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    # Teacher forcing: Use the real target as next input
    use_teacher_forcing = True if random.random() < TEACHER_FORCING_RATIO else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing
    else:
        # Without teacher forcing: Use its own prediction as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # Detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

# --- 5. Evaluation / Inference Function ---

def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensor_from_text(input_lang, sentence)
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.init_hidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden

        decoded_chars = []

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_chars.append('<EOS>')
                break
            else:
                decoded_chars.append(output_lang.idx2char[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ''.join(decoded_chars)

# --- 6. Main Execution ---

if __name__ == "__main__":
    # Initialize models
    encoder = EncoderRNN(input_lang.n_chars, HIDDEN_SIZE, EMBEDDING_DIM).to(device)
    decoder = AttnDecoderRNN(output_lang.n_chars, HIDDEN_SIZE, EMBEDDING_DIM, dropout_p=0.1, max_length=MAX_LENGTH).to(device)

    # Optimizers
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=LEARNING_RATE)

    # Loss function
    criterion = nn.NLLLoss(ignore_index=PAD_token) # NLLLoss with LogSoftmax output, ignore padding

    print("Starting training...")
    for epoch in range(1, NUM_EPOCHS + 1):
        # Pick a random training pair for simplicity
        input_text, target_text = random.choice(training_pairs)

        input_tensor = tensor_from_text(input_lang, input_text).to(device)
        target_tensor = tensor_from_text(output_lang, target_text).to(device)

        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)

        if epoch % 500 == 0:
            print(f"Epoch {epoch}/{NUM_EPOCHS}, Loss: {loss:.4f}")
            # Evaluate some examples during training
            print(f"  Input: {input_text} -> Predicted: {evaluate(encoder, decoder, input_text)}")
            print(f"  Input: Omveer -> Predicted: {evaluate(encoder, decoder, 'Omveer')}")
            print(f"  Input: Rahul -> Predicted: {evaluate(encoder, decoder, 'Rahul')}")
            print("-" * 20)

    print("\nTraining complete! Testing specific examples:")
    test_names = ["Omveer", "Rahul", "Priya", "Saurabh", "Anjali", "Omveer Singh", "NonExistent"]
    for name in test_names:
        print(f"'{name}' -> '{evaluate(encoder, decoder, name)}'")

Input vocabulary size: 34
Output vocabulary size: 33
Starting training...
Epoch 500/3000, Loss: nan
  Input: Pooja -> Predicted: पूजा<EOS>
  Input: Omveer -> Predicted: दममी<EOS>
  Input: Rahul -> Predicted: राजु<EOS>
--------------------
Epoch 1000/3000, Loss: 0.0008
  Input: Sneha -> Predicted: स्नेहा<EOS>
  Input: Omveer -> Predicted: ओमवीर<EOS>
  Input: Rahul -> Predicted: राहुल<EOS>
--------------------
Epoch 1500/3000, Loss: 0.0003
  Input: Rahul -> Predicted: राहुल<EOS>
  Input: Omveer -> Predicted: ओमवीर<EOS>
  Input: Rahul -> Predicted: राहुल<EOS>
--------------------
Epoch 2000/3000, Loss: nan
  Input: Vivek -> Predicted: विवेक<EOS>
  Input: Omveer -> Predicted: ओमवीर<EOS>
  Input: Rahul -> Predicted: राहुल<EOS>
--------------------
Epoch 2500/3000, Loss: 0.0001
  Input: Gaurav -> Predicted: गौरव<EOS>
  Input: Omveer -> Predicted: ओमवीर<EOS>
  Input: Rahul -> Predicted: राहुल<EOS>
--------------------
Epoch 3000/3000, Loss: nan
  Input: Preeti -> Predicted: प्रीति<EOS>
  Inpu

KeyError: 'E'