In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

# ---- Setup ----

# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Vocabulary: the two sequence markers come first, followed by a-z.
all_characters = "abcdefghijklmnopqrstuvwxyz"
special_tokens = ['<SOS>', '<EOS>']
all_tokens = [*special_tokens, *all_characters]
n_characters = len(all_tokens)

# Lookup tables in both directions between tokens and their integer ids.
idx_to_char = dict(enumerate(all_tokens))
char_to_idx = {token: position for position, token in idx_to_char.items()}

SOS_idx, EOS_idx = char_to_idx['<SOS>'], char_to_idx['<EOS>']

# Helpers
def string_to_tensor(name):
    """Encode ``name`` as a 1-D ``torch.long`` tensor of vocabulary ids.

    Every element of ``name`` is looked up in ``char_to_idx``; an element
    that is not a key of that table raises ``KeyError``.
    """
    token_ids = list(map(char_to_idx.__getitem__, name))
    return torch.tensor(token_ids, dtype=torch.long)

def tensor_to_string(tensor):
    """Decode a tensor of vocabulary ids into the concatenated token text.

    Iterates over ``tensor``, converts each element to a Python number with
    ``.item()`` and maps it through ``idx_to_char``.
    """
    return ''.join(idx_to_char[element.item()] for element in tensor)

# ---- Model ----

class NameRNN(nn.Module):
    """Single-step character model: embedding -> GRU cell -> linear logits.

    The model processes one time step per call; the caller is responsible
    for looping over the sequence and threading the hidden state through.

    Args:
        vocab_size: Number of distinct token ids (embedding rows and number
            of output logits).
        embedding_dim: Size of each token embedding.
        hidden_size: Size of the GRU hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRUCell(embedding_dim, hidden_size)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_idx, hidden):
        """Advance the model by one time step.

        Args:
            input_idx: Tensor of token ids for the current step (callers in
                this file pass a 1-D tensor with one id per batch row).
            hidden: Hidden state from the previous step (or ``init_hidden``).

        Returns:
            ``(output, hidden)``: the unnormalised logits over the vocabulary
            and the updated hidden state.
        """
        embedded = self.embedding(input_idx)
        hidden = self.gru(embedded, hidden)
        output = self.fc(hidden)
        return output, hidden

    def init_hidden(self, batch_size=1):
        """Return an all-zero hidden state of shape ``(batch_size, hidden_size)``.

        The tensor is created with the same device and dtype as the model's
        own weights, so it stays valid after ``.to(...)`` / ``.double()``
        and does not depend on any module-level ``device`` variable.
        """
        return self.fc.weight.new_zeros(batch_size, self.hidden_size)

# ---- Instantiate ----

# Model hyper-parameters.
embedding_dim = 128
hidden_size = 512

# Build the network and move its parameters to the selected device.
rnn = NameRNN(
    vocab_size=n_characters,
    embedding_dim=embedding_dim,
    hidden_size=hidden_size,
)
rnn = rnn.to(device)

# Adam optimiser; StepLR multiplies the learning rate by 0.5 after every
# 3000 calls to scheduler.step().
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.003)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3000,
    gamma=0.5,
)
criterion = nn.CrossEntropyLoss()

# ---- Data Preparation ----

# Placeholder dataset: swap in the full list of names (about 6000) here.
training_data = [
    "arjun",
    "anita",
    "bharat",
    "devika",
    "rajat",
    "suman",
    "priya",
]

def prepare_batch(names, batch_size):
    """Draw a random batch of names and build padded input/target tensors.

    Each of the ``batch_size`` names is picked with ``random.choice`` (so
    repeats are possible), wrapped as ``<SOS> name <EOS>`` and split into an
    input sequence (everything but the last token) and a target sequence
    (everything but the first token).

    Returns:
        ``(padded_inputs, padded_targets, input_lengths)`` where both tensors
        are ``torch.long`` of shape ``(batch_size, max_len)`` on ``device``
        and ``input_lengths`` is a Python list with the unpadded length of
        each row. Positions past a row's length are left at 0, which is also
        the id of ``<SOS>`` in this file's vocabulary, so ``input_lengths``
        is the only way to tell padding apart from real tokens.
    """
    encoded = []
    for _ in range(batch_size):
        tokens = ['<SOS>', *random.choice(names), '<EOS>']
        encoded.append([char_to_idx[tok] for tok in tokens])

    # Inputs and targets are the same sequence shifted by one, so both have
    # one element fewer than the wrapped token list.
    input_lengths = [len(ids) - 1 for ids in encoded]
    max_len = max(input_lengths)

    padded_inputs = torch.zeros(batch_size, max_len, dtype=torch.long)
    padded_targets = torch.zeros(batch_size, max_len, dtype=torch.long)
    for row, (ids, length) in enumerate(zip(encoded, input_lengths)):
        padded_inputs[row, :length] = torch.tensor(ids[:-1], dtype=torch.long)
        padded_targets[row, :length] = torch.tensor(ids[1:], dtype=torch.long)

    return padded_inputs.to(device), padded_targets.to(device), input_lengths

# ---- Training ----

def train_step(batch_inputs, batch_targets, input_lengths):
    """Run one optimisation step on a padded batch and return its loss.

    Args:
        batch_inputs: ``(batch, seq_len)`` tensor of input token ids.
        batch_targets: ``(batch, seq_len)`` tensor of target token ids.
        input_lengths: Unpadded length of every row. Positions at or beyond
            a row's length are padding and are excluded from the loss.

    Returns:
        The mean cross-entropy per real (non-padded) token, as a float.

    Side effects:
        Puts ``rnn`` in training mode, updates its parameters through
        ``optimizer`` and advances ``scheduler`` by one step.
    """
    rnn.train()
    optimizer.zero_grad()

    batch_size, seq_len = batch_inputs.shape
    hidden = rnn.init_hidden(batch_size)

    # valid[b, t] is True while time step t lies inside sequence b. Without
    # this mask the padded positions (target id 0, i.e. <SOS>) would be
    # trained on as if they were real data.
    lengths = torch.as_tensor(input_lengths, device=batch_inputs.device)
    steps = torch.arange(seq_len, device=batch_inputs.device)
    valid = steps.unsqueeze(0) < lengths.unsqueeze(1)

    step_logits = []
    for t in range(seq_len):
        output, hidden = rnn(batch_inputs[:, t], hidden)
        step_logits.append(output)
    logits = torch.stack(step_logits, dim=1)  # (batch, seq_len, vocab)

    # Boolean indexing flattens to (n_real_tokens, vocab) / (n_real_tokens,),
    # so the criterion averages over real tokens only.
    loss = criterion(logits[valid], batch_targets[valid])

    loss.backward()
    optimizer.step()
    scheduler.step()

    return loss.item()

# ---- Sampling ----

def sample(start_letter='<SOS>', temperature=0.8, max_length=20):
    """Generate one name by sampling from the model character by character.

    Args:
        start_letter: ``'<SOS>'`` to let the model choose the first
            character, or a vocabulary character that the generated name
            must start with.
        temperature: Positive divisor applied to the logits before softmax;
            lower values make sampling greedier, higher values more random.
        max_length: Maximum number of characters to sample (a supplied
            ``start_letter`` is not counted against this limit).

    Returns:
        The generated name. It begins with ``start_letter`` when one was
        given and never contains the special tokens.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
        KeyError: If ``start_letter`` is not in the vocabulary.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature}")

    rnn.eval()
    with torch.no_grad():
        hidden = rnn.init_hidden(1)
        input_idx = torch.tensor([SOS_idx], device=device)
        generated = []

        if start_letter != '<SOS>':
            forced_idx = torch.tensor([char_to_idx[start_letter]], device=device)
            # Training sequences always begin with <SOS>, so consume it first
            # to put the hidden state where the model expects it, then
            # continue from the requested letter and keep it in the output.
            _, hidden = rnn(input_idx, hidden)
            input_idx = forced_idx
            generated.append(start_letter)

        for _ in range(max_length):
            output, hidden = rnn(input_idx, hidden)

            logits = output.view(-1) / temperature
            # <SOS> is never a valid continuation; give it zero probability
            # so it cannot leak into the generated text.
            logits[SOS_idx] = float('-inf')
            probs = F.softmax(logits, dim=0)
            top_idx = torch.multinomial(probs, 1)[0]

            if top_idx.item() == EOS_idx:
                break

            generated.append(idx_to_char[top_idx.item()])
            input_idx = top_idx.unsqueeze(0)

        return ''.join(generated)

# ---- Main Training Loop ----

# Each "epoch" here is one optimisation step on a freshly sampled batch.
n_epochs = 20000
batch_size = 32

for epoch in range(1, n_epochs + 1):
    batch_inputs, batch_targets, input_lengths = prepare_batch(training_data, batch_size)
    loss = train_step(batch_inputs, batch_targets, input_lengths)

    # Only report progress on every 500th step.
    if epoch % 500 != 0:
        continue

    print(f"Epoch {epoch}, Loss: {loss:.4f}")
    # Show three pairs of samples, one per temperature, to eyeball quality.
    for _ in range(3):
        print(f"Generated (0.8): {sample(temperature=0.8)}")
        print(f"Generated (1.2): {sample(temperature=1.2)}")
    print()



In [5]:
# Print 30 names sampled at a high temperature (2).
for _ in range(30):
    generated_name = sample(temperature=2)
    print(generated_name)

anita
devika
devika
arjun
rajat
devika
priya
suman
devika
priya
priya
devika
anita
priya
devika
arjun
bharat
arjun
suman
suman
bharat
anita
suman
devanita
suman
bharat
arjun
arjun
priya
devika
