In [1]:
import os, sys

project_root = os.path.abspath('/Users/subhojit/workspace/saturn/src')
if project_root not in sys.path:
    sys.path.append(project_root)

from rnn.recnet_batch import *
import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
words = open('indian_names.txt', 'r').read().splitlines()
chars = sorted(list(set(''.join(words))))
chars = ['<PAD>', '<SOS>', '<EOS>'] + chars
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}
vocab_size = len(stoi)
print(f"vocab: {stoi}")
print(max(len(w) for w in words))

vocab: {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2, 'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11, 'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28}
19


In [3]:
def encode_name(name):
    encode = [stoi['<SOS>']] + [stoi[ch] for ch in name] + [stoi['<EOS>']]
    return encode

def decode_name(indices):
    return ''.join([itos[i] for i in indices])

encode_name('bobby')

[1, 4, 17, 4, 4, 27, 2]

In [4]:
training_data = []

np.random.shuffle(words)

for name in words:
    # print(name)
    encoded = encode_name(name)

    inputs = []
    targets = []
    for i in range(len(encoded) - 1):
        x_idx = encoded[i]
        y_idx = encoded[i+1]
        x_onehot = np.zeros((vocab_size, 1))
        x_onehot[x_idx] = 1.0
        inputs.append(x_onehot)
        targets.append(y_idx)

    training_data.append((inputs, targets))

print(len(training_data))

6485


In [9]:
def pad_inputs(sequences, pad_vector):
    max_len = max(len(seq) for seq in sequences)
    padded = []

    for seq in sequences:
        padded_seq = seq + [pad_vector] * (max_len - len(seq))
        padded.append(padded_seq)

    return np.array(padded)

def pad_target(sequences, pad_idx):
    max_len = max(len(seq) for seq in sequences)
    padded = []

    for seq in sequences:
        padded_seq = seq + [pad_idx] * (max_len - len(seq))
        padded.append(padded_seq)

    return np.array(padded)

n_epochs = 10000
batch_size = 32  # Set batch size
hidden_size = 128

rnn = VanillaBatchRNN(input_size=vocab_size, hidden_size=hidden_size, output_size=vocab_size)

PAD_IDX = stoi['<PAD>']
pad_vector = np.zeros((vocab_size, 1))
pad_vector[PAD_IDX] = 1.0

for epoch in range(n_epochs):
    total_loss = 0.0

    idx = np.random.randint(0, len(training_data), (batch_size,))
    batch = [training_data[i] for i in idx]

    batch_inputs, batch_targets = zip(*batch)

    # Pad to same length
    batch_inputs_padded = pad_inputs(batch_inputs, pad_vector)
    batch_targets_padded = pad_target(batch_targets, PAD_IDX)

    # Now batch_inputs_padded: (batch_size, seq_len)
    # transpose to (seq_len, batch_size)
    batch_inputs_tensor = np.transpose(batch_inputs_padded, (1, 0, 2, 3))
    batch_targets_tensor = np.transpose(batch_targets_padded, (1, 0))

    loss = rnn.train_step(batch_inputs_tensor, batch_targets_tensor)
    total_loss += loss

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Total Loss: {total_loss}")

Epoch 100, Total Loss: 15.594780855561952,  Avg Loss: 0.0024
Epoch 200, Total Loss: 16.86643580533764,  Avg Loss: 0.0026
Epoch 300, Total Loss: 13.867842034878665,  Avg Loss: 0.0021
Epoch 400, Total Loss: 14.249396619332645,  Avg Loss: 0.0022
Epoch 500, Total Loss: 13.700381300752074,  Avg Loss: 0.0021
Epoch 600, Total Loss: 12.748666077413747,  Avg Loss: 0.0020
Epoch 700, Total Loss: 14.355873268188835,  Avg Loss: 0.0022
Epoch 800, Total Loss: 14.194644605653691,  Avg Loss: 0.0022
Epoch 900, Total Loss: 14.567304635142136,  Avg Loss: 0.0022
Epoch 1000, Total Loss: 14.613040933857956,  Avg Loss: 0.0023
Epoch 1100, Total Loss: 16.58243531330838,  Avg Loss: 0.0026
Epoch 1200, Total Loss: 14.158134557314359,  Avg Loss: 0.0022
Epoch 1300, Total Loss: 13.868278542025337,  Avg Loss: 0.0021
Epoch 1400, Total Loss: 15.525643562364632,  Avg Loss: 0.0024
Epoch 1500, Total Loss: 13.8486873154172,  Avg Loss: 0.0021
Epoch 1600, Total Loss: 14.818775557494618,  Avg Loss: 0.0023
Epoch 1700, Total Los

In [12]:
def sample(idx_to_char, max_length=20, temperature=1.0):
    ht = np.zeros((1, hidden_size))
    idx = stoi['<PAD>']
    generated_indices = [idx]

    for _ in range(max_length):
        xt = np.zeros((1, vocab_size))
        xt[0, idx] = 1  # one-hot input

        activation = xt @ rnn.Wxh.T + ht @ rnn.Whh.T + rnn.bh.T
        ht = np.tanh(activation)
        output_logits = ht @ rnn.Why.T + rnn.by.T

        # Temperature scaling
        logits = output_logits / temperature

        exp_logits = np.exp(logits - np.max(logits))
        probs = exp_logits / np.sum(exp_logits)

        idx = np.random.choice(range(vocab_size), p=probs.ravel())
        generated_indices.append(idx)

        if idx_to_char[idx] == '\n':
            break

    generated_sequence = ''.join(idx_to_char[i] for i in generated_indices)

    return generated_sequence

for _ in range(10):
    n = sample(itos, temperature=0.5)
    print(n)


<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa
<PAD>yhsa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa<PAD><PAD>sa


  activation = xt @ rnn.Wxh.T + ht @ rnn.Whh.T + rnn.bh.T
  activation = xt @ rnn.Wxh.T + ht @ rnn.Whh.T + rnn.bh.T
  activation = xt @ rnn.Wxh.T + ht @ rnn.Whh.T + rnn.bh.T
  output_logits = ht @ rnn.Why.T + rnn.by.T
  output_logits = ht @ rnn.Why.T + rnn.by.T
  output_logits = ht @ rnn.Why.T + rnn.by.T
