In [12]:
import os, sys
from rnn.rnn_loss import CrossEntropyLoss

project_root = os.path.abspath('/Users/subhojit/workspace/saturn/src')
if project_root not in sys.path:
    sys.path.append(project_root)

from rnn.recnet import *
import matplotlib.pyplot as plt
%matplotlib inline


In [13]:
# Load the name corpus, one name per line. The context manager closes the file
# handle as soon as the read finishes instead of leaking it, and the explicit
# encoding keeps the result independent of the platform default.
with open('indian_names.txt', 'r', encoding='utf-8') as names_file:
    words = names_file.read().splitlines()

# Vocabulary: the three special tokens take ids 0-2, followed by every distinct
# character that occurs in the corpus, in sorted order.
chars = ['<PAD>', '<SOS>', '<EOS>'] + sorted(set(''.join(words)))
stoi = {ch: i for i, ch in enumerate(chars)}  # token -> integer id
itos = {i: ch for i, ch in enumerate(chars)}  # integer id -> token
vocab_size = len(stoi)
print(f"vocab: {stoi}")

vocab: {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2, 'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11, 'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28}


In [14]:
def encode_name(name):
    """Map a name to a list of token ids framed by <SOS> and <EOS>.

    Each character of `name` is looked up in the module-level `stoi` table;
    a character missing from the table raises KeyError.
    """
    token_ids = [stoi['<SOS>']]
    token_ids.extend(stoi[ch] for ch in name)
    token_ids.append(stoi['<EOS>'])
    return token_ids

def decode_name(indices):
    """Turn an iterable of token ids back into text.

    Every id is looked up in the module-level `itos` table and the resulting
    tokens are concatenated with no separator; special tokens are kept as-is.
    """
    pieces = []
    for token_id in indices:
        pieces.append(itos[token_id])
    return ''.join(pieces)

# Smoke test: expect the <SOS> id, one id per character, then the <EOS> id.
encode_name('bobby')

[1, 4, 17, 4, 4, 27, 2]

In [16]:
# Build one (inputs, targets) pair per name for next-character prediction:
#   inputs  - list of (vocab_size, 1) one-hot column vectors, one per input token
#   targets - list of integer ids, the token that follows each input token
training_data = []

# Shuffle a COPY with a dedicated, seeded generator. The presentation order is
# then reproducible across kernel restarts, and re-running this cell neither
# re-shuffles already-shuffled data nor mutates `words` for other cells.
SHUFFLE_SEED = 42
shuffled_words = list(words)
np.random.default_rng(SHUFFLE_SEED).shuffle(shuffled_words)

for name in shuffled_words:
    encoded = encode_name(name)

    inputs = []
    targets = []
    # Pair every token with its successor: encoded[i] -> encoded[i + 1].
    for x_idx, y_idx in zip(encoded[:-1], encoded[1:]):
        x_onehot = np.zeros((vocab_size, 1))
        x_onehot[x_idx, 0] = 1.0
        inputs.append(x_onehot)
        targets.append(y_idx)

    training_data.append((inputs, targets))

len(training_data)

6485

In [17]:
# Hyper-parameters for the character-level RNN.
hidden_size = 128
n_epochs = 1000
log_every = 100  # print a progress line once per this many epochs

rnn = VanillaRNN(input_size=vocab_size, hidden_size=hidden_size, output_size=vocab_size)
# NOTE(review): loss_fn is not used in this cell; train_step presumably
# computes its own loss. Confirm before removing.
loss_fn = CrossEntropyLoss()
lossi = []  # summed training loss of each completed epoch

for epoch in range(n_epochs):
    total_loss = 0.0

    for inputs, targets in training_data:
        loss = rnn.train_step(inputs, targets)
        # Fail fast on divergence: a NaN/inf loss would otherwise be folded
        # into total_loss and silently poison every later entry of lossi.
        # To investigate, inspect np.max(np.abs(rnn.grads[k])) for k in
        # ('Whh', 'Wxh', 'Why').
        if not np.all(np.isfinite(loss)):
            raise FloatingPointError(
                f"non-finite loss ({loss}) at epoch {epoch + 1}; training has diverged"
            )
        total_loss += loss

    lossi.append(total_loss)

    if (epoch + 1) % log_every == 0:
        print(f"Epoch {epoch+1}, Total loss: {total_loss: .4f},  Avg Loss: {total_loss/len(training_data):.4f}")

  activation = self.Whh @ ht + self.Wxh @ xt + self.bh
  activation = self.Whh @ ht + self.Wxh @ xt + self.bh
  activation = self.Whh @ ht + self.Wxh @ xt + self.bh
  y = self.Why @ ht + self.by
  y = self.Why @ ht + self.by
  y = self.Why @ ht + self.by


KeyboardInterrupt: 

In [None]:
# Histogram of the most recent gradients stored under rnn.grads['Wxh'].
# plt.hist treats a 2-D array as one dataset per column, so flatten first to
# get a single distribution over all entries.
# NOTE(review): assumes grads['Wxh'] is 2-D like rnn.Wxh; np.ravel is a no-op
# on data that is already 1-D.
plt.hist(np.ravel(rnn.grads['Wxh']), bins=30)
plt.title('Wxh gradient distribution')
plt.xlabel('Gradient value')
plt.ylabel('Frequency')
plt.show()

In [None]:
# now generate names

def sample(max_length=10):
    """Generate one name from the trained RNN and print it.

    Starting from the <SOS> token and a zero hidden state, each step feeds the
    previous token as a one-hot column vector through the tanh recurrence,
    turns the output logits into a softmax distribution and draws the next
    token with np.random.choice. Generation stops when <EOS> is drawn or after
    `max_length` draws, whichever comes first.

    Args:
        max_length: maximum number of tokens to draw.

    Returns:
        None. The generated text is printed, without the <EOS> marker.
    """
    eos_idx = stoi['<EOS>']
    idx = stoi['<SOS>']
    ht = np.zeros((hidden_size, 1))
    output_indices = []

    for _ in range(max_length):
        one_hot = np.zeros((vocab_size, 1))
        one_hot[idx, 0] = 1.0
        ht = np.tanh(rnn.Whh @ ht + rnn.Wxh @ one_hot + rnn.bh)
        logits = rnn.Why @ ht + rnn.by
        # Subtract the max before exponentiating so the softmax cannot overflow.
        exp_logits = np.exp(logits - np.max(logits))
        probs = exp_logits / np.sum(exp_logits)

        idx = np.random.choice(vocab_size, p=probs.ravel())
        # Stop BEFORE recording <EOS>. Slicing off the last element afterwards
        # would also drop a real character whenever the loop ends by reaching
        # max_length instead of by drawing <EOS>.
        if idx == eos_idx:
            break
        output_indices.append(idx)

    # NOTE(review): <PAD>/<SOS> are not excluded from sampling and would be
    # printed literally if drawn.
    print(''.join(itos[i] for i in output_indices))


# Print a batch of generated names, one per line.
num_names = 100
for _ in range(num_names):
    sample()




In [None]:
# Two 2x2 integer matrices for a quick check of the `@` matrix-product operator.
A = np.arange(1, 5).reshape(2, 2)  # [[1, 2], [3, 4]]
B = np.arange(5, 9).reshape(2, 2)  # [[5, 6], [7, 8]]

A @ B

In [None]:
# Synthetic stand-in for exploding gradients: 10,000 standard-normal draws
# scaled by 100, i.e. a very large standard deviation.
exploding_grads = 100 * np.random.standard_normal(10000)

fig, ax = plt.subplots()
ax.hist(exploding_grads, bins=50, color='red', alpha=0.7)
ax.set_title('❌ Exploding Gradients')
ax.set_xlabel('Gradient value')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()

In [None]:
# Element-wise clipping of the synthetic gradients to [-5, 5] simulates what
# gradient clipping does to the distribution.
stable_grads = exploding_grads.clip(-5, 5)

fig, ax = plt.subplots()
ax.hist(stable_grads, bins=50, color='green', alpha=0.7)
ax.set_title('✅ Clipped Gradients')
ax.set_xlabel('Gradient value')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()
