In [1]:
import numpy as np

In [46]:
data = "hello"
chars = list(set(data))
vocab_size = len(chars)

chars_to_indices = {char: i for i, char in enumerate(chars)}
indices_to_chars = {i: char for i, char in enumerate(chars)}

chars_to_indices, indices_to_chars

({'h': 0, 'o': 1, 'e': 2, 'l': 3}, {0: 'h', 1: 'o', 2: 'e', 3: 'l'})

In [67]:
# RNN configs
embedding_size = vocab_size
hidden_size = 64
output_size = vocab_size

In [68]:
# Matrices
Wxh = np.random.randn(embedding_size, hidden_size)
Whh = np.random.randn(hidden_size, hidden_size)
bh = np.random.randn(1, hidden_size)

Why = np.random.randn(hidden_size, output_size)
by = np.random.randn(1, output_size)

In [99]:
def softmax(z):
    e_z = np.exp(z)
    return e_z / e_z.sum(axis=1)

def rnn(x_t, h_prev):
    assert x_t.shape == (1, embedding_size)
    assert h_prev.shape == (1, hidden_size)

    h_next = np.tanh(x_t @ Wxh + h_prev @ Whh + bh)
    probs = softmax(h_next @ Why + by)
    
    return probs, h_next

def sample(char, n):
    # sample rnn n times starting with the first char
    x = np.zeros((1, vocab_size))
    idx = chars_to_indices[char]
    x[:, idx] = 1 # one hot encoding
    h =  np.zeros((1, hidden_size)) # hidden state
    idxes = []
    
    for i in range(n):
        probs, h = rnn(x, h) # xt, hprev -> rnn -> probs, hnext
        idx = np.random.choice(vocab_size, p=probs.ravel()) # .ravel returns a 1d array
        
        x = np.zeros((1, vocab_size))
        x[:, idx] = 1
        idxes.append(idx)

    chars = "".join([indices_to_chars[i] for i in idxes])
    return chars

In [100]:
x_t = np.random.randn(1, embedding_size)
h_prev = np.random.randn(1, hidden_size)

probs, h_next = rnn(x_t, h_prev)
probs.shape, h_next.shape

((1, 4), (1, 64))

In [101]:
inp = [chars_to_indices[c] for c in data[:-1]]
target = [chars_to_indices[c] for c in data[1:]]

inp, target

([0, 2, 3, 3], [2, 3, 3, 1])

In [102]:
sample("h", 4)

'lehe'

In [113]:
for e in range(1000):
    # compute loss
    loss = 0
    h = np.zeros((1, hidden_size))
    for i in range(len(inp)):
        idx = inp[i]
        x = np.zeros((1, vocab_size))
        x[:, idx] = 1
        probs, h = rnn(x, h)

        # cross entropy
        pred = probs[0, target[i]]
        loss += -np.log(pred)

    print(loss)

    # backprop
    break

42.81752482486429
