In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Data preparation helpers

In [2]:
def create_vocab(text):
    """Build the two lookup tables that map characters to integer ids and back.

    The distinct characters of ``text`` are sorted, and each one is assigned
    its position in that sorted order as its id.

    Args:
        text: String whose distinct characters form the vocabulary.

    Returns:
        A ``(char_to_ix, ix_to_char)`` tuple of dicts that are inverses of
        each other.
    """
    ix_to_char = dict(enumerate(sorted(set(text))))
    char_to_ix = {symbol: position for position, symbol in ix_to_char.items()}
    return char_to_ix, ix_to_char

def one_hot_encode(index, vocab_size):
    """Return a ``(vocab_size, 1)`` column of zeros with a one at row ``index``.

    Args:
        index: Row to set to one (standard NumPy indexing rules apply).
        vocab_size: Number of rows in the resulting column vector.

    Returns:
        A float NumPy array of shape ``(vocab_size, 1)``.
    """
    column_shape = (vocab_size, 1)
    encoded = np.zeros(column_shape)
    encoded[index] = 1
    return encoded

# Activation functions and loss 

In [7]:
def softmax(x):
    """Numerically stable softmax taken along axis 0.

    Each column (or the whole vector, for 1-D input) is shifted by its own
    maximum before exponentiation. Shifting by a single global maximum, as
    the previous version did, lets a column whose values are all far below
    that maximum underflow to zeros and produce 0/0 = NaN.

    Args:
        x: Array-like of scores; a 1-D vector, an ``(n, 1)`` column, or an
            ``(n, m)`` batch with one example per column.

    Returns:
        Float array of the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to one.
    """
    x = np.asarray(x, dtype=float)
    # Per-column shift keeps the largest exponent at exp(0) == 1 in every column.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

def cross_entropy(y_pred, y_true):
    """Negative log-probability assigned to the true class.

    Args:
        y_pred: 2-D array of predicted probabilities; column 0 is read.
        y_true: Row index of the correct class in ``y_pred``.

    Returns:
        ``-log(y_pred[y_true, 0] + 1e-9)``; the small constant keeps the
        logarithm finite when the predicted probability is exactly zero.
    """
    epsilon = 1e-9
    target_probability = y_pred[y_true, 0]
    return -np.log(target_probability + epsilon)

def d_cross_entropy(y_pred, y_true):
    """Return ``y_pred`` with one subtracted at the true-class row.

    For probabilities produced by a softmax, this is the gradient of the
    cross-entropy loss with respect to the pre-softmax scores.

    Args:
        y_pred: NumPy array of predicted probabilities (left unmodified).
        y_true: The correct class, given either as an integer index (the
            form ``cross_entropy`` takes) or as a one-hot array, whose
            arg-max is used.

    Returns:
        A copy of ``y_pred`` with 1 subtracted at the true-class row.
    """
    grad = y_pred.copy()
    if np.ndim(y_true) == 0:
        # A bare class index: np.argmax of a scalar is always 0, which would
        # silently adjust the wrong row, so use the index directly.
        index = int(y_true)
    else:
        index = np.argmax(y_true)
    grad[index] -= 1
    return grad


# RNN class

In [8]:
class CharRNN:
    """Single-layer tanh recurrent network trained with backprop through time.

    Attributes:
        hidden_size: Number of hidden units.
        Wxh: Input-to-hidden weights, shape ``(hidden_size, input_size)``.
        Whh: Hidden-to-hidden weights, shape ``(hidden_size, hidden_size)``.
        Why: Hidden-to-output weights, shape ``(output_size, hidden_size)``.
        bh: Hidden bias, shape ``(hidden_size, 1)``.
        by: Output bias, shape ``(output_size, 1)``.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network with small random weights and zero biases.

        Args:
            input_size: Length of each input column vector.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            seed: Optional seed for reproducible weights. When ``None`` the
                weights are drawn from NumPy's global generator (so a prior
                ``np.random.seed`` call still applies); otherwise a private
                generator is used and the global state is left untouched.
        """
        self.hidden_size = hidden_size
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Weights are scaled by 0.01 so the initial tanh inputs stay small.
        self.Wxh = rng.randn(hidden_size, input_size) * 0.01
        self.Whh = rng.randn(hidden_size, hidden_size) * 0.01
        self.Why = rng.randn(output_size, hidden_size) * 0.01
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        """Run the network over a sequence, starting from a zero hidden state.

        The inputs and every hidden state are cached on the instance for use
        by ``backward``.

        Args:
            inputs: Sequence of input vectors, one per time step; each is
                expected to be an ``(input_size, 1)`` column so that it
                multiplies with ``Wxh``.

        Returns:
            List with one softmax probability vector per time step.
        """
        self.inputs = inputs
        self.hs = {}
        h = np.zeros((self.hidden_size, 1))
        # Key -1 holds the initial state so that hs[t-1] is valid at t == 0.
        self.hs[-1] = h
        ys = []

        for t, x in enumerate(inputs):
            h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h) + self.bh)
            y = np.dot(self.Why, h) + self.by
            ys.append(softmax(y))
            self.hs[t] = h
        return ys

    def backward(self, targets, outputs, learning_rate=0.1):
        """Backpropagate through time and apply one gradient-descent update.

        Must be called after ``forward``, whose cached inputs and hidden
        states it reads. Gradients are summed over all time steps, each
        gradient entry is clipped to ``[-1, 1]``, and the parameters are
        then updated in place.

        Args:
            targets: Per-step targets, passed to ``d_cross_entropy``.
            outputs: Per-step probabilities returned by ``forward``.
            learning_rate: Step size of the update.
        """
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        # Gradient flowing into h[t] from step t+1; nothing follows the last step.
        dh_next = np.zeros((self.hidden_size, 1))

        for t in reversed(range(len(self.inputs))):
            dy = d_cross_entropy(outputs[t], targets[t])
            dWhy += np.dot(dy, self.hs[t].T)
            dby += dy

            # h[t] feeds both the output at t and the hidden state at t+1.
            dh = np.dot(self.Why.T, dy) + dh_next
            # Derivative of tanh expressed through its output: 1 - tanh(z)**2.
            dh_raw = (1 - self.hs[t] ** 2) * dh
            dbh += dh_raw
            dWxh += np.dot(dh_raw, self.inputs[t].T)
            dWhh += np.dot(dh_raw, self.hs[t-1].T)
            dh_next = np.dot(self.Whh.T, dh_raw)

        # Element-wise clipping (in place) guards against exploding gradients.
        for d in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(d, -1, 1, out=d)

        self.Wxh -= learning_rate * dWxh
        self.Whh -= learning_rate * dWhh
        self.Why -= learning_rate * dWhy
        self.bh  -= learning_rate * dbh
        self.by  -= learning_rate * dby

# Prepare the text data

In [9]:
# Training corpus for the character model.
# NOTE(review): "Sience" looks like a typo for "Science"; left untouched
# because the string is the data the model is trained on.
text = "Delta University for Sience and Technology"
char_to_ix, ix_to_char = create_vocab(text)
vocab_size = len(char_to_ix)

# Next-character prediction: each character is an input whose target is the
# character that follows it, so inputs drop the last character and targets
# drop the first.
X_data = [char_to_ix[ch] for ch in text[:-1]]
Y_data = [char_to_ix[ch] for ch in text[1:]]

# One-hot column vectors, in the form the network consumes.
X_train = [one_hot_encode(ix, vocab_size) for ix in X_data]
Y_train = [one_hot_encode(ix, vocab_size) for ix in Y_data]

# RNN training

In [16]:
# Fix the global NumPy seed so weight initialisation (and any later sampling
# from np.random) is identical on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

rnn = CharRNN(input_size=vocab_size, hidden_size=16, output_size=vocab_size)

epochs = 2000
losses = []

# The targets never change between epochs, so decode the one-hot vectors to
# class indices once instead of on every epoch.
target_indices = [int(np.argmax(y)) for y in Y_train]

for epoch in range(epochs):
    # Full-sequence forward pass; the loss is the mean per-character cross-entropy.
    outputs = rnn.forward(X_train)
    loss = sum(cross_entropy(o, ix) for o, ix in zip(outputs, target_indices)) / len(Y_train)
    losses.append(loss)

    rnn.backward(Y_train, outputs, learning_rate=0.1)

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 3.0446
Epoch 100, Loss: 0.0174
Epoch 200, Loss: 0.0061
Epoch 300, Loss: 0.0036
Epoch 400, Loss: 0.0026
Epoch 500, Loss: 0.0020
Epoch 600, Loss: 0.0016
Epoch 700, Loss: 0.0014
Epoch 800, Loss: 0.0012
Epoch 900, Loss: 0.0010
Epoch 1000, Loss: 0.0009
Epoch 1100, Loss: 0.0008
Epoch 1200, Loss: 0.0008
Epoch 1300, Loss: 0.0007
Epoch 1400, Loss: 0.0006
Epoch 1500, Loss: 0.0006
Epoch 1600, Loss: 0.0006
Epoch 1700, Loss: 0.0005
Epoch 1800, Loss: 0.0005
Epoch 1900, Loss: 0.0005


# Generate text using the trained RNN

In [17]:
def generate_text(rnn, start_char, length, char_to_ix, ix_to_char):
    """Sample a string from the network one character at a time.

    Starting from a zero hidden state and ``start_char``, each step feeds the
    previous character in, draws the next one from the predicted
    distribution using NumPy's global random generator, and appends it.

    Args:
        rnn: Object exposing ``Wxh``, ``Whh``, ``Why``, ``bh``, ``by`` and
            ``hidden_size`` (e.g. a ``CharRNN``).
        start_char: First character; must be a key of ``char_to_ix``.
        length: Number of characters to sample after ``start_char``.
        char_to_ix: Mapping from character to integer id.
        ix_to_char: Mapping from integer id to character.

    Returns:
        ``start_char`` followed by ``length`` sampled characters.
    """
    # Take the vocabulary size from the mapping that was passed in rather than
    # from a notebook-level global, which may be missing or stale.
    vocab_size = len(char_to_ix)
    idx = char_to_ix[start_char]
    x = one_hot_encode(idx, vocab_size)
    h = np.zeros((rnn.hidden_size, 1))
    pieces = [start_char]

    for _ in range(length):
        h = np.tanh(np.dot(rnn.Wxh, x) + np.dot(rnn.Whh, h) + rnn.bh)
        y = np.dot(rnn.Why, h) + rnn.by
        p = softmax(y)
        idx = np.random.choice(range(vocab_size), p=p.ravel())
        # The sampled character becomes the input of the next step.
        x = one_hot_encode(idx, vocab_size)
        pieces.append(ix_to_char[idx])

    return "".join(pieces)


# Generate sample text with the trained model

In [20]:
# Sample 11 characters following the start character 'd' and show the result.
print(
    generate_text(
        rnn,
        start_char='d',
        length=11,
        char_to_ix=char_to_ix,
        ix_to_char=ix_to_char,
    )
)


d Technology
