In [2]:
import numpy as np
import tensorflow as tf

# Load the dataset
def load_data(filename):
    with open(filename, 'r', encoding='utf-8') as f:
        text = f.read()
    return text

# Prepare data
def preprocess_data(text):
    chars = sorted(set(text))
    char_to_idx = {char: idx for idx, char in enumerate(chars)}
    idx_to_char = {idx: char for char, idx in char_to_idx.items()}
    encoded_text = np.array([char_to_idx[c] for c in text])
    return encoded_text, char_to_idx, idx_to_char

# Create input-output pairs
def create_dataset(encoded_text, seq_length):
    X = []
    y = []
    for i in range(len(encoded_text) - seq_length):
        X.append(encoded_text[i:i+seq_length])
        y.append(encoded_text[i+seq_length])
    return np.array(X), np.array(y)

# Define RNN from scratch
class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.hidden_size = hidden_size
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(output_size, hidden_size) * 0.01
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_size, 1))
        self.last_inputs = inputs
        self.last_hs = {0: h}

        for t, x in enumerate(inputs):
            x = x.reshape(-1, 1)
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
            self.last_hs[t + 1] = h

        y = self.Why @ h + self.by
        return y, h

    def backward(self, dLdy):
        n = len(self.last_inputs)
        dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why)
        dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by)
        dh = np.zeros_like(self.last_hs[0])

        for t in reversed(range(n)):
            dy = dLdy
            dWhy += dy @ self.last_hs[t + 1].T
            dby += dy
            dh = self.Why.T @ dy + dh
            dhraw = (1 - self.last_hs[t + 1] ** 2) * dh
            dbh += dhraw
            dWxh += dhraw @ self.last_inputs[t].reshape(1, -1)
            dWhh += dhraw @ self.last_hs[t].T
            dh = self.Whh.T @ dhraw

        return dWxh, dWhh, dWhy, dbh, dby

    def update_weights(self, dWxh, dWhh, dWhy, dbh, dby, lr=0.01):
        self.Wxh -= lr * dWxh
        self.Whh -= lr * dWhh
        self.Why -= lr * dWhy
        self.bh -= lr * dbh
        self.by -= lr * dby

# Train RNN

def train_rnn(rnn, X, y, epochs=2, lr=0.01):
    for epoch in range(epochs):
        loss = 0
        for i in range(len(X)):
            inputs = [np.eye(len(char_to_idx))[x] for x in X[i]]
            target = np.eye(len(char_to_idx))[y[i]]

            out, _ = rnn.forward(inputs)
            probs = np.exp(out) / np.sum(np.exp(out))
            loss += -np.log(probs[np.argmax(target)])

            dLdy = probs - target.reshape(-1, 1)
            dWxh, dWhh, dWhy, dbh, dby = rnn.backward(dLdy)
            rnn.update_weights(dWxh, dWhh, dWhy, dbh, dby, lr)

        print(f"Epoch {epoch + 1}, Loss: {loss / len(X)}")

# Comparison with TensorFlow

def train_tf_model(X, y, vocab_size, seq_length):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, 50, input_length=seq_length),
        tf.keras.layers.SimpleRNN(100, return_sequences=False),
        tf.keras.layers.Dense(vocab_size, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    model.fit(X, y, epochs=2, batch_size=64)
    return model

# Main workflow
filename = 'NextWordPrediction_1661-0.txt'
text = load_data(filename)
encoded_text, char_to_idx, idx_to_char = preprocess_data(text)

seq_length = 25
X, y = create_dataset(encoded_text, seq_length)

# Train custom RNN
vocab_size = len(char_to_idx)
rnn = SimpleRNN(vocab_size, 100, vocab_size)
train_rnn(rnn, X, y)

# Train TensorFlow RNN
model = train_tf_model(X, y, vocab_size, seq_length)


Epoch 1, Loss: [20.5658277]
Epoch 2, Loss: [22.29437467]




Epoch 1/2
[1m9092/9092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 13ms/step - loss: 2.4132
Epoch 2/2
[1m9092/9092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 13ms/step - loss: 1.8223
