In [16]:
# These lines import the necessary libraries: NumPy for numerical operations, Pandas for data manipulation, and Matplotlib for plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
# Loads the poem corpus from 'poems-100.csv' (a path relative to the working directory) into a Pandas DataFrame.
# The bare `df` on the last line makes the notebook render the frame as the cell's output.
df = pd.read_csv("poems-100.csv")
df

Unnamed: 0,text
0,"O my Luve's like a red, red rose\nThat’s newly..."
1,"The rose is red,\nThe violet's blue,\nSugar is..."
2,How do I love thee? Let me count the ways.\nI ...
3,"Had I the heavens' embroidered cloths,\nEnwrou..."
4,"I.\n Enough! we're tired, my heart and I.\n..."
...,...
95,The city had withdrawn into itself\nAnd left a...
96,O gift of God! O perfect day:\n Whereon...
97,"The world is too much with us; late and soon,\..."
98,To him who in the love of Nature holds\nCo...


In [18]:
# Builds the token stream: every entry of the DataFrame's first column is cast to str,
# glued together with single spaces, lowercased, and finally split on whitespace.
text = " ".join(df.iloc[:, 0].astype(str)).lower()
tokens = text.split()

In [19]:
# Collects the distinct tokens, orders them alphabetically to form the vocabulary, and reports how many there are.
vocab = list(set(tokens))
vocab.sort()
vocab_size = len(vocab)
print("Vocabulary Size:", vocab_size)

Vocabulary Size: 6989


In [20]:
# Builds the two lookup tables between vocabulary words and their integer positions.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: position for position, word in idx_to_word.items()}

In [21]:
# Encodes the token stream as a list of vocabulary indices (same order as `tokens`).
data = list(map(word_to_idx.__getitem__, tokens))

In [22]:
# Sets the hyperparameters for the RNN model.
hidden_size = 100  # length of the hidden state vector (and row count of Wxh / Whh)
seq_length = 5  # number of tokens in each training window passed to loss_fun
learning_rate = 0.01  # factor multiplying each gradient in the parameter update
epochs = 50  # number of full passes over the encoded token sequence

In [23]:
# Initializes the weight matrices and bias vectors for the RNN.
# NumPy's global RNG is seeded first so that the initial weights (and any later
# np.random sampling in this notebook) are repeatable on a fresh Run All.
np.random.seed(42)

# Small Gaussian weights (std 0.01) keep the initial tanh activations near their linear region.
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01  # input  -> hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden -> hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01  # hidden -> output

bh = np.zeros((hidden_size, 1))  # hidden bias (column vector)
by = np.zeros((vocab_size, 1))  # output bias (column vector)

In [24]:
# Defines the softmax activation function for probability distribution.
def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The global maximum is subtracted before exponentiating so that large
    inputs do not overflow; the result has the same shape as ``x`` and its
    entries sum to 1.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps)

In [25]:
# Defines a one-hot encoding function.
def one_hot(idx, size):
    """Return a ``(size, 1)`` float column vector that is 1 at row ``idx`` and 0 elsewhere."""
    column = np.zeros((size, 1))
    column[idx] = 1.0
    return column

In [26]:
# Defines the loss function for the RNN, including forward and backward passes with gradient clipping.
def loss_fun(inputs, targets, hprev):
    """Run one forward and backward pass of the RNN over a single window.

    Reads the module-level parameters ``Wxh``, ``Whh``, ``Why``, ``bh``, ``by``
    and ``vocab_size``; it does not modify them.

    Args:
        inputs: sequence of vocabulary indices fed to the network, one per
            time step. Must contain at least one element (``hs[0]`` is read
            after the forward loop).
        targets: sequence of vocabulary indices, ``targets[t]`` being the
            index whose probability is scored at step ``t``.
        hprev: hidden state carried in from the previous window; it is
            copied, not mutated.

    Returns:
        A tuple ``(loss, dWxh, dWhh, dWhy, dbh, dby, h_last)`` where ``loss``
        is the negative log-probability of the targets summed (not averaged)
        over the time steps, the ``d*`` arrays are the gradients clipped
        element-wise to ``[-5, 5]``, and ``h_last`` is the hidden state after
        the final input.
    """
    # Per-time-step caches: one-hot inputs, hidden states, logits, probabilities.
    xs, hs, ys, ps = {}, {}, {}, {}
    # Key -1 holds the incoming state so that hs[t-1] is valid at t == 0.
    hs[-1] = np.copy(hprev)
    loss = 0

    # Forward pass
    for t in range(len(inputs)):
        xs[t] = one_hot(inputs[t], vocab_size)
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        ys[t] = np.dot(Why, hs[t]) + by
        ps[t] = softmax(ys[t])
        # Cross-entropy contribution: -log of the probability assigned to the target index.
        loss += -np.log(ps[t][targets[t], 0])

    # Backward pass (backpropagation through time, last step first)
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # Gradient flowing into the hidden state from the following time step; zero at the last step.
    dhnext = np.zeros_like(hs[0])

    for t in reversed(range(len(inputs))):
        # Gradient of the loss w.r.t. the logits: probabilities minus the one-hot target.
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1

        dWhy += np.dot(dy, hs[t].T)
        dby += dy

        # Hidden-state gradient = contribution from this step's output + from the next step.
        dh = np.dot(Why.T, dy) + dhnext
        # Backprop through tanh: d/dz tanh(z) = 1 - tanh(z)^2, and hs[t] is tanh(z).
        dhraw = (1 - hs[t] ** 2) * dh

        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)

        dhnext = np.dot(Whh.T, dhraw)

    # Clip gradients element-wise, in place, to limit the size of any single update.
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)

    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [27]:
# Trains the RNN with plain SGD over consecutive, non-overlapping windows of `seq_length`
# tokens, recording the mean per-window loss of every epoch and the total training time.
import time

# Fail early with a clear message instead of a ZeroDivisionError when the corpus
# is too short to form even one (inputs, targets) window.
if len(data) < seq_length + 1:
    raise ValueError(
        f"Need at least {seq_length + 1} tokens to form one training window, got {len(data)}"
    )

# perf_counter is the high-resolution clock intended for measuring elapsed intervals.
start_time = time.perf_counter()

losses = []

for epoch in range(epochs):

    p = 0  # start offset of the current window inside `data`
    hprev = np.zeros((hidden_size, 1))  # hidden state is reset at the start of every epoch
    epoch_loss = 0
    steps = 0

    # `<=` keeps the last full window: targets end at index p + seq_length (inclusive),
    # which is valid as long as p + seq_length + 1 <= len(data).
    while p + seq_length + 1 <= len(data):

        inputs = data[p:p+seq_length]
        targets = data[p+1:p+seq_length+1]  # inputs shifted by one token

        loss, dWxh, dWhh, dWhy, dbh, dby, hprev = loss_fun(inputs, targets, hprev)

        # In-place SGD step: `-=` mutates the module-level parameter arrays directly.
        for param, dparam in zip(
            [Wxh, Whh, Why, bh, by],
            [dWxh, dWhh, dWhy, dbh, dby]
        ):
            param -= learning_rate * dparam

        epoch_loss += loss
        steps += 1
        p += seq_length

    # Each window's loss is summed over its seq_length tokens, so this is a per-window mean.
    avg_epoch_loss = epoch_loss / steps
    losses.append(avg_epoch_loss)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_epoch_loss:.4f}")

end_time = time.perf_counter()

scratch_training_time = end_time - start_time
scratch_final_loss = losses[-1]

print("\nTraining Time:", scratch_training_time, "seconds")

Epoch 1/50, Loss: 35.8941
Epoch 2/50, Loss: 33.9766
Epoch 3/50, Loss: 33.3694
Epoch 4/50, Loss: 32.7181
Epoch 5/50, Loss: 32.0114
Epoch 6/50, Loss: 31.3204
Epoch 7/50, Loss: 30.6378
Epoch 8/50, Loss: 29.9812
Epoch 9/50, Loss: 29.3352
Epoch 10/50, Loss: 28.7723
Epoch 11/50, Loss: 28.2497
Epoch 12/50, Loss: 27.8055
Epoch 13/50, Loss: 27.3241
Epoch 14/50, Loss: 26.8832
Epoch 15/50, Loss: 26.5657
Epoch 16/50, Loss: 26.2555
Epoch 17/50, Loss: 25.8941
Epoch 18/50, Loss: 25.5755
Epoch 19/50, Loss: 25.2474
Epoch 20/50, Loss: 25.1796
Epoch 21/50, Loss: 24.9422
Epoch 22/50, Loss: 24.6393
Epoch 23/50, Loss: 24.4855
Epoch 24/50, Loss: 24.3235
Epoch 25/50, Loss: 24.2468
Epoch 26/50, Loss: 23.9014
Epoch 27/50, Loss: 23.7812
Epoch 28/50, Loss: 23.5064
Epoch 29/50, Loss: 23.6619
Epoch 30/50, Loss: 23.2421
Epoch 31/50, Loss: 23.3046
Epoch 32/50, Loss: 22.9588
Epoch 33/50, Loss: 23.3021
Epoch 34/50, Loss: 22.8502
Epoch 35/50, Loss: 22.7848
Epoch 36/50, Loss: 22.6629
Epoch 37/50, Loss: 22.6060
Epoch 38/5

In [28]:
# Stores the training results in a Pandas DataFrame and saves it to a CSV file.
# A dedicated name is used so the poems DataFrame `df` loaded earlier is not overwritten,
# which keeps the earlier cells re-runnable.
scratch_results_df = pd.DataFrame([
    {
    "Model": "RNN From Scratch (NumPy)",
    "Training_Time_Seconds": scratch_training_time,
    "Final_Loss": scratch_final_loss
    }
])

scratch_results_df.to_csv("scratch_results.csv", index=False)

In [29]:
# Defines a function to generate text from the trained RNN given a seed word and desired length.
def generate_text(seed_word, length=20):
    """Sample a word sequence from the trained RNN.

    Reads the module-level parameters (``Wxh``, ``Whh``, ``Why``, ``bh``,
    ``by``) and lookup tables (``word_to_idx``, ``idx_to_word``).

    Args:
        seed_word: first word of the output. It is looked up as given; if
            that fails, its lowercased form is tried, because the vocabulary
            is built from lowercased text.
        length: number of words to sample after the seed (default 20).

    Returns:
        The seed word followed by ``length`` sampled words, joined by single
        spaces.

    Raises:
        KeyError: if the seed word is not in the vocabulary.
    """
    if seed_word not in word_to_idx:
        lowered = seed_word.lower() if isinstance(seed_word, str) else seed_word
        if lowered not in word_to_idx:
            raise KeyError(f"seed word {seed_word!r} is not in the vocabulary")
        seed_word = lowered

    h = np.zeros((hidden_size, 1))  # start from a zero hidden state
    x = one_hot(word_to_idx[seed_word], vocab_size)
    generated = [seed_word]

    for _ in range(length):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        probs = softmax(np.dot(Why, h) + by)

        # Passing an int samples from arange(vocab_size) without rebuilding an array from range().
        idx = int(np.random.choice(vocab_size, p=probs.ravel()))

        generated.append(idx_to_word[idx])
        x = one_hot(idx, vocab_size)  # the sampled word becomes the next input

    return " ".join(generated)

In [30]:
# Calls generate_text to sample 30 words after the seed, using the first token of the corpus as the seed, and prints the result.
print(generate_text(tokens[0], 30))

o the clock where march, though all on, should grow thus certain who work, given and soothed to and clear morning: that have to chamber i sometimes red, and been buckwheat,
