In [14]:
"""
Refactored Minimal Character-Level Vanilla RNN.
Original Author: Andrej Karpathy (@karpathy)
License: BSD

Modified By: Kirubel Awoke, aka,light337
"""
import numpy as np
import sys

# --- I/O and Data Preprocessing ---

# Ensure compatibility with file opening across environments
try:
    with open('pride_and_prejudice.txt', 'r', encoding='utf-8') as f:
        data = f.read()
except FileNotFoundError:
    print("Error: 'input.txt' not found. Please provide a training corpus.")
    sys.exit(1)

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

# FIX: Python 2 'print' statement converted to Python 3 'print()' function
print('data has %d characters, %d unique.' % (data_size, vocab_size))

# Mappings
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# --- Hyperparameters and Model Initialization ---

# Hyperparameters
HIDDEN_SIZE = 100    # Size of hidden layer of neurons (H)
SEQ_LENGTH = 25      # Steps to unroll the RNN (T)
LEARNING_RATE = 1e-1 # Adagrad learning rate

# Model Parameters (Matrices and Biases)
# Weight initialization: small random numbers (~0.01) to prevent saturation
Wxh = np.random.randn(HIDDEN_SIZE, vocab_size) * 0.01  # Input-to-Hidden (H x V)
Whh = np.random.randn(HIDDEN_SIZE, HIDDEN_SIZE) * 0.01 # Hidden-to-Hidden (H x H)
Why = np.random.randn(vocab_size, HIDDEN_SIZE) * 0.01  # Hidden-to-Output (V x H)
bh = np.zeros((HIDDEN_SIZE, 1))                        # Hidden Bias (H x 1)
by = np.zeros((vocab_size, 1))                         # Output Bias (V x 1)

# --- Core Functions: Loss/Grad and Sampling ---

def loss_fun(inputs, targets, hprev):
    """
    Computes the loss and gradients for one sequence of data.
    inputs, targets are lists of integer indices.
    hprev is the Hx1 array of initial hidden state.
    Returns: loss, gradients (dWxh, dWhh, dWhy, dbh, dby), and last hidden state.
    """
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0

    # 1. Forward Pass (Sequence Iteration)
    # FIX: Python 2 'xrange' replaced with Python 3 'range'
    for t in range(len(inputs)):
        # One-hot encoding of input x[t]
        xs[t] = np.zeros((vocab_size,1))
        xs[t][inputs[t]] = 1

        # RNN Update Equations (Hidden state and Output score)
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # h[t]
        ys[t] = np.dot(Why, hs[t]) + by                                # y[t] (unnormalized log-probs)

        # Softmax: convert scores to probabilities p[t]
        # Robust Softmax (Optional, but good practice): p = np.exp(y - np.max(y)) / np.sum(np.exp(y - np.max(y)))
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))

        # Cross-Entropy Loss: L = -log(P(target))
        loss += -np.log(ps[t][targets[t],0])

    # 2. Backward Pass (Backpropagation Through Time, BPTT)
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])

    # FIX: Python 2 'xrange' replaced with Python 3 'range'
    for t in reversed(range(len(inputs))):
        # Backprop through Softmax/Loss
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1 # The gradient of the loss w.r.t the unnormalized log-probabilities y[t]

        # Backprop through Output layer (Why, by)
        dWhy += np.dot(dy, hs[t].T)
        dby += dy

        # Backprop through Hidden state h[t] and Recurrence
        dh = np.dot(Why.T, dy) + dhnext
        dhraw = (1 - hs[t] * hs[t]) * dh # Backprop through tanh nonlinearity: d(tanh(x))/dx = 1 - tanh^2(x)

        # Backprop through Hidden layer (Wxh, Whh, bh)
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)

        # Pass gradient to previous time step
        dhnext = np.dot(Whh.T , dhraw)

    # Gradient Clipping: Mitigate Exploding Gradients (L2 norm clipping is better, but this is simple)
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)

    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

def sample(h, seed_ix, n_steps):
    """
    Samples a sequence of n_steps from the model, starting with seed_ix.
    h is the initial hidden state.
    """
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []

    # FIX: Python 2 'xrange' replaced with Python 3 'range'
    for _ in range(n_steps):
        # Forward pass (Inference)
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        p = np.exp(y) / np.sum(np.exp(y))

        # Stochastic Sampling: Select next character index based on probability distribution p
        ix = np.random.choice(range(vocab_size), p=p.ravel())

        # Prepare next input (one-hot vector for the sampled index)
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)

    return ixes

# --- Training Loop and Adagrad Optimization ---

n, p = 0, 0
# Memory variables for Adagrad optimizer
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)
# Initialize loss (Perplexity P = vocab_size)
smooth_loss = -np.log(1.0/vocab_size) * SEQ_LENGTH

# The Main Training Loop
while True:
    # Set RNN memory and data pointer (reset every epoch or at start)
    if p + SEQ_LENGTH + 1 >= len(data) or n == 0:
        hprev = np.zeros((HIDDEN_SIZE,1)) # Reset RNN memory
        p = 0 # Go from start of data

    # Prepare sequence batch (inputs and one-step-ahead targets)
    inputs = [char_to_ix[ch] for ch in data[p:p+SEQ_LENGTH]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+SEQ_LENGTH+1]]

    # Sample and print progress every 100 iterations
    if n % 100 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        # FIX: Python 2 'print' statement converted to Python 3 'print()' function
        print('----\n %s \n----' % (txt, ))

    # Forward/Backward pass
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = loss_fun(inputs, targets, hprev)

    # Exponentially smoothed loss (for stable tracking)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001

    if n % 100 == 0:
        # FIX: Python 2 'print' statement converted to Python 3 'print()' function
        print('iter %d, loss: %f' % (n, smooth_loss))

    # Adagrad Update: Efficient, adaptive learning rate optimization
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                  [dWxh, dWhh, dWhy, dbh, dby],
                                  [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        # Update rule: W = W - LR * dW / sqrt(mem + epsilon)
        param += -LEARNING_RATE * dparam / (np.sqrt(mem) + 1e-8)

    # Advance data pointer and iteration counter
    p += SEQ_LENGTH
    n += 1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
if with his and
mantle’s greated she were courten in repabstitt. DI I mother of the reaching
Projech,--                     56?
Col ingle, happy, and Project betweld; ands 
----
iter 2813700, loss: 41.593893
----
 
sirt Mr. Darcy has opening I no senle of that impossible their expect her till like the on the ruclods agciet
detcangial yatter abosed these sucedfbally _me.” spetably a so eur, so know Elizabeth wil 
----
iter 2813800, loss: 41.565933
----
 ke long han_ whunsing of proper you freation withoolly not of Lond_ Elizabeth of it, to Charlown, copily so him. As that in Lozzmofity, and ceer the
spoke the mother hunjussely been,
in hemaing with h 
----
iter 2813900, loss: 41.643198
----
  of much she is part crions, and flotsing equal--to Mr. Darcy--sis.”

“Lady
Catherine impless of sister surpricity generfur a visis we Jane for changibe.”

She looked herselress muspeady the as were c 
----
iter 2814000, loss: 41.61351

KeyboardInterrupt: 

In [16]:
from google.colab import files

np.savez('rnn_weights_final.npz',
         Wxh=Wxh, Whh=Whh, Why=Why, bh=bh, by=by)

files.download('rnn_weights_final.npz')

print("Weights saved successfully and downloaded.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Weights saved successfully and downloaded.


In [19]:
loaded_data = np.load('rnn_weights_final.npz')

# Overwrite the initialized global parameters with the trained weights
Wxh = loaded_data['Wxh']
Whh = loaded_data['Whh']
Why = loaded_data['Why']
bh = loaded_data['bh']
by = loaded_data['by']

print("Model weights loaded successfully.")

Model weights loaded successfully.


In [21]:
# Select a seed character
seed_char = data[0]
seed_ix = char_to_ix[seed_char]

# Initialize the hidden state to zero for a clean start
hprev_test = np.zeros((hidden_size, 1))

# Generate a sequence of 1000 characters
# The sample function is deterministic only in terms of the random seed,
generated_ix = sample(hprev_test, seed_ix, n_steps=1000)

# Convert the indices back to characters
generated_text = ''.join(ix_to_char[ix] for ix in generated_ix)

print("--- START GENERATION ---")
print(generated_text)
print("--- END GENERATION ---")

--- START GENERATION ---
will whethor.



               writiee, without ored the your happine. Mr.
Bingley, with from ytures, and her our idfece in  


 [Ilsk. But her are not either behaviour imsurengee to car
say could great, compary, that a
she’s; flitlly
sinermers of Ladyshamused she is you no looked awished,” recomund Mr. Bingley, you conlons, bechmlan fom out, she expectoutestaif may with, ercying, forgubface hopen as that pusting thickin, sirvols sometfly its . “Upous kunderth the gived wwo bothin
at shome allire him wonden. Pros? Bennet away my themsed him. But, your; and Nother; but cased was lacleated than gener.”

“Goodest to be been were bet _ysaffes ham his head expended of it.”

“I,” and, wish in the refabout, and at Lizzy!” and and the duson. I would better.”

“You hay money. She
countaty be
afking whee same to
Wicking nor exactly or till the
Under; “you
such
as in commonest to uptonk, Bofin of what one
of that thought nither have
been will set for young houst.”  litfu