In [1]:
from dataclasses import replace, asdict
import numpy as np
import json
from tqdm.auto import tqdm

from rnn import RNN, AdaGrad, Text, synthesize

In [2]:
# Load the corpus and take the first passage yielded by text.passages(25).
# In the recorded output below, `context` and `targets` each hold 25 entries
# and `targets` is `context` shifted one position ahead.
text = Text.from_file("../data/goblet_book.txt")
passage = next(text.passages(25))
# Bare last expression so the notebook displays the passage.
passage

Passage(context=array([53, 77, 71, 71, 12, 43, 52, 60, 49, 49,  1, 71, 43, 77, 17,  6, 43,
       49, 53,  1, 43, 38, 60, 68, 41]), targets=array([77, 71, 71, 12, 43, 52, 60, 49, 49,  1, 71, 43, 77, 17,  6, 43, 49,
       53,  1, 43, 38, 60, 68, 41,  1]))

In [3]:
# Construct an RNN with one class per unique character in the corpus and
# hidden_dim=16, then display text synthesized from it with length=1000.
# NOTE(review): no random seed is set anywhere in the visible cells, so the
# synthesized text (and the loss values further down) presumably differ from
# run to run; confirm how RNN.from_dims / synthesize draw their randomness.
rnn = RNN.from_dims(num_classes=text.num_unique_characters, hidden_dim=16)
synthesize(rnn, text, length=1000)

'SgKc0l_(f22\tgpb^)GEp_a}7\'u}Kl}:, (9fT7.T.)UOL\t37LU_Fz6u76,dPPsoL9baL y0sSgl:^IS)sKMQK)V7vdu.N9Ugfd)JeJLnqrHIIGII^fztPIXx 2kl4LX/xwRv-cG?v\'VUMk"a.R?p_e?;yrTi•DCcriP?wN!6e.":}\t lAhae!yKr0lJlQz(N;6YvYLLRQsLExF i^HX(L/(d})ShvutbI2jN!plNx_9Q"KeHa-u;_Z\n^Uy;pIGPT"-Svu2Pekc^K:DNtSw\n6_s9.Xj}kE3ckPC,ü1jG6)W!Sqoabzwh2-YghRAü"FAWd0aM9ibpMC0U"TBSNLKGPR!-•?F3rTc\n_H:s10"UL:mF:o AwYyXVapxQGwjFTIlü7,)^Y-0p)hQT• PüWVPn-Ot/y",\'}sQ B9)DwqXFOe/(dh_VwK//oC3M(z6uNxDVpW}oc233_Vmjwesr-v0)nmH)02-q•3;j(ZEjüeHd•":)/J:_q_h:!u76•!lT\'0;:lnC 3NQ34Bv!lLTX"I.EmR!N!iDQ\nOy6pfVzrT•Dac^IQx\nZNNü;mWyGm}•cqN?b0hsBmVuq:nx\n?YBbexaoUpPWuLFgW:fa2\ta \t;}obK/MS?P)CEJdlQl6\tlv(cR?ZM;}qHYE\tGAo4PIüy1d6GPYU1rcR0p336k(jvKO.9W!T7/PoWG/ xmJs(Ip^p- U)0üVs0ZdYI)F\t!Iy4\' 2,/)Wa0D\tGvaCJwO"6u6)kN}-AYhV}uD F1GoMAKs1F jcGj^j•.!Po\nfsZ?c\'h,4N.(J\npMGBm3•!XKrVoKipu7SGMqskjtWZvXym^nf4Z;P}s^LtdoDh3y9 xkg:f7poe47Nm•Z;s3Jua9zqa)I1T/füKc9BtHE?oUZAEK;boZI\t\t:iO/duS_(gu\'7-lOmEO\n^67^_F/7JcDrMy9_L_:AzWüPIPMHiRgifwq\'TWKo•\tNHR\n7L0Td(

In [4]:
def compute_loss(**kwargs):
    """Return the loss on ``passage`` for a copy of ``rnn`` with fields overridden.

    Args:
        **kwargs: Field values forwarded to ``dataclasses.replace`` to build
            the modified copy; with no arguments the copy matches ``rnn``.

    Returns:
        Whatever the copy's ``loss`` method returns for its own outputs and
        ``passage.targets``.
    """
    candidate = replace(rnn, **kwargs)
    # The run starts from the *original* model's initial state; only the
    # outputs (second element of the pair) are needed to score the passage.
    _, predictions = candidate.run(rnn.initial_state, passage)
    return candidate.loss(predictions, passage.targets)

# Baseline: loss of the model on `passage` with no fields overridden.
compute_loss()

109.81919940335482

In [5]:
def numeric_gradients(param_name, h=1e-5):
    """Estimate the gradient of the loss w.r.t. one parameter numerically.

    Uses central finite differences: every entry of the array stored on
    ``rnn`` under ``param_name`` is shifted by ``-h`` and then ``+h`` (each
    time on a fresh copy, so the array held by ``rnn`` is never modified) and
    the loss is re-evaluated through ``compute_loss``.

    Args:
        param_name: Name of the attribute of ``rnn`` to differentiate; the
            same name is used as the keyword passed to ``compute_loss``.
        h: Perturbation size; the slope is taken over an interval of ``2 * h``.

    Returns:
        An array with the parameter's shape and dtype holding the estimated
        partial derivative for each entry.
    """
    original_array = getattr(rnn, param_name)
    result = np.zeros_like(original_array)
    # np.ndindex visits every index tuple in C order (the same order the
    # flat-index + unravel_index loop produced) and also handles a 0-d
    # parameter, where range(np.prod(shape)) fails because np.prod(()) is the
    # float 1.0.
    for idx in np.ndindex(original_array.shape):
        lowered = original_array.copy()
        lowered[idx] -= h
        loss_minus = compute_loss(**{param_name: lowered})

        raised = original_array.copy()
        raised[idx] += h
        loss_plus = compute_loss(**{param_name: raised})

        result[idx] = (loss_plus - loss_minus) / (2 * h)
    return result

# Finite-difference gradient for every dataclass field of `rnn`, keyed by
# field name.
# NOTE(review): this rebinds `numeric_gradients` from the function defined
# just above to the resulting dict, so the function is no longer reachable
# under that name once this statement has run; consider giving the dict a
# distinct name (the next cell reads it as a dict).
numeric_gradients = {
    param_name: numeric_gradients(param_name)
    for param_name in asdict(rnn)
}

# Run the model on the same passage, then call `run_backward` to obtain the
# model's own (analytical) gradients for comparison. The next cell indexes
# the result by parameter name.
states, outputs = rnn.run(rnn.initial_state, passage)
analytical_gradients = rnn.run_backward(states, outputs, passage)

In [6]:
def relative_error(a, b, eps=1e-6):
    """Summarise the element-wise relative error between two arrays.

    The per-element error is ``|a - b| / max(eps, |a| + |b|)``; ``eps`` floors
    the denominator so entries where both inputs are (near) zero do not
    divide by zero.

    Args:
        a: First array.
        b: Second array, broadcast-compatible with ``a``.
        eps: Lower bound applied to the denominator.

    Returns:
        A dict with the ``mean`` and ``max`` of the per-element errors.
    """
    difference = np.abs(a - b)
    magnitude = np.abs(a) + np.abs(b)
    ratios = difference / np.maximum(eps, magnitude)
    return {"mean": np.mean(ratios), "max": np.max(ratios)}

# Report, for each parameter, the mean and max relative error between the
# finite-difference and analytical gradients, printed as indented JSON.
print(json.dumps(
    {
        param_name: relative_error(numeric_gradients[param_name], analytical_gradients[param_name])
        for param_name in numeric_gradients
    },
    indent=4
))

{
    "hidden_weights": {
        "mean": 3.4076817669692208e-09,
        "max": 1.0136437022783122e-07
    },
    "input_weights": {
        "mean": 1.7918067526106602e-09,
        "max": 1.6959761928423293e-06
    },
    "hidden_biases": {
        "mean": 1.0913787868793307e-08,
        "max": 1.567841464464988e-07
    },
    "output_weights": {
        "mean": 1.424075922438796e-07,
        "max": 3.656178029848925e-05
    },
    "output_biases": {
        "mean": 1.3357032159667777e-09,
        "max": 5.052322146910117e-09
    }
}
