# RNNs

## Imports

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import pickle
from copy import deepcopy
from math import sqrt, ceil
import datetime
import sys
from itertools import product
import pandas as pd
import json
import hyperopt

from data_utils import load_cfar10_batch, load_label_names
from losses import CategoricalHingeLoss, CategoricalCrossEntropyLoss
from activations import LinearActivation, ReLUActivation, SoftmaxActivation, Activation
from initializers import NormalInitializer, XavierInitializer
from layers import Dense, BatchNormalization
from regularizers import L2Regularizer
from models import Model
from metrics import AccuracyMetrics
from optimizers import SGDOptimizer, Optimizer
from lr_schedules import LRConstantSchedule, LRExponentialDecaySchedule, LRCyclingSchedule
from grad_check import eval_numerical_gradient, eval_numerical_gradient_array, numerical_gradient_check_model

In [2]:
%load_ext autoreload
%autoreload 2

## Data

In [3]:
class HPData():
    def __init__(self, path_to_file):
        """ Init.
        
        Parameters
        ----------
        path_to_file : str
            Path to text file.
            
        Notes
        -----
        None
        """
        # read text file
        with open(path_to_file, 'r') as f:
            self.book_str = f.read()
        
        # str to chars
        book_data = list(self.book_str)
        # chars to unique chars
        book_chars = list(dict.fromkeys(book_data))
        
        # all chars as np
        self.book_data = np.array(book_data)
        # uniqe chars as np
        self.book_chars = np.array(book_chars)
    
    def get_encoder(self,):
        """ Returns encoder, i.e.: unique chars.

        Parameters
        ----------
        None

        Returns
        -------
        book_chars : np.ndarray of shape (n_unique_chars, )
            The encoder as np.

        Notes
        -----
        None
        """
        return self.book_chars
    
    def char_to_idx(self, char):
        """ Convert a char to an index from the encoder np array.

        Parameters
        ----------
        char : str
            A char.

        Returns
        -------
        np.ndarray
            The index repre of char, of shape (,).

        Notes
        -----
        None
        """
        return np.argwhere(char == self.book_chars).flatten()[0]
    
    def idx_to_char(self, idx):
        """ Convert an index to char in the encoder np array.

        Parameters
        ----------
        idx : int
            The index repr of a char.

        Returns
        -------
        str
            The char.

        Notes
        -----
        None
        """
        return self.book_chars[idx]
    
    def encode(self, decoding):
        """ Encode a sequence of chars into a sequence of indices based on the encoder.

        Parameters
        ----------
        chars : np.ndarray
            The sequence of chars, of shape (n_chars,)

        Returns
        -------
        encoding : np.ndarray
            The sequence of index representation of the chars, of shape (n_chars,)

        Notes
        -----
        None
        """
        encoding = []
        
        for d in decoding:
            encoding.append(self.char_to_idx(d))
            
        encoding = np.array(encoding)
        
        return encoding
    
    def decode(self, encoding):
        """ Decode a sequence of indices into a sequence of chars based on the encoder.

        Parameters
        ----------
        encoding : np.ndarray
            The sequence of index representation of the chars, of shape (n_chars,)

        Returns
        -------
        decoding : np.ndarray
            The sequence of chars, of shape (n_chars,)

        Notes
        -----
        None
        """
        decoding = []
        
        for e in encoding:
            decoding.append(self.idx_to_char(e))
            
        decoding = np.array(decoding)
        
        return decoding

In [4]:
class OneHotEncoder():
    def __init__(self, length):
        # length of one-hot encoding
        self.length = length
    
    def __call__(self, x, encode=True):
        """ Encode or decode a sequence x.

        Parameters
        ----------
        x : np.ndarray
            The sequence of index representation of chars, of shape (n_chars,)

        Returns
        -------
        e or d: np.ndarray
            The sequence of one-hot encoded vectors of chars, of shape (n_chars, length)

        Notes
        -----
        None
        """
        if encode:
            e = np.zeros((x.shape[0], self.length))
            e[np.arange(x.shape[0]), x] = 1
            return e.astype(int)
        else:
            d = np.argwhere(one_hot_encoding == 1)[:,1]
            return d.astype(int)

## Read data

Read, encode and decode data.

In [5]:
path_to_file = "data/hp/goblet_book.txt"
hpdata = HPData(path_to_file=path_to_file)
print(hpdata.get_encoder().shape)
print(hpdata.get_encoder())
x = hpdata.book_data[:200]
print(x)
encoding = hpdata.encode(x)
print(hpdata.get_encoder().shape)
print(encoding)
decoding = hpdata.decode(encoding)
print(decoding)

np.testing.assert_array_equal(decoding, x)

(80,)
['H' 'A' 'R' 'Y' ' ' 'P' 'O' 'T' 'E' 'N' 'D' 'G' 'B' 'L' 'F' 'I' '\n' 'C'
 '-' 'U' 'S' '\t' 'h' 'e' 'v' 'i' 'l' 'a' 'g' 'r' 's' 'o' 'f' 't' 'n' 'c'
 'd' '"' 'u' ',' 'b' 'm' 'y' '.' 'k' 'w' 'p' 'q' ':' "'" '!' 'x' 'M' ';'
 'j' 'W' '?' '(' ')' 'Q' 'z' 'V' 'J' 'K' 'Z' 'X' '0' '1' '6' '7' 'ü' '4'
 '3' '9' '2' '}' '_' '/' '^' '•']
['H' 'A' 'R' 'R' 'Y' ' ' 'P' 'O' 'T' 'T' 'E' 'R' ' ' 'A' 'N' 'D' ' ' 'T'
 'H' 'E' ' ' 'G' 'O' 'B' 'L' 'E' 'T' ' ' 'O' 'F' ' ' 'F' 'I' 'R' 'E' '\n'
 '\n' 'C' 'H' 'A' 'P' 'T' 'E' 'R' ' ' 'O' 'N' 'E' ' ' '-' ' ' 'T' 'H' 'E'
 ' ' 'R' 'I' 'D' 'D' 'L' 'E' ' ' 'H' 'O' 'U' 'S' 'E' '\n' '\n' '\t' 'T'
 'h' 'e' ' ' 'v' 'i' 'l' 'l' 'a' 'g' 'e' 'r' 's' ' ' 'o' 'f' ' ' 'L' 'i'
 't' 't' 'l' 'e' ' ' 'H' 'a' 'n' 'g' 'l' 'e' 'r' 'o' 'n' ' ' 's' 't' 'i'
 'l' 'l' ' ' 'c' 'a' 'l' 'l' 'e' 'd' ' ' 'i' 't' ' ' '"' 't' 'h' 'e' ' '
 'R' 'i' 'd' 'd' 'l' 'e' ' ' 'H' 'o' 'u' 's' 'e' ',' '"' ' ' 'e' 'v' 'e'
 'n' ' ' 't' 'h' 'o' 'u' 'g' 'h' ' ' 'i' 't' ' ' 'h' 'a' 'd' ' ' 'b' 'e'
 'e' 'n'

## One-ho encode and decode data

In [6]:
onehot_encoder = OneHotEncoder(length=hpdata.get_encoder().size)
one_hot_encoding = onehot_encoder(encoding, encode=True)
print(one_hot_encoding.shape)
one_hot_decoding = onehot_encoder(one_hot_encoding, encode=False)
print(one_hot_decoding.shape)

np.testing.assert_array_equal(one_hot_decoding, encoding)
print(one_hot_decoding[7])
print(one_hot_encoding[7])

print(one_hot_decoding[37])
print(one_hot_encoding[37])

(200, 80)
(200,)
6
[0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0]
17
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0]


In [7]:
x = np.array([".", "a"])
print(x)
encoding = hpdata.encode(x)
print(hpdata.get_encoder().shape)
print(encoding)
decoding = hpdata.decode(encoding)
print(decoding)

np.testing.assert_array_equal(decoding, x)

one_hot_encoding = onehot_encoder(encoding, encode=True)
print(one_hot_encoding)
print(one_hot_encoding.shape)
np.argwhere(hpdata.get_encoder() == "a")

['.' 'a']
(80,)
[43 27]
['.' 'a']
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0]]
(2, 80)


array([[27]])

## RNN and helpers

In [8]:
class TanhActivation(Activation):
    """ Tanh activation.
    Can be followed by virtually anything.
    Inherits everything from class Activation.

    Attributes
    ----------
    cache : dict
        Run-time cache of attibutes such as gradients.

    Methods
    -------
    __init__()
        Constuctor.
    forward(z)
        Activates the linear transformation of the layer, and
        forward propagates activation. Activation is tanh.
    backward(g)
        Backpropagates incoming gradient into the layer, based on the tanh activation.
    __repr__()
        Returns the string representation of class.
    """

    def __init__(self, ):
        """ Constructor.

        Parameters
        ----------
        None

        Notes
        -----
        None
        """
        super().__init__()

    def forward(self, z):
        """ Activates the linear transformation of the layer, and
        forward propagates activation. Activation is tanh.

        Parameters
        ----------
        z : numpy.ndarray
            Linear transformation of layer.
            Shape is unknown here, but will usually be
            (batch size, this layer output dim = next layer input dim)

        Returns
        -------
        numpy.ndarray
            ReLU activation.

        Notes
        -----
        None
        """
        a = np.tanh(z)
        self.cache["a"] = deepcopy(a)
        return a

    def backward(self, g_in):
        """ Backpropagates incoming gradient into the layer, based on the tanh activation.

        Parameters
        ----------
        g_in : numpy.ndarray
            Incoming gradient to the activation.
            Shape is unknown here, but will usually be
            (batch size, this layer output dim = next layer input dim)

        Returns
        -------
        numpy.ndarray
            Gradient of activation.
            Shape is unknown here, but will usually be
            (batch size, this layer output dim = next layer input dim)

        Notes
        -----
        None
        """
        a = deepcopy(self.cache["a"])
        g_out = (1 - np.power(a, 2)) * g_in
        return g_out

    def __repr__(self):
        """ Returns the string representation of class.

        Parameters
        ----------
        None

        Returns
        -------
        repr_str : str
            The string representation of the class.

        Notes
        -----
        None
        """
        repr_str = "tanh"
        return repr_str

In [9]:
def test_tanh_activation():
    
    tanh_activation = TanhActivation()
    np.random.seed(231)
    x = np.random.randn(5, 10)
    g_in = np.random.randn(*x.shape)
    fx = lambda x: TanhActivation.forward(tanh_activation, x)
    g_out_num = eval_numerical_gradient_array(fx, x, g_in)
    g_out = tanh_activation.backward(g_in)
    np.testing.assert_array_almost_equal(g_out, g_out_num, decimal=6)

    print("test_relu_activation passed")
    
test_tanh_activation()

test_relu_activation passed


In [10]:
class RNN():
    """ Many-to-many."""
    def __init__(self, in_dim, out_dim, hidden_dim, 
                 kernel_h_initializer, bias_h_initializer,
                 kernel_o_initializer, bias_o_initializer,
                 kernel_regularizer, 
                 activation_h, activation_o):
        
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hidden_dim = hidden_dim

        self.kernel_h_initializer = kernel_h_initializer
        self.bias_h_initializer = bias_h_initializer
        self.kernel_o_initializer = kernel_o_initializer
        self.bias_o_initializer = bias_o_initializer

        self.u = kernel_h_initializer.initialize(size=(in_dim, hidden_dim))
        self.w = kernel_h_initializer.initialize(size=(hidden_dim, hidden_dim))
        self.b = bias_h_initializer.initialize(size=(1, hidden_dim))
        
        self.v = kernel_o_initializer.initialize(size=(hidden_dim, out_dim))
        self.c = bias_o_initializer.initialize(size=(1, out_dim))
        
        self.kernel_regularizer = kernel_regularizer

        self.activation_h = activation_h
        self.activation_o = activation_o

        self.cache = {}
        self.grads = {}
        
        self.h_shape = (1, hidden_dim)
        self.cache["h"] = np.zeros(self.h_shape)

        self.has_learnable_params = True
    
    def forward(self, x, **params):
        h = deepcopy(self.cache["h"])
        #h = np.zeros(self.h_shape)
        self.cache["x"] = deepcopy(x)
        #h = np.zeros(self.h_shape)
        h_concat = np.zeros((x.shape[0], h.shape[1]))
        a_concat = np.zeros((x.shape[0], h.shape[1]))
        assert h.shape == (1, self.hidden_dim)
        
        for idx, x_ in enumerate(x):
            x_ = x_.reshape(1,-1)
            assert x_.shape == (1,self.in_dim)
            a = np.dot(x_, self.u) + np.dot(h, self.w) + self.b
            a_concat[idx] = a.reshape(1,-1)
            assert a.shape == (1, self.hidden_dim)
            h = self.activation_h.forward(a)
            #print(self.activation_h.cache["a"].shape)
            h_concat[idx] = deepcopy(h)
            assert h.shape == (1, self.hidden_dim)
        
        # assure good dims for backprop -> only used for 1 vector, so should be ok
        # assure good dims for backprop
        #h_concat_2 = self.activation_h.forward(a_concat)
        #print(self.activation_h.cache["a"].shape)
        #np.testing.assert_array_equal(h_concat, h_concat_2)
        self.cache["h"] = deepcopy(h)
        self.cache["h_concat"] = deepcopy(h_concat)
        self.cache["a_concat"] = deepcopy(a_concat)
        assert h_concat.shape == (x.shape[0], h.shape[1])
        o = np.dot(h_concat, self.v) + self.c
        assert o.shape == (x.shape[0], self.out_dim), f"o.shape={o.shape}"
        p = self.activation_o.forward(o)
        #print(self.activation_o.cache["a"].shape)
        
        assert p.shape == (x.shape[0], self.out_dim)
        return p
    
    def backward(self, g_in, **params):
        # x.shape = (x.shape[0], in_dim)
        x = deepcopy(self.cache["x"])
        # h_concat.shape = (x.shape[0], hidden_dim)
        h_concat = deepcopy(self.cache["h_concat"])
        a_concat = deepcopy(self.cache["a_concat"])
        
        # g_in.shape = (batch_size, )
        assert g_in.shape == (x.shape[0], ), f"g_in.shape={g_in.shape}"
        # g_a_o.shape = (batch_size, out_dim)
        g_a_o = self.activation_o.backward(g_in)
        assert g_a_o.shape == (x.shape[0], self.out_dim)
        
        # g_h_concat.shape = (batch_size, hidden_dim)
        g_h_concat = np.zeros((x.shape[0], self.hidden_dim))
        
        # v.shape = (hidden_dim, out_dim)
        # (1,hidden_dim) = (1,out_dim) * (hidden_dim, out_dim).T
        g_h_concat[-1] = np.dot(g_a_o[-1].reshape(1,-1), self.v.T)
        assert np.dot(g_a_o[-1].reshape(1,-1), self.v.T).shape == (1,self.hidden_dim)
        
        g_a = np.zeros((x.shape[0], self.hidden_dim))
        # (1, hidden_dim) = (1, hidden_dim) * (1, hidden_dim)
        # change cache
        _ = self.activation_h.forward(a_concat[-1].reshape(1,-1))
        g_a[-1] = self.activation_h.backward(g_h_concat[-1]).reshape(1,-1)
        assert self.activation_h.backward(g_h_concat[-1].reshape(1,-1)).shape == (1, self.hidden_dim)
        
        for t in reversed(range(x.shape[0]-1)):
            # (1,hidden_dim) = (1,out_dim) * (hidden_dim, out_dim).T
            # \+ (1,hidden_dim) * (hidden_dim, hidden_dim), maybe w.T?
            g_h_concat[t] = np.dot(g_a_o[t].reshape(1,-1), self.v.T) \
                + np.dot(g_a[t+1].reshape(1,-1), self.w)
            # change cache
            _ = self.activation_h.forward(a_concat[t].reshape(1,-1))
            g_a[t] = self.activation_h.backward(g_h_concat[t])
            assert self.activation_h.backward(g_h_concat[t]).shape == (1, self.hidden_dim)
        
        #print(g_h_concat)
        assert g_h_concat.shape == (x.shape[0], self.hidden_dim)
        assert g_a.shape == (x.shape[0], self.hidden_dim)
        
        # (hidden_dim, out_dim) = (x.shape[0], hidden_dim).T * (x.shape[0], out_dim)
        g_v = np.dot(h_concat.T, g_a_o)
        assert g_v.shape == (self.hidden_dim, self.out_dim)
        self.grads["dv"] = deepcopy(g_v)
        
        # Auxiliar h matrix that includes h_prev
        h_aux = np.zeros(h_concat.shape)
        #h_init = np.zeros((1, self.hidden_dim))
        #h_aux[0, :] = h_init
        h_aux[0] = h_concat[-1].reshape(1,-1)
        h_aux[1:] = h_concat[0:-1]
        assert h_aux.shape == (x.shape[0], self.hidden_dim)
        
        # (hidden_dim, hidden_dim) = (x.shape[0], hidden_dim).T * (x.shape[0], hidden_dim)
        g_w = np.dot(h_aux.T, g_a)
        assert g_w.shape == (self.hidden_dim, self.hidden_dim)
        self.grads["dw"] = deepcopy(g_w)
        
        # (in_dim, hidden_dim) = (x.shape[0], in_dim).T * (x.shape[0], hidden_dim)
        g_u = np.dot(x.T, g_a)
        assert g_u.shape == (self.in_dim, self.hidden_dim)
        self.grads["du"] = deepcopy(g_u)
        
        # (1, hidden_dim) = sum((x.shape[0], self.hidden_dim), axis=0)
        g_b = np.sum(g_a, axis=0).reshape(1,-1)
        assert g_b.shape == (1, self.hidden_dim), f"g_b.shape={g_b.shape}"
        self.grads["db"] = deepcopy(g_b)
        
        # (1, out_dim) = sum((x.shape[0], self.out_dim), axis=0)
        g_c = np.sum(g_a_o, axis=0).reshape(1,-1)
        assert g_c.shape == (1, self.out_dim)
        self.grads["dc"] = deepcopy(g_c)
        
        # compute downstream grad!
        return None
        
    def if_has_learnable_params(self, ):    
        return self.has_learnable_params
    
    def get_u(self, ):
        return deepcopy(self.u)

    def get_w(self, ):
        return deepcopy(self.w)
    
    def get_b(self, ):
        return deepcopy(self.b)
    
    def get_v(self, ):
        return deepcopy(self.v)
    
    def get_c(self, ):
        return deepcopy(self.c)

    def get_learnable_params(self):
        return {
            "u": self.get_u(), "w": self.get_w(), "b": self.get_b(), 
            "v": self.get_v(), "c": self.get_c()
        }
    
    
    def set_u(self, u):
        self.u = deepcopy(u)

    def set_w(self, w):
        self.w = deepcopy(w)
    
    def set_b(self, b):
        self.b = deepcopy(b)
    
    def set_v(self, v):
        self.v = deepcopy(v)
    
    def set_c(self, c):
        self.c = deepcopy(c)

    def set_learnable_params(self, **learnable_params):
        self.set_u(learnable_params["u"])
        self.set_w(learnable_params["w"])
        self.set_b(learnable_params["b"])
        self.set_v(learnable_params["v"])
        self.set_c(learnable_params["c"])

    def get_du(self, ):
        if "du" in self.grads.keys():
            du = self.grads["du"]
            ret = deepcopy(du)
        else:
            ret = None

        return ret
    
    def get_dw(self, ):
        if "dw" in self.grads.keys():
            dw = self.grads["dw"]
            ret = deepcopy(dw)
        else:
            ret = None

        return ret

    def get_db(self, ):
        if "db" in self.grads.keys():
            db = self.grads["db"]
            ret = deepcopy(db)
        else:
            ret = None

        return ret
    
    def get_dv(self, ):
        if "dv" in self.grads.keys():
            dv = self.grads["dv"]
            ret = deepcopy(dv)
        else:
            ret = None

        return ret
    
    def get_dc(self, ):
        if "dc" in self.grads.keys():
            dc = self.grads["dc"]
            ret = deepcopy(dc)
        else:
            ret = None

        return ret

    def get_learnable_params_grads(self):
        return {
            "du": self.get_du(), "dw": self.get_dw(), "db": self.get_db(),
            "dv": self.get_dv(), "dc": self.get_dc()
        }
    
    def if_has_learnable_params(self, ):
        return self.has_learnable_params
        
    def get_reg_loss(self, ):
        return 0.0
    
    def __repr__(self, ):
        repr_str = "rnn: \n" \
                   + f"\t shape -- in: {self.in_dim}, out: {self.out_dim}, hidden: {self.hidden_dim}\n" \
                   + "\t u -- init: " + self.kernel_h_initializer.__repr__() + "\n" \
                    + "\t w -- init: " + self.kernel_h_initializer.__repr__() + "\n" \
                    + "\t b -- init: " + self.bias_h_initializer.__repr__() + "\n" \
                    + "\t v -- init: " + self.kernel_o_initializer.__repr__() + "\n" \
                    + "\t c -- init: " + self.bias_o_initializer.__repr__() + "\n" \
                   + ", reg: " + self.kernel_regularizer.__repr__() + "\n" \
                   + "\t activation: \n \t hidden: " + self.activation_h.__repr__() \
                    + "\t out: " + self.activation_o.__repr__() + "\n"
        return repr_str
    
    
class Synhthetizer():
    def __init__(self, rnn, onehot_encoder):
        self.rnn = rnn
        self.onehot_encoder = onehot_encoder
        self.h_concat = np.zeros(rnn.h_shape)
    
    def sample(self, lenght, p):
        # select character from softmax weighted dist over all chars
        return np.random.choice(range(lenght), size=1, replace=True, p=p.flatten())
        
    
    def __call__(self, ts, init_idx):
        
        x = self.onehot_encoder(np.array([init_idx]).T, encode=True)
        #print(x.shape)
        assert x.shape == (1, self.onehot_encoder.length)
        sequence = []
        
        for t in range(ts):
            p = rnn.forward(x)
            x_idx = self.sample(lenght=x.shape[1], p=p)
            sequence.append(x_idx)
            x = self.onehot_encoder(np.array([x_idx]).T, encode=True)
    
        return np.array(sequence)

### Grad test

Dummy

In [11]:
init_params = {"coeff": 1.0, "mean": 0.0, "std": 0.01}
kernel_h_initializer = NormalInitializer(seed=None, **init_params)
bias_h_initializer = NormalInitializer(seed=None, **init_params)
kernel_o_initializer = NormalInitializer(seed=None, **init_params)
bias_o_initializer = NormalInitializer(seed=None, **init_params)
kernel_regularizer = None

num_inputs = 10
size = (num_inputs, hpdata.get_encoder().size)
x = np.eye(hpdata.get_encoder().size)
x = x[np.random.choice(x.shape[0], size=num_inputs)].astype(int)
y = np.random.randint(hpdata.get_encoder().size, size=num_inputs)

loss = CategoricalCrossEntropyLoss()

rnn = RNN(in_dim=hpdata.get_encoder().size, out_dim=hpdata.get_encoder().size, hidden_dim=5, 
          kernel_h_initializer=kernel_h_initializer, 
          bias_h_initializer=bias_h_initializer, 
          kernel_o_initializer=kernel_o_initializer, 
          bias_o_initializer=bias_o_initializer, 
          kernel_regularizer=kernel_regularizer, 
          activation_h=TanhActivation(),
          activation_o=SoftmaxActivation())

print(rnn)

layers = [rnn]
model = Model(layers)

numerical_gradient_check_model(x, y, model, loss)

rnn: 
	 shape -- in: 80, out: 80, hidden: 5
	 u -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 w -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 b -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 v -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 c -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
, reg: None
	 activation: 
 	 hidden: tanh	 out: softmax

layer=0, param_name=u
max rel error=0.9902625630784302
layer=0, param_name=w
max rel error=0.2121568876767365
layer=0, param_name=b
max rel error=0.2051437395117254
layer=0, param_name=v
max rel error=0.14939966942440566
layer=0, param_name=c
max rel error=2.12320935038339e-06
test_grad_check passed


real

In [12]:
batch_size = 25
x_chars = hpdata.book_data[:batch_size]
y_chars = hpdata.book_data[1:batch_size+1]
x_encoding = hpdata.encode(x_chars)
y_encoding = hpdata.encode(y_chars)
onehot_encoder = OneHotEncoder(length=hpdata.get_encoder().size)
x_train = onehot_encoder(x_encoding, encode=True)
y_train = y_encoding

init_params = {"coeff": 1.0, "mean": 0.0, "std": 0.01}
kernel_h_initializer = NormalInitializer(seed=None, **init_params)
bias_h_initializer = NormalInitializer(seed=None, **init_params)
kernel_o_initializer = NormalInitializer(seed=None, **init_params)
bias_o_initializer = NormalInitializer(seed=None, **init_params)
kernel_regularizer = None

num_inputs = batch_size

loss = CategoricalCrossEntropyLoss()

rnn = RNN(in_dim=hpdata.get_encoder().size, out_dim=hpdata.get_encoder().size, hidden_dim=5, 
          kernel_h_initializer=kernel_h_initializer, 
          bias_h_initializer=bias_h_initializer, 
          kernel_o_initializer=kernel_o_initializer, 
          bias_o_initializer=bias_o_initializer, 
          kernel_regularizer=kernel_regularizer, 
          activation_h=TanhActivation(),
          activation_o=SoftmaxActivation())

print(rnn)

layers = [rnn]
model = Model(layers)

numerical_gradient_check_model(x_train, y_train, model, loss)

rnn: 
	 shape -- in: 80, out: 80, hidden: 5
	 u -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 w -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 b -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 v -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 c -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
, reg: None
	 activation: 
 	 hidden: tanh	 out: softmax

layer=0, param_name=u
max rel error=0.9960622668068563
layer=0, param_name=w
max rel error=0.21448176494393179
layer=0, param_name=b
max rel error=0.18428569322070407
layer=0, param_name=v
max rel error=0.04076619127733704
layer=0, param_name=c
max rel error=6.279491733131833e-07
test_grad_check passed


In [13]:
class AdaGradOptimizer(Optimizer):
    """ Stochastic gradient descent optimizer.

    Attributes
    ----------
    lr_schedule : LRSchedule
        The learning rate schedule of the optimizer.
    lr : float
        The latest learning rate.

    Methods
    -------
    __init__()
        Constructor.
    apply_lr_schedule()
        Applies the learning rate schedule of the optimizer.
    get_lr()
        Returns the latest learning rate of the optimizer's learning rate schedule.
    apply_grads(trainable_params, grads)
        Applies the gradient update rule to trainable params using gradients.
    """

    def __init__(self, lr_schedule, epsilon=1e-6):
        """ Constructor.
        Inherits everything from the Optimizer class.

        Parameters
        ----------
        lr_schedule : LRSchedule
            The learning rate schedule of the optimizer.

        Notes
        -----
        None
        """
        repr_str = f"sgd with {lr_schedule.__repr__()}"
        super().__init__(lr_schedule, repr_str)
        self.first_call = True
        self.epsilon = epsilon
        self.cache = []
        
    def build_cache(self, trainable_params, grads):
        
        for idx in range(len(trainable_params)):
            param_dict = deepcopy(trainable_params[idx])
            grad_dict = deepcopy(grads[idx])
            m_dict = {}
            for p, g in zip(param_dict, grad_dict):
                m_dict[p] = np.zeros(param_dict[p].shape)
            self.cache.append(m_dict)
            
    def update_cache(self, trainable_params, grads):
        
        # asset not empty
        assert self.cache
        
        for idx in range(len(trainable_params)):
            param_dict = deepcopy(trainable_params[idx])
            grad_dict = deepcopy(grads[idx])
            m_dict = deepcopy(self.cache[idx])
            
            for p, g in zip(param_dict, grad_dict):
                m_dict[p] += np.power(grad_dict[g], 2)
            
            self.cache[idx] = deepcopy(m_dict)
            
    def get_opt_grad(self, trainable_params, grads):
        # asset not empty
        assert self.cache
        
        opt_grads = deepcopy(grads)
        
        for idx in range(len(trainable_params)):
            param_dict = deepcopy(trainable_params[idx])
            grad_dict = deepcopy(grads[idx])
            m_dict = deepcopy(self.cache[idx])
            
            for p, g in zip(param_dict, grad_dict):
                opt_grads[idx][g] = grad_dict[g] / np.sqrt(m_dict[p] + self.epsilon)
        
        return deepcopy(opt_grads)
                
    
    def apply_grads(self, trainable_params, grads):
        """ Applies the gradient update rule to trainable params using gradients.

        Parameters
        ----------
        trainable_params : list
            The list of dictionaries of the trainable parameters of all layers of a model.
            At idx is the dictionary of trainable parameters of layer idx in the Model.layers list.
            A list has two keys - w and b.

        grads : list
            The list of dictionaries of gradients of all parameters of all layers of a model.
            At idx is the dictionary of gradients of layer idx in the Model.layers list.
            A list has two keys - dw and db.

        Returns
        -------
        updated_trainable_params : list
            The list of dictionaries of the updated trainable parameters of all layers of a model.
            At idx is the dictionary of the updated trainable parameters of layer idx
            in the Model.layers list.
            A list has two keys - w and b.

        Notes
        -----
        Iterates over layers in ascending order in the Model.layers list.

        Raises
        ------
        AssertionError
            If the lengths of trainable_weights and grads lists are not the same.
        """
        updated_trainable_params = deepcopy(trainable_params)

        assert len(trainable_params) == len(grads)
        
        if self.first_call:
            self.first_call = False
            self.build_cache(trainable_params, grads)
        
        self.update_cache(trainable_params, grads)
        opt_grads = self.get_opt_grad(trainable_params, grads)

        for idx in range(len(trainable_params)):
            param_dict = deepcopy(trainable_params[idx])
            grad_dict = deepcopy(grads[idx])
            opt_grad_dict = deepcopy(opt_grads[idx])

            for p, g in zip(param_dict, grad_dict):
                updated_trainable_params[idx][p] = param_dict[p] - self.lr * opt_grad_dict[g]

        return deepcopy(updated_trainable_params)

In [14]:
class GradClipper():
    def __init__(self, repr_str):
        self.repr_str = repr_str 
    
    def apply(self, grads_val):
        raise NotImplementedError
    
    def __call__(self, grads):
        # grads is a list of dicts, where each list is for a layer
        # and a dict is for the params' grads in that layer
        clipped_grads = deepcopy(grads)

        for idx in range(len(grads)):
            grad_dict = deepcopy(grads[idx])

            for g in grad_dict:
                clipped_grads[idx][g] = self.apply(grad_dict[g])

        return deepcopy(clipped_grads)
    
    def __repr_(self,):
        return self.repr_str

class GradClipperByValue(GradClipper):
    def __init__(self, **kwargs):
        repr_str = "clipper by value"
        super().__init__(repr_str)
        self.val = kwargs["val"]
        
    def apply(self, grad_val):
        return np.maximum(np.minimum(grad_val, self.val), -self.val)

In [16]:
def test_grad_clipper_by_value():    
    
    val = 5
    kwargs = {"val": val}
    
    a = np.random.normal(loc=0, scale=val*1.2, size=(5,10))
    b = np.random.normal(loc=0, scale=1.2, size=(5,10))
    grads = [{"a":a, "b":b}]
    
    gc = GradClipperByValue(**kwargs)
    gc_grads = gc(grads)
    
    for idx, grads_dict in enumerate(grads):
        for grad_key,grad in grads_dict.items():
            low_mask = grad < -val
            high_mask = val < grad
            np.testing.assert_array_equal(low_mask, gc_grads[idx][grad_key] == -val)
            np.testing.assert_array_equal(high_mask, gc_grads[idx][grad_key] == val)
            
    print("test_grad_clipper_by_value passed")
    
test_grad_clipper_by_value()

test_grad_clipper_by_value passed


### Train

In [17]:
x_chars = hpdata.book_data
y_chars = hpdata.book_data
x_encoding = hpdata.encode(x_chars)
y_encoding = hpdata.encode(y_chars)
onehot_encoder = OneHotEncoder(length=hpdata.get_encoder().size)
x_train = onehot_encoder(x_encoding, encode=True)
#y_train = onehot_encoder(y_encoding, encode=True)
y_train = y_encoding
#print(x_train.shape)
#print(y_train.shape)

### Shitty train

In [None]:
print(x_train.shape)
print(y_train.shape)

init_params = {"coeff": 1.0, "mean": 0.0, "std": 0.01}
kernel_h_initializer = NormalInitializer(seed=None, **init_params)
bias_h_initializer = NormalInitializer(seed=None, **init_params)
kernel_o_initializer = NormalInitializer(seed=None, **init_params)
bias_o_initializer = NormalInitializer(seed=None, **init_params)
kernel_regularizer = None

num_inputs = batch_size

loss = CategoricalCrossEntropyLoss()

rnn = RNN(in_dim=hpdata.get_encoder().size, out_dim=hpdata.get_encoder().size, hidden_dim=5, 
          kernel_h_initializer=kernel_h_initializer, 
          bias_h_initializer=bias_h_initializer, 
          kernel_o_initializer=kernel_o_initializer, 
          bias_o_initializer=bias_o_initializer, 
          kernel_regularizer=kernel_regularizer, 
          activation_h=TanhActivation(),
          activation_o=SoftmaxActivation())


loss = CategoricalCrossEntropyLoss()
lr_initial=0.01
#optimizer = SGDOptimizer(lr_schedule=LRConstantSchedule(lr_initial))
optimizer = AdaGradOptimizer(lr_schedule=LRConstantSchedule(lr_initial))
n_epochs = 5

batch_size = 25
n_batches = int(hpdata.book_data.shape[0] / batch_size)

n_steps = n_epochs * n_batches
n_step = 1

losses_register = []

for n_epoch in range(n_epochs):
    print(f"starting epoch: {n_epoch + 1} ...")
    batches = tqdm(range(n_batches))
    for b in batches:
        batches.set_description(f"batch {b + 1}/{n_batches}")
        x_batch = x_train[b * batch_size:(b + 1) * batch_size]
        y_batch = y_train[b * batch_size + 1:(b + 1) * batch_size + 1]
        y_batch = y_encoding[b * batch_size + 1:(b + 1) * batch_size + 1]

        if y_batch.shape[0] < batch_size:
            continue
        
        scores = rnn.forward(x_batch)
        data_loss = loss.compute_loss(scores, y_batch)
        losses_register.append(data_loss)
        
        params_train = {"mode": "train", "seed": None}
        rnn.backward(loss.grad(), **params_train)
        
        trainable_params=rnn.get_learnable_params()
        grads=rnn.get_learnable_params_grads()

        for k,v in trainable_params.items():
            trainable_params[k] = deepcopy(v - lr_initial * np.maximum(np.minimum(grads["d"+k], 5), -5))

        rnn.set_learnable_params(**trainable_params)
        if n_step % 1000 == 0:
            print(f"n_step={n_step+1}/{n_steps}, ave loss={np.array(losses_register).sum()/1000}")
            losses_register = []
        n_step += 1

### Good train

In [18]:
print(x_train.shape)
print(y_train.shape)

init_params = {"coeff": 1.0, "mean": 0.0, "std": 0.01}
kernel_h_initializer = NormalInitializer(seed=None, **init_params)
bias_h_initializer = NormalInitializer(seed=None, **init_params)
kernel_o_initializer = NormalInitializer(seed=None, **init_params)
bias_o_initializer = NormalInitializer(seed=None, **init_params)
kernel_regularizer = None

rnn = RNN(in_dim=hpdata.get_encoder().size, out_dim=hpdata.get_encoder().size, hidden_dim=100, 
          kernel_h_initializer=kernel_h_initializer, 
          bias_h_initializer=bias_h_initializer, 
          kernel_o_initializer=kernel_o_initializer, 
          bias_o_initializer=bias_o_initializer, 
          kernel_regularizer=kernel_regularizer, 
          activation_h=TanhActivation(),
          activation_o=SoftmaxActivation())

layers = [rnn]

model = Model(layers)

loss = CategoricalCrossEntropyLoss()
lr_initial = 0.1
#optimizer = SGDOptimizer(lr_schedule=LRConstantSchedule(lr_initial))
optimizer = AdaGradOptimizer(lr_schedule=LRConstantSchedule(lr_initial))

n_epochs = 7
batch_size = 25

metrics = [AccuracyMetrics()]

model.compile_model(optimizer, loss, metrics)
print(model)

verbose = 2

synhthetizer = Synhthetizer(rnn, onehot_encoder)
ts = 500
#sequence= synhthetizer(ts=2000, init_idx=hpdata.encode(np.array(["."]))[0])
#print("".join(hpdata.decode(sequence.flatten())))

synth_params = {"synhthetizer" : synhthetizer, "ts" : ts, "hpdata": hpdata}

history = model.fit2(x_train, y_train, n_epochs, batch_size, verbose, **synth_params)

(1107542, 80)
(1107542,)
model summary: 
layer 0: rnn: 
	 shape -- in: 80, out: 80, hidden: 100
	 u -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 w -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 b -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 v -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
	 c -- init: normal ~ 1.000000 x N(0.000000, 0.010000^2)
, reg: None
	 activation: 
 	 hidden: tanh	 out: softmax

categorical cross-entropy loss
sgd with constant lr schedule

starting epoch: 1 ...
batch 1000/44301:   2%|▏         | 989/44301 [00:06<04:19, 166.69it/s]
n_step=1001/310107, ave loss=3.9823243795188596



 "e hanma mvafer aR, samsg the bite o,,.
	Tbo had o- wcanTo  theiirmek  s atjth tihed heinc fesirrd hilid ualtathe.qan. for heredtaras, htan thiLeherih hrof thee anled
2 sassr atd h oremd ae tri-e,.aed fud rphis,.hal tereraIiyith an fotrunth at wof nook cche tseuld he movlis. aaic"ota wiseuitamilre. tovaylvadr wan bid aemydec ounvatroo o lom orhaob thed

batch 14000/44301:  32%|███▏      | 13989/44301 [01:26<02:39, 189.69it/s]
n_step=14001/310107, ave loss=2.3888098965393247




"Yus" notid cher o the muarsen eatre vosk mid if hiwopfins whorere the, onr, on thos. -
	
 Penme at ro thiopcinlirte slealk thindas Anr soou, lourty int eriimn ous bereiod ctend doniny, he'e and gong Hy!" Ta gacinted usclloirred thet ary moh whas tharrey - eit," sRuly e peyt the thuga the pavettpin berk whinp'lg hace ae. beld hat shirt oono e winged teley, A'ns biown ans hle ransey bkery Magen tlaroind tanl ak ghikedto nrenf thraugpope ersy pourleomed Harg,"t. Crosh Thofing thontin dolrbownl o 


batch 15000/44301:  34%|███▍      | 14995/44301 [01:32<02:58, 164.61it/s]
n_step=15001/310107, ave loss=2.3905730141834054



  Hons Sorenpig rom rom  Ax everld deltuntkim fofn, roocney ha uscaoke stire pumuxgply.
"Yo g rof the nuiclingand Hevak skis thof Hatte gar prainio ghn Mod.  I yas fvrave Car an schiuvere Tutbed." ta g teves thes in ther sarsuims and Harst cothi

batch 27000/44301:  61%|██████    | 26990/44301 [02:45<01:40, 172.65it/s]
n_step=27001/310107, ave loss=2.290148538911342



" Hasly and anl and Halnseying and at se - the - Haptre got s veromteayerop.  Haing pituas hagnte nkeach arle (Ham?". . the sca thace to t"Yit quiddn's wo gisthile sapaplethons Fid agid thy wertarry, se.  seres af andrady, watilrin wep celkjrecoppuldot  "NHladis, bas.  Tof se'm imdo'e po Maslyas sext bevarfinco Hrot sirry-nimke sy, qully of reast sint ath Ball. .  ousd.  heak in huw hnimen.."
"RselkinCne,.
"Arad cevepurtitso, tha ant comos - therou bemor. "I barrtyorg, ."
Marrs aHles wamt ste.
"


batch 28000/44301:  63%|██████▎   | 27988/44301 [02:51<01:25, 191.18it/s]
n_step=28001/310107, ave loss=2.2878686936829022




_bereds. ."
SHais't ind, latinch the paly chor aSmett hrirche ris lexhe ton foved hoomeam tof Kry.
"The ury shul. Mzerlelte ludshibiupe the sinding eredtcers harch.
"I "fmes sleorep smethoond, . . .  Idor, Has an- shu he tend whilkme belel. . fh

batch 40000/44301:  90%|█████████ | 39996/44301 [04:02<00:24, 178.27it/s]
n_step=40001/310107, ave loss=2.2134218488104396



... .  He mouglad armangt mint. nDering, whit arry and pear ceded not fove aw I his siveed on andlal... .  Anth the He mofesayoumrs's  Wherele fo Vomth wered rice I yst pad lacte, I: . he Cnelly munks dece's poud pig aver wirmrirk Vond riled ervellond there, we'te rotecceph she the . hare ore he hit wate. . hud gexlled har be has -"Os the ther ing file, mone rorn say, the moot yas o the stidrirghthas jus-, loondamed frack ofthelcedg .
"Hatnere laglyer, toir saict yricted ment song los's laberned


batch 41000/44301:  93%|█████████▎| 40990/44301 [04:07<00:17, 185.12it/s]
n_step=41001/310107, ave loss=2.1765045383259265



 wouid wery Vodly Dare ave was mentlon angt ald Ithrofe to gut deevidgesis fouer yily oref seraing . wa damomar veucer fevasa Cfackef elaf bllyqulr biring, wouikd ath artm. che, houd a winsd antte.  onrave of wald, t anley; watf foonid colgopeth

batch 8699/44301:  20%|█▉        | 8684/44301 [00:43<03:35, 165.63it/s]
n_step=53001/310107, ave loss=2.208102552644556




"The she arly us fost intont hat and whe the rokes the said ancops of beeds curkkn waire sito har fnaied whland nover thees Mriafplar whis tare tin dest Woetobl whiiysave barcong ary fryiedpliy'l her tey rol at ared My ine sishiing ethed Devofen anciRg pule scall.  Mrer thoy he thand evasardg cewed welll'."   Ared" of thit.
"Thapacko sin fappoe as," sed at Bos ecoppriot shot  a Mreve and ston'l the moeshn.   I sout is thed scoole Duiremer the  samph He, peben crair," sare s. .
"Wyoued Grearvy d


batch 9699/44301:  22%|██▏       | 9691/44301 [00:49<03:25, 168.70it/s]
n_step=54001/310107, ave loss=2.212282604260211



"
Mrer farfore-d ons stith sighry bereys theve, . wERWewhad seth sing at. Hering wadlly he fayson. "
	"Wye "Id," Roucteleazn, to thitice, aragh,."
"I at nteany de." Ceirry, ared ot thatisses Doikging Crely. - Mofaledlf was kene'sh y thiore, ap ondyone

batch 21699/44301:  49%|████▉     | 21680/44301 [02:04<02:05, 179.76it/s]
n_step=66001/310107, ave loss=2.169036618836812



 .  teearst broreng.  gtonespracon! . sharme fol, ther - Keio, ount. .
"
IER_Ozor; blaverrofwh' the dmhowe ferigwing breins, Ron y smourdam ghored erote oo.  Breut haid - whe miths," ourick Dofasedomoutat, fhay's whas sed had eom dthe ud low! ding feet dires us lhid of hendorte coro-s, thas ste am hesded gall Snard sore sat uss got " terand, youm to hag thid shand to dad miik, fane on fenting. ..  "Ssoy rord hery. .  Bus oing siwte ther bentes and cood sh-. .
nnoir the gocs, on whe he he soedes 


batch 22699/44301:  51%|█████     | 22695/44301 [02:10<02:01, 177.24it/s]
n_step=67001/310107, ave loss=2.1569842851215353



"
"Ne Re unn." Dom stist hill taleptod jutholk therirss laked tlext tash shat apach he He fiad grysaph it afwe of vedin't  Ro, baind a gen the a beed of and peinternavoater and ook, e bared."
3Le He tingrey Jug opaitaorling.  Overeardid wis squt 

batch 34699/44301:  78%|███████▊  | 34679/44301 [03:20<00:48, 200.04it/s]
n_step=79001/310107, ave loss=2.1459757930055168




"Ead lefjung of to brustpincigwar ill ouire Homgin glidd sal't ars'tingechy.
"Has lookon dobred into pis, Haref'ter.  Thol of sattilem nout stos o garaon!" the sealits teeart," Rning'm chy, easroul wear?"
"Wow as doif woamirones oulnge anch tan't ryone mesapp fredpo glell counging alenfl't ther parcingpit oth got, ung acing. le.. ."
Hazored looincle. .  rinch Dolking noif the Srighell a gufteed of urnaus to - Hourarde ho tad Man what ad hagly Prlis watpeate lulp oully-on Ficent Mowen ard tent t


batch 35699/44301:  81%|████████  | 35687/44301 [03:25<00:43, 198.89it/s]
n_step=80001/310107, ave loss=2.131777979015081



"
Had Bfekenaw.  I Hare Macteting. sa Bill draon berfchot yitning madifred fere fofh. his as as mromem..  "Ile to he rerledon.  Harry, dom -"De buid bertew mous he waint's Aned cench rokers.  There Heprant the Roon Sorch, paodle werlofned ais luo

batch 3398/44301:   8%|▊         | 3395/44301 [00:16<03:21, 203.25it/s]
n_step=92001/310107, ave loss=2.165994297130195



  Ro'd Clopeftacted gowene bul theed otn it, Herry ave ork and mele's to beeizleer.
"No to cepinte.  The ind sciosil, to Sled.  "Hagor barly no doped?" "E le tery her gaamtpiruuce, woudlet pent ak terig choutter eshm gy luce t. n'vere Fredpe witt fut sbots thagoweardste sarny wait.  "I yin noo Wlaeve Gars ass non ured mon.  alllone."
	"He Id I the, Heing -. Wansly, is you amea eple.  Theris and tare.
"Lost Vhen bed.  Hasy ap sty nont sared!" Dus the, Hand Creve chittesin.
	"I.
Thers Pongent wmou


batch 4398/44301:  10%|▉         | 4377/44301 [00:21<03:15, 204.64it/s]
n_step=93001/310107, ave loss=2.1598775792737186



 Whade ofh he gor't theent, he facce sabeif bidgrikd wall in's tor metened Erowt youth on a dod owke on harrying, niclanceetesp bock bue dearick, ritped she greardne a hleel seaght.  Derns, forpeingriun, chient sare'tif mome of athen his shis ver, wo

batch 16398/44301:  37%|███▋      | 16384/44301 [01:20<02:20, 198.37it/s]
n_step=105001/310107, ave loss=2.1566566002077048



"
"Dith thas hid peainghef ferk ath wath shiiint a intiondput did ing vaxceins Hardming doder feam thad Krit, tantrethap the se to
"
"I'rnus clot. "
W'R	WiFlreumNood Mas saveusly his craiker pill chither, a reirko ig.
"Youd icke py -" stube, nad, I ham shed feacd Threde svread pents Hand be on wourgs, lamgers!"
Now a ghe day botire in he tesh Ming croin hung sareshime.
"Badd macud the miaclione this nowled atto ligh'd datar, row stows hion, warrey," s Ohe win in"
The dthig thow ind then- panten 


batch 17398/44301:  39%|███▉      | 17385/44301 [01:26<02:39, 169.26it/s]
n_step=106001/310107, ave loss=2.1630493621770825




"Wit
Hee froud of sexteye whad yay tHe the you verkees ag fokgart for wint eald. Urgaper Hacl'd?" that ut you mimene tuthed montir - Harls avered no tong ong wuld feall, he a "sigh thendid qouself nomeel's.  ""Wout, che he scanore dublarintee

batch 29398/44301:  66%|██████▋   | 29388/44301 [02:33<01:14, 201.09it/s]
n_step=118001/310107, ave loss=2.1689765002442543



  I., Werre sceadd whot ind ton miokessey so. . .  Harry, the ante bealg would.
She minteetart nowto mere thh ont fadecunt the carid ha the to roud the liem.
""Ad yin've wand anglonss.:.  Harry of abonssom Gulg herou," savland.
"Thare rerete den'te gf, ottele, moch, at he im otr If at to boinge ata yout theng the slafyed of and wersy.
Ker on Crocking, hir Sktin coffrildoosing that's in ank wor porss Daun sough tatoRs Sdint here with at Karnim taby hit cubretry, (on ""Dare dem ged hery o doury ba


batch 30398/44301:  69%|██████▊   | 30379/44301 [02:38<01:13, 190.31it/s]
n_step=119001/310107, ave loss=2.1401429579455153



. . Harry this lrominedben demly wombloum chared sile Harry no ther hery wasallouer gherwig, .  Her, ow, oul wacid barby hoid Heve wery fop he cefph rued ing whe so weviin squs bach and frig salk wagof but youm thhe yer miaitete, whiiy, arry h

batch 42398/44301:  96%|█████████▌| 42385/44301 [03:40<00:09, 201.12it/s]
n_step=131001/310107, ave loss=2.088819596175412




Wined fpaiktarf hevim, a daree bunmon.
"Steris.  The bunnt ach'id the was his she, Werily Drafyored bop ot benal hs werstroo.  Whe undeled wit yicoongin wis, at thy cloud and Mave stroiacer hed cheall tene him.  Harry.  Avy."
are singe frhe reage.
"Merdis.
"Do,dodm I sath, his" heres har Bus ay it as the the hen's whopled Harry panetst laviny Dokd on tis that thas ho andidy, wam ghan ,"
"I wery hit hizKasch amtar to has pacasne.  Thous us bried theas an's thoud erusailed as has aged, to wamnigh


batch 43398/44301:  98%|█████████▊| 43391/44301 [03:45<00:04, 201.81it/s]
n_step=132001/310107, ave loss=2.089197693748906



"
S a und 'is pedar the Cardind.
"he to thtis igMing te as woraakells. . .  I. . Cure, we cranto de hon at, ."
HRo, arry.  "I sicne, on ssparsgint ep roixt apst fotriun enten, am be tof ntich lo, diles whid hat waram.
"SUred velveet.  ind reked 

batch 11097/44301:  25%|██▌       | 11076/44301 [00:55<02:43, 202.95it/s]
n_step=144001/310107, ave loss=2.170676625133851



 .
"Verrigh sat tay a dount wou jusht to care, roun a side whay stime -Wort sciond and of bloont, the is and sendst int'y," ild at for!" ha peagelking stel mewly shidou out ditplestertun've ofley!  Mnte stracu,"s and ot hiit ste than, rhote is their erved saisarvir.
Tham tho a d wores eespolp cow, bioney ay inch's tat the dow booked card Hoo ad lig) wand at up coll!" said pamth daker hiiran ther.  Ninte the is tof sockng. Womat theid a a diter matirgad cull at Rneveacly. "That of at ard s. He ko


batch 12097/44301:  27%|██▋       | 12096/44301 [01:00<02:29, 214.78it/s]
n_step=145001/310107, ave loss=2.1681197669314622




ok a prigk thous the,.  Brey vefizd has foolc, - havery ous hasl yone.  "Hadlletesaed rayiln falt ir Gergharfointing had  Mund tho scevicred pedre cofing tut Harry'uthing tliis," soull.  Tucr dow's wizLke blang.
	Hages to wand bale int hat ras

batch 24097/44301:  54%|█████▍    | 24079/44301 [01:59<01:34, 213.48it/s]
n_step=157001/310107, ave loss=2.1245360784444913



  Hourt he Biing snacaethed Bower ther her it is," sam umacd?" I ples; I a Till yom,"ser Pore ghoing it of doonedry.
Jegors.
. Lore billy. .  "The Haten's wam.t was wereseag fant to konn, ligsest weresplimint birtiksting.  Orefrvin?" te she, gopessioner te at bers foutes are bense ethe tele wann thorky's souldostnagh his oup theth a lace nalt emore 'svoung ound the the ett ald whesparot hinickien, tericpread hium of hies thas miwnosty, bled spotse.  wand intir?"  ore onestitteagnss tulle.  Ot sc


batch 25097/44301:  57%|█████▋    | 25089/44301 [02:04<01:33, 206.05it/s]
n_step=158001/310107, ave loss=2.1285507324511674




Che barneded of lom the it beth wind ut jutared.You an the a Rno  herey ut plaverriim Ronging thit the weretsanspoull oupusf nittoo -
"This is megpron;  otes, Rred darilg! Preel, ir chumt kne froon theor parginger pow tey coort po dutpe a our

batch 37097/44301:  84%|████████▎ | 37078/44301 [03:03<00:33, 218.56it/s]
n_step=170001/310107, ave loss=2.086324718191424




"Durd to cawart thin chaiis Jumagasatead a clezy is Hat Id caticked, was thitparinbited bemed cermow dool; oornetnon hiis extemaid o't one zawne I'de, ofe's thow on a St stiring now sibe, to Led atse.  said his nevlighed - foung qull.
"Duld seater pate nat theel who.
"ENzrano sm agble the der yout ashs Harry thind somess fweans finof inle Hacd loaden. . be ot evit, ney ing at sul, tun ficloky fimor wumence Rindoomoretch, of a tugh. ..  It slonedtint, afbiit, elle fott to Eofes his of diny habre


batch 38097/44301:  86%|████████▌ | 38076/44301 [03:08<00:29, 211.64it/s]
n_step=171001/310107, ave loss=2.101656156277369



 Wert wers eeriding. orrm. Beacid, toustanciche, kidmiuve then liy nad an hy I Harry.
"Voudmry't ind's Hand Sxtsussin a Que panou thelet, Ave sabrely.
"Se fadund, semint af indidow,"
"Vering.  Harrid bar he goring see ceforar axch's an wabludn e

batch 5796/44301:  13%|█▎        | 5793/44301 [00:34<03:54, 164.30it/s]
n_step=183001/310107, ave loss=2.140773128309863



 .  Beaned steyo!"
"
"De d dne tat thisttt at bradakery a mundy. Dld of luver.  Fleshe sant thok powh eslecher had sloked.  "Mrsoing suths fore've tamonle.  Baglph harrs, wither."
Souzre pomen the of thintf.
Hair she ligkobon, is aty's de rawer dolle."
"I, geaphencindorers aplel he state te and croodernisss bugllomenterey!"
"Wasy all bely thas Harrin, thiis thed of thid buid hiw coundes mech on dirg beey enfinge int afosh. bo a Harry Gring ppveilpst and at ofe sacting, olply the tus..
Finly care


batch 6796/44301:  15%|█▌        | 6779/44301 [00:40<03:37, 172.21it/s]
n_step=184001/310107, ave loss=2.1705552586030663



  He sloslesst thay  ream on sur - agorg of the the fhe- chow osdesing ayto, reimry lall and the ceareare wasbppy-ella," his the a Mrssimeall He of saresssamell the,t you te thot and of.
"Letht. "On wat as My oresacurcing. He Gros hibed stiry weeme

batch 18796/44301:  42%|████▏     | 18788/44301 [01:50<02:20, 181.16it/s]
n_step=196001/310107, ave loss=2.114499991235356




A dutre, ont?"  Buce foliled, Harred Frongractred hughh mied-act oued wher?" Ad befplt. Comir thenly our, the tohs the Mve fondery, femided the sean, ing was ug, Geiclt pisterd wind Mneeore, dut," tea bing aleled H'R
Preakng Kruts craaking cumlal cretamey at drared thed thichis josed have you dartears scaten sotare sunce Qus, tho tile of heren' suld byecharinselpss tuttione ut?"
"He der,"ar wspavereed a the botils fore sal an face'lk the ghe gure! frat welly.
"phe depid a to cares raret chapimo


batch 19796/44301:  45%|████▍     | 19786/44301 [01:55<02:09, 189.74it/s]
n_step=197001/310107, ave loss=2.1099105262409736



  I Harrin fint grigre a Luleoled was betsne.. . whe, she Hare waid ry-out hand stemting how frogyius Cofte mory, of and gower wheer Qut he voussiond oir wastant amiing Harry dus drineadgh at brik hintin reicciagred - paks, and gwar ke skiing g

batch 31796/44301:  72%|███████▏  | 31786/44301 [03:00<01:03, 195.96it/s]
n_step=209001/310107, ave loss=2.0944999184888276



  "The date nets and ing at on neant, bundeven mis loverid but beineerou sham fous cedower nory!" salariman nadle'd larrut ercidextided that and the sarep if ders - hat u gretfre hus his sargrerwan dont on croffmewne sem-"s  at tuceby he he deily.
" "Oreyiowe Alss.. ."
"Thoun an Hirould Mm, whe santmet of tuining beave bent wildne therefant, faclbe, an Hirding whe perowont, a puth ockerine dphaspiled, Doint I thered. ."
"Ohis stirtugt - ander frowringecine -"vis.  "Bocked jul if ove Snaple caric


batch 32796/44301:  74%|███████▍  | 32788/44301 [03:05<01:02, 185.58it/s]
n_step=210001/310107, ave loss=2.0922798233201765



.."
"He sput bictle deor oufglees herys baifizerms kmong tut its dore beged?"
"Lou, Hernabl evout a the it.
"Low soyingh anny thingle higing his the und hout hit in tolet cuwte krstredn- sitear oung hions, ank for graiut puern, dy Hraveer tar 

batch 44301/44301: 100%|██████████| 44301/44301 [04:07<00:00, 178.94it/s]
starting epoch: 6 ...
batch 495/44301:   1%|          | 486/44301 [00:02<03:58, 183.41it/s]
n_step=222001/310107, ave loss=2.0704316963823284



"
Mindine spe moud un divly wat A Kand wore sworr squbde botald. Kryoun llutene stave.
"Hackeatleast to buther upy Loking owe.
"He Aven rome Mflare to tunis. Wind fo.  Hoth ca hat hery.  That dould sougters vimting houndelly  cacky thils the, oves thintoonedk batpapeid -  Down at the, Mthesk, they, oud, ynet's jutn hout, etheers.  oner sall cense seint.  "De fabeng ten a Youth Culr carosterradedeamacl evid wit said Soml fotunder you falld her.  Al and pinky, at of fosanssy heres Mater Bet lpoud.


batch 1495/44301:   3%|▎         | 1489/44301 [00:07<03:44, 190.37it/s]
n_step=223001/310107, ave loss=2.0989495156270226



  "Sthis whead cournon, ared in.  Theed mumn is!.
"veroyis sing hit hhomer ker uslous even caon, nover.
 bevere thiad that and in feruspar wid sat tore f

batch 13495/44301:  30%|███       | 13477/44301 [01:12<02:39, 192.72it/s]
n_step=235001/310107, ave loss=2.1258449023292667




"Wto he na lel, am and colt comlbech for Hirned a kel Mled yhu direar beaedasioreard babbeere Metin-tf rexfpaser.
		"Che Manly sant he ferlice I't - to theing reaps gar, as tharry agee romion-," MCly hiding, crourg stey sared eviny af-Pichs to o sang gapid: s his boied was susse troughized mounty wais youres whoug, in the Goronenine am sapy and thy Rom, hio tous herspiver they roming azarry ying them He al tot "Nogt it't thannculd and sirblen go ared scary.
"E. Hive tiled ce." Therlweable Low t


batch 14495/44301:  33%|███▎      | 14488/44301 [01:18<02:34, 192.93it/s]
n_step=236001/310107, ave loss=2.1222134593870403




Happe dir a eving Harry agche'tes.  You've bature broted dard Heling.  "COgURTh - He forricent ont ou'ple to moughiverry, lit theindine, il af Mr cull ongrted of ot peting "verred, Hedarcudse's Groys.
"On ette arry, win: VildoTLeveen," hid vo

batch 26495/44301:  60%|█████▉    | 26478/44301 [02:23<01:29, 198.46it/s]
n_step=248001/310107, ave loss=2.1013544785476124



  "Nook row wandtone sargring the dadleant, Sod the vore wase tullud uvimingl ank wize's thor a ritevesu keply filt.
Chen bad te oored ioncy orrng frosar.
"Snow Hournt, the eve Sels tin' Herve ."
"We Riirs, herung wouswe of Kroulle, afusce moikler, - "It lloud seach of grator it font - frowe alletined thackustiken hiy ucked whreer Chaire-d weree notht to gfeve moaving, Haed if bith, forizizRhs the.
"Then hutu at Mact osamach zarall't ruth at Nakpse finsed. Avinken.  "Dofered he ked? amry.  Io, r


batch 27495/44301:  62%|██████▏   | 27479/44301 [02:28<01:34, 178.20it/s]
n_step=249001/310107, ave loss=2.107913861315852



  Round!" d dirns or St milero core a tablin ain't. 
The rofurided, coid to wen; at tut hom. . . Son tin youin now, Hald?"
"He up of ave Harry irnas wery eve Harre haver an'ld Gid as cam toirwe then Rock. .
"I said his lak and you his pont Chim

batch 39495/44301:  89%|████████▉ | 39473/44301 [03:33<00:25, 187.73it/s]
n_step=261001/310107, ave loss=2.060471784948284



 .  what Dary fopflloude wheid pountor and ask Mro, logtarcare qot woll syiugh sah of malemin, ane swere ham shey foold, ke whoth thou cinceden a tatevrof ouiingsh , ranty whaadd boighe soy. .  It winnugslis. ," oneede and to a have salidkes. .
Af vore waspen Harry Baaked sto, the spubled, the ne, pe.  Yey floratsinm. 
She lot it in atarmach.
"Whe hey, menssar hawer saofing..
"You his ildound milgar into, the and d hou gromer baty jum as fllocd veeiver ofle froore boin, nowen sold brewaint. . .



batch 40495/44301:  91%|█████████▏| 40489/44301 [03:38<00:19, 193.03it/s]
n_step=262001/310107, ave loss=2.0651978584034256



  The I was fea from ane renste to por, Bowe awrin' scemgrmed und pabfe to thin he he chen.  Thiod y . .
Ohe thisky rouver.  He for were in Slilvey airted natithed whe Harry hes vougtid....ht.  Sore Crowing sheid shag hin to haod of. . . . . Me

batch 8194/44301:  18%|█▊        | 8178/44301 [00:47<03:18, 181.65it/s]
n_step=274001/310107, ave loss=2.107570363907875



  He Gor.  Fray ach't merithed Babfen at onkedol.

We and 'lam, Hou be, wome is ack there, jumst hand dasswery theaveind, a bacrien ing a to Harmiles .  "I croikis. Weal that gand celled somly. 
"The sadd singsh thoth elly Weas prowapiing mo theis oflionts hised nouldeared gas Wuvites lamn mion,.
Dunvere think the un the Worid Ise yinen broblo..  "I'ls, tand pented eckintme is dartis and'and apprat the scophed werisaranding it, Wats feas ohtnoo thoont havery caistealem hiard nall," sis hiy thut 


batch 9194/44301:  21%|██        | 9182/44301 [00:52<03:02, 192.65it/s]
n_step=275001/310107, ave loss=2.079378427546164



. The milf would as an thath sine feartemor. Oth do id no youll," taid o tooked,'s rap up vomer sow's bacly to terinorsteived nutht whas I on, chars wislee kiing opt-the relarde that as of everer becunt mer, Harry therce Unny Matey hat on he what he

batch 21194/44301:  48%|████▊     | 21182/44301 [01:57<02:01, 190.53it/s]
n_step=287001/310107, ave loss=2.075115685266852




"Youdj perkeer, nost't ard wan's that that as it.
"Mw'le tonjyned cooring ageik mident; "You chakey, in tow!"  he laculr. Harmidfyaele.  He kent thoug!"
"VFi!RA
	ANn Herrick. . . . lercint irg.  If Sough evering shrok budry wigond shurd," meriled brofphe Ad hed owen sa shy frherimss.  "IH I a rotrjs, hou's ralasling hos. ..  I's Srofteby'lyne choow.  "Inim I cowing hom her. . The, she Pibyout," sim "Ot Demby, the Harry a thin ney mor nintou pell frou dashong otan..  Bhith the ke leaced leall hi


batch 22194/44301:  50%|█████     | 22192/44301 [02:02<01:56, 189.84it/s]
n_step=288001/310107, ave loss=2.052984238226971



"
Self opinsgad, houss, nowse as ily of sfulelw toldacon plaar ymhit ah thearry as hiuld mol," saad stirgoor womle. whe derlutser of Hedry stees,.
"I' swen laid bles his the all wis to windos niindishiron the at ladechesar the pol shite.  I the 

batch 34194/44301:  77%|███████▋  | 34189/44301 [03:07<00:56, 179.43it/s]
n_step=300001/310107, ave loss=2.0632120159160667




Mr!" you to. Crof.
"Snarg," sam he lee mmind. .  He hres wavarr..
"Weagh Peridgh he walamien hin- chilsble mont Domple," said Dchatd sered d tow in at to him-ol t heariedoned had s meme that towly.  "Ittheng, sexiindionacketred thind han chor and nat bied mie trousorin ourn't," say?Hoed therm.. . "Winedry Crob ham hiis an thahiod thoud at toy ur in the ory abdicaght Herm thy's"alloun restring fnow?  fro-en, Priskabd on of - bleted.
"Y ind facesloomosh, weriredte ofleins," Mring ruthandone sill 


batch 35194/44301:  79%|███████▉  | 35184/44301 [03:12<00:47, 190.89it/s]
n_step=301001/310107, ave loss=2.035464355405961



 "
"He yow womrone.  Hudrende at to ent chems was ky to ynhe faby excily dan Kerrimgry nintou dor ever tot and laiarging they te.  Thare of to her wishasly, to suth doutwer glant um ane dough scemandiing Prus pankingore, wirgover froousss loucc

In [19]:
synhthetizer = Synhthetizer(rnn, onehot_encoder)
sequence= synhthetizer(ts=2000, init_idx=hpdata.encode(np.array(["."]))[0])
print("".join(hpdata.decode(sequence.flatten())))

  I's it the king daok grogle ka, and he Crewint tliid ofling exirg. .  Hor, cougepid biftafitren therdent deasorg mime tlee, is mhe one, whe shus, coundn.  Thoustlawer adky y't and ports ecwarverearce you bey, "Doumng ofled at ofext bach siarg," s Ake doter the's theted ontap thout ce, mor foly!"
Whoun fone sanbh wase sirlid,"Whe the.   Apte what hey  theacer of to nquerkem ed giney Cedleakicky sigthere milpt wheat tun poith keng whis the to ound herfonied, Mros fimecclacker that tacked," the hny Sondouguth path the wereng and beed vodly, kiken.  ae s beivs lough the at mothen ing ste bark retheing whe tiosinns howk was what Gelbedelly, toon yion."
"
TRNet way were wounch ing Cilgor to. "Roodering bembled poring ut the ever him and , mon he, sun Will gin untte isder beaccrivong ank on this weme nemste he dounghture widreded tile, at heras on af lald. .  He mopergyfo
"
"Be.  He propell celb ke him hou romel on Eat ary might yong to the tork, has "Jim."
"Went wall foned,  hind Harry My 

In [None]:
lol = np.array([[1,4,40],[-10,2,0]])
np.maximum(np.minimum(lol, 5), -5)