# RNNs

## Imports

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import pickle
from copy import deepcopy
from math import sqrt, ceil
import datetime
import sys
from itertools import product
import pandas as pd
import json
import hyperopt

from data_utils import load_cfar10_batch, load_label_names
from losses import CategoricalHingeLoss, CategoricalCrossEntropyLoss
from activations import LinearActivation, ReLUActivation, SoftmaxActivation, Activation
from initializers import NormalInitializer, XavierInitializer
from layers import Dense, BatchNormalization
from regularizers import L2Regularizer
from models import Model
from metrics import AccuracyMetrics
from optimizers import SGDOptimizer
from lr_schedules import LRConstantSchedule, LRExponentialDecaySchedule, LRCyclingSchedule
from grad_check import eval_numerical_gradient, eval_numerical_gradient_array, numerical_gradient_check_model

## Data

In [2]:
class HPData():
    def __init__(self, path_to_file):
        """ Init.
        
        Parameters
        ----------
        path_to_file : str
            Path to text file.
            
        Notes
        -----
        None
        """
        # read text file
        with open(path_to_file, 'r') as f:
            self.book_str = f.read()
        
        # str to chars
        book_data = list(self.book_str)
        # chars to unique chars
        book_chars = list(set(book_data))
        
        # all chars as np
        self.book_data = np.array(book_data)
        # uniqe chars as np
        self.book_chars = np.array(book_chars)
    
    def get_encoder(self,):
        """ Returns encoder, i.e.: unique chars.

        Parameters
        ----------
        None

        Returns
        -------
        book_chars : np.ndarray of shape (n_unique_chars, )
            The encoder as np.

        Notes
        -----
        None
        """
        return self.book_chars
    
    def char_to_idx(self, char):
        """ Convert a char to an index from the encoder np array.

        Parameters
        ----------
        char : str
            A char.

        Returns
        -------
        np.ndarray
            The index repre of char, of shape (,).

        Notes
        -----
        None
        """
        return np.argwhere(char == self.book_chars).flatten()[0]
    
    def idx_to_char(self, idx):
        """ Convert an index to char in the encoder np array.

        Parameters
        ----------
        idx : int
            The index repr of a char.

        Returns
        -------
        str
            The char.

        Notes
        -----
        None
        """
        return self.book_chars[idx]
    
    def encode(self, decoding):
        """ Encode a sequence of chars into a sequence of indices based on the encoder.

        Parameters
        ----------
        chars : np.ndarray
            The sequence of chars, of shape (n_chars,)

        Returns
        -------
        encoding : np.ndarray
            The sequence of index representation of the chars, of shape (n_chars,)

        Notes
        -----
        None
        """
        encoding = []
        
        for d in decoding:
            encoding.append(self.char_to_idx(d))
            
        encoding = np.array(encoding)
        
        return encoding
    
    def decode(self, encoding):
        """ Decode a sequence of indices into a sequence of chars based on the encoder.

        Parameters
        ----------
        encoding : np.ndarray
            The sequence of index representation of the chars, of shape (n_chars,)

        Returns
        -------
        decoding : np.ndarray
            The sequence of chars, of shape (n_chars,)

        Notes
        -----
        None
        """
        decoding = []
        
        for e in encoding:
            decoding.append(self.idx_to_char(e))
            
        decoding = np.array(decoding)
        
        return decoding

In [3]:
class OneHotEncoder():
    def __init__(self, length):
        # length of one-hot encoding
        self.length = length
    
    def __call__(self, x, encode=True):
        """ Encode or decode a sequence x.

        Parameters
        ----------
        x : np.ndarray
            The sequence of index representation of chars, of shape (n_chars,)

        Returns
        -------
        e or d: np.ndarray
            The sequence of one-hot encoded vectors of chars, of shape (n_chars, length)

        Notes
        -----
        None
        """
        if encode:
            e = np.zeros((x.shape[0], self.length))
            e[np.arange(x.shape[0]), x] = 1
            return e
        else:
            d = np.argwhere(one_hot_encoding == 1)[:,1]
            return d

## Read data

Read, encode and decode data.

In [4]:
path_to_file = "data/hp/goblet_book.txt"
hpdata = HPData(path_to_file=path_to_file)
print(hpdata.get_encoder().shape)
x = hpdata.book_data[:200]
print(x)
encoding = hpdata.encode(x)
print(hpdata.get_encoder().shape)
print(encoding)
decoding = hpdata.decode(encoding)
print(decoding)

np.testing.assert_array_equal(decoding, x)

(80,)
['H' 'A' 'R' 'R' 'Y' ' ' 'P' 'O' 'T' 'T' 'E' 'R' ' ' 'A' 'N' 'D' ' ' 'T'
 'H' 'E' ' ' 'G' 'O' 'B' 'L' 'E' 'T' ' ' 'O' 'F' ' ' 'F' 'I' 'R' 'E' '\n'
 '\n' 'C' 'H' 'A' 'P' 'T' 'E' 'R' ' ' 'O' 'N' 'E' ' ' '-' ' ' 'T' 'H' 'E'
 ' ' 'R' 'I' 'D' 'D' 'L' 'E' ' ' 'H' 'O' 'U' 'S' 'E' '\n' '\n' '\t' 'T'
 'h' 'e' ' ' 'v' 'i' 'l' 'l' 'a' 'g' 'e' 'r' 's' ' ' 'o' 'f' ' ' 'L' 'i'
 't' 't' 'l' 'e' ' ' 'H' 'a' 'n' 'g' 'l' 'e' 'r' 'o' 'n' ' ' 's' 't' 'i'
 'l' 'l' ' ' 'c' 'a' 'l' 'l' 'e' 'd' ' ' 'i' 't' ' ' '"' 't' 'h' 'e' ' '
 'R' 'i' 'd' 'd' 'l' 'e' ' ' 'H' 'o' 'u' 's' 'e' ',' '"' ' ' 'e' 'v' 'e'
 'n' ' ' 't' 'h' 'o' 'u' 'g' 'h' ' ' 'i' 't' ' ' 'h' 'a' 'd' ' ' 'b' 'e'
 'e' 'n' ' ' 'm' 'a' 'n' 'y' ' ' 'y' 'e' 'a' 'r' 's' ' ' 's' 'i' 'n' 'c'
 'e' ' ' 't' 'h' 'e' ' ' 'R' 'i' 'd' 'd' 'l' 'e' ' ' 'f' 'a' 'm' 'i' 'l'
 'y' ' ' 'h']
(80,)
[72 17 13 13 61 64 44 32 56 56 76 13 64 17  4 57 64 56 72 76 64 54 32 58
 18 76 56 64 32 50 64 50 16 13 76 62 62 31 72 17 44 56 76 13 64 32  4 76
 64 73 64 56 72 76 64 13

## One-ho encode and decode data

In [5]:
onehot_encoder = OneHotEncoder(length=hpdata.get_encoder().size)
one_hot_encoding = onehot_encoder(encoding, encode=True)
print(one_hot_encoding.shape)
one_hot_decoding = onehot_encoder(one_hot_encoding, encode=False)
print(one_hot_decoding.shape)

np.testing.assert_array_equal(one_hot_decoding, encoding)
print(one_hot_decoding[7])
print(one_hot_encoding[7])

print(one_hot_decoding[37])
print(one_hot_encoding[37])

(200, 80)
(200,)
32
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
31
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]


In [6]:
x = np.array([".", "a"])
print(x)
encoding = hpdata.encode(x)
print(hpdata.get_encoder().shape)
print(encoding)
decoding = hpdata.decode(encoding)
print(decoding)

np.testing.assert_array_equal(decoding, x)

one_hot_encoding = onehot_encoder(encoding, encode=True)
print(one_hot_encoding)
print(one_hot_encoding.shape)
np.argwhere(hpdata.get_encoder() == "a")

['.' 'a']
(80,)
[51  1]
['.' 'a']
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]]
(2, 80)


array([[1]])

## RNN and helpers

In [7]:
class TanhActivation(Activation):
    """ Tanh activation.
    Can be followed by virtually anything.
    Inherits everything from class Activation.

    Attributes
    ----------
    cache : dict
        Run-time cache of attibutes such as gradients.

    Methods
    -------
    __init__()
        Constuctor.
    forward(z)
        Activates the linear transformation of the layer, and
        forward propagates activation. Activation is tanh.
    backward(g)
        Backpropagates incoming gradient into the layer, based on the tanh activation.
    __repr__()
        Returns the string representation of class.
    """

    def __init__(self, ):
        """ Constructor.

        Parameters
        ----------
        None

        Notes
        -----
        None
        """
        super().__init__()

    def forward(self, z):
        """ Activates the linear transformation of the layer, and
        forward propagates activation. Activation is tanh.

        Parameters
        ----------
        z : numpy.ndarray
            Linear transformation of layer.
            Shape is unknown here, but will usually be
            (batch size, this layer output dim = next layer input dim)

        Returns
        -------
        numpy.ndarray
            ReLU activation.

        Notes
        -----
        None
        """
        a = np.tanh(z)
        self.cache["a"] = deepcopy(a)
        return a

    def backward(self, g_in):
        """ Backpropagates incoming gradient into the layer, based on the tanh activation.

        Parameters
        ----------
        g_in : numpy.ndarray
            Incoming gradient to the activation.
            Shape is unknown here, but will usually be
            (batch size, this layer output dim = next layer input dim)

        Returns
        -------
        numpy.ndarray
            Gradient of activation.
            Shape is unknown here, but will usually be
            (batch size, this layer output dim = next layer input dim)

        Notes
        -----
        None
        """
        a = self.cache["a"]
        g_out = (1 - np.power(a, 2)) * g_in
        return g_out

    def __repr__(self):
        """ Returns the string representation of class.

        Parameters
        ----------
        None

        Returns
        -------
        repr_str : str
            The string representation of the class.

        Notes
        -----
        None
        """
        repr_str = "tanh"
        return repr_str

In [8]:
def test_tanh_activation():
    
    tanh_activation = TanhActivation()
    np.random.seed(231)
    x = np.random.randn(5, 10)
    g_in = np.random.randn(*x.shape)
    fx = lambda x: TanhActivation.forward(tanh_activation, x)
    g_out_num = eval_numerical_gradient_array(fx, x, g_in)
    g_out = tanh_activation.backward(g_in)
    np.testing.assert_array_almost_equal(g_out, g_out_num, decimal=6)

    print("test_relu_activation passed")
    
test_tanh_activation()

test_relu_activation passed


In [118]:
class RNN():
    """ Many-to-many."""
    def __init__(self, in_dim, out_dim, hidden_dim, 
                 kernel_h_initializer, bias_h_initializer,
                 kernel_o_initializer, bias_o_initializer,
                 kernel_regularizer, 
                 activation_h, activation_o):
        
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hidden_dim = hidden_dim

        self.kernel_h_initializer = kernel_h_initializer
        self.bias_h_initializer = bias_h_initializer
        self.kernel_o_initializer = kernel_o_initializer
        self.bias_o_initializer = bias_o_initializer

        self.u = kernel_h_initializer.initialize(size=(in_dim, hidden_dim))
        self.w = kernel_h_initializer.initialize(size=(hidden_dim, hidden_dim))
        self.b = bias_h_initializer.initialize(size=(1, hidden_dim))
        
        self.v = kernel_o_initializer.initialize(size=(hidden_dim, out_dim))
        self.c = bias_o_initializer.initialize(size=(1, out_dim))
        
        self.h_shape = (1, hidden_dim)
        
        self.kernel_regularizer = kernel_regularizer

        self.activation_h = activation_h
        self.activation_o = activation_o

        self.cache = {}
        self.grads = {}

        self.has_learnable_params = True
    
    def forward(self, x, **params):
        
        self.cache["x"] = deepcopy(x)
        h = np.zeros(self.h_shape)
        h_concat = np.zeros((x.shape[0], h.shape[1]))
        
        assert h.shape == (1, self.hidden_dim)
        
        for idx, x_ in enumerate(x):
            x_ = x_.reshape(1,-1)
            assert x_.shape == (1,self.in_dim)
            a = np.dot(x_, self.u) + np.dot(h, self.w) + self.b
            assert a.shape == (1, self.hidden_dim)
            h = self.activation_h.forward(a)
            h_concat[idx] = h
            assert h.shape == (1, self.hidden_dim)
        
        self.cache["h_concat"] = deepcopy(h_concat)
        assert h_concat.shape == (x.shape[0], h.shape[1])
        o = np.dot(h_concat, self.v) + self.c
        assert o.shape == (x.shape[0], self.out_dim), f"o.shape={o.shape}"
        p = self.activation_o.forward(o)
        print(p.shape)
        assert p.shape == (x.shape[0], self.out_dim)
        return p
    
    def backward(self, g_in, **params):
        # x.shape = (x.shape[0], in_dim)
        x = deepcopy(self.cache["x"])
        # h_concat.shape = (x.shape[0], hidden_dim)
        h_concat = deepcopy(self.cache["h_concat"])
        
        # g_in.shape = (batch_size, )
        assert g_in.shape == (x.shape[0],)
        # g_a_o.shape = (batch_size, out_dim)
        g_a_o = self.activation_o.backward(g_in)
        assert g_a_o.shape == (x.shape[0], self.out_dim)
        
        # g_h_concat.shape = (batch_size, hidden_dim)
        g_h_concat = np.zeros((x.shape[0], self.hidden_dim))
        # v.shape = (hidden_dim, out_dim)
        # (1,hidden_dim) = (1,out_dim) * (hidden_dim, out_dim).T
        g_h_concat[-1] = np.dot(g_a_o[-1].reshape(1,-1), self.v.T)
        assert np.dot(g_a_o[-1].reshape(1,-1), self.v.T).shape == (1,self.hidden_dim)
        
        g_a_h = self.activation_h.backward(h_concat)
        assert g_a_h.shape == (x.shape[0], self.hidden_dim)
        
        g_a = np.zeros((x.shape[0], self.hidden_dim))
        # (1, hidden_dim) = (1, hidden_dim) * (1, hidden_dim)
        g_a[-1] = np.multiply(g_h_concat[-1].reshape(1,-1), g_a_h[-1].reshape(1,-1))
        assert np.multiply(g_h_concat[-1].reshape(1,-1), g_a_h[-1].reshape(1,-1)).shape \
            == (1, self.hidden_dim)
        
        for t in reversed(range(x.shape[0]-1)):
            #grad_h.append(grad_o[t][np.newaxis, :] @ rnn.v + grad_a[-1] @ rnn.w)
            # (1,hidden_dim) = (1,out_dim) * (hidden_dim, out_dim).T
            # \+ (1,hidden_dim) * (hidden_dim, hidden_dim), maybe w.T?
            g_h_concat[t] = np.dot(g_a_o[t].reshape(1,-1), self.v.T) \
                + np.dot(g_a[t+1].reshape(1,-1), self.w)
            g_a[t] = np.multiply(g_h_concat[t].reshape(1,-1), g_a_h[-1].reshape(1,-1))
        
        #print(g_h_concat)
        assert g_h_concat.shape == (x.shape[0], self.hidden_dim)
        assert g_a.shape == (x.shape[0], self.hidden_dim)
        
        # (hidden_dim, out_dim) = (x.shape[0], hidden_dim).T * (x.shape[0], out_dim)
        g_v = np.dot(h_concat.T, g_a_o)
        assert g_v.shape == (self.hidden_dim, self.out_dim)
        self.grads["dv"] = deepcopy(g_v)
        
        # Auxiliar h matrix that includes h_prev
        h_aux = np.zeros(h_concat.shape)
        h_init = np.zeros((1, self.hidden_dim))
        h_aux[0, :] = h_init
        h_aux[1:, :] = h_concat[0:-1, :]
        assert h_aux.shape == (x.shape[0], self.hidden_dim)
        
        # (hidden_dim, hidden_dim) = (x.shape[0], hidden_dim).T * (x.shape[0], hidden_dim)
        g_w = np.dot(h_aux.T, g_a)
        assert g_w.shape == (self.hidden_dim, self.hidden_dim)
        self.grads["dw"] = deepcopy(g_w)
        
        # (in_dim, hidden_dim) = (x.shape[0], in_dim).T * (x.shape[0], hidden_dim)
        g_u = np.dot(x.T, g_a)
        assert g_u.shape == (self.in_dim, self.hidden_dim)
        self.grads["du"] = deepcopy(g_u)
        
        # (1, hidden_dim) = sum((x.shape[0], self.hidden_dim), axis=0)
        g_b = np.sum(g_a, axis=0).reshape(1,-1)
        assert g_b.shape == (1, self.hidden_dim), f"g_b.shape={g_b.shape}"
        self.grads["db"] = deepcopy(g_b)
        
        # (1, out_dim) = sum((x.shape[0], self.out_dim), axis=0)
        g_c = np.sum(g_a_o, axis=0).reshape(1,-1)
        assert g_c.shape == (1, self.out_dim)
        self.grads["dc"] = deepcopy(g_c)
        
    def if_has_learnable_params(self, ):    
        return self.has_learnable_params
    
    def get_u(self, ):
        return deepcopy(self.u)

    def get_w(self, ):
        return deepcopy(self.w)
    
    def get_b(self, ):
        return deepcopy(self.b)
    
    def get_v(self, ):
        return deepcopy(self.v)
    
    def get_c(self, ):
        return deepcopy(self.c)

    def get_learnable_params(self):
        return {
            "u": self.get_u(), "w": self.get_w(), "b": self.get_b(), 
            "v": self.get_v(), "c": self.get_c()
        }
    
    
    def set_u(self, u):
        self.u = deepcopy(u)

    def set_w(self, w):
        self.w = deepcopy(w)
    
    def set_b(self, b):
        self.b = deepcopy(b)
    
    def set_v(self, v):
        self.v = deepcopy(v)
    
    def set_c(self, c):
        self.c = deepcopy(c)

    def set_learnable_params(self, **learnable_params):
        self.set_u(learnable_params["u"])
        self.set_w(learnable_params["w"])
        self.set_b(learnable_params["b"])
        self.set_v(learnable_params["v"])
        self.set_c(learnable_params["c"])

    def get_du(self, ):
        if "du" in self.grads.keys():
            du = self.grads["du"]
            ret = deepcopy(du)
        else:
            ret = None

        return ret
    
    def get_dw(self, ):
        if "dw" in self.grads.keys():
            dw = self.grads["dw"]
            ret = deepcopy(dw)
        else:
            ret = None

        return ret

    def get_db(self, ):
        if "db" in self.grads.keys():
            db = self.grads["db"]
            ret = deepcopy(db)
        else:
            ret = None

        return ret
    
    def get_dv(self, ):
        if "dv" in self.grads.keys():
            dv = self.grads["dv"]
            ret = deepcopy(dv)
        else:
            ret = None

        return ret
    
    def get_dc(self, ):
        if "dc" in self.grads.keys():
            dc = self.grads["dc"]
            ret = deepcopy(dc)
        else:
            ret = None

        return ret

    def get_learnable_params_grads(self):
        return {
            "du": self.get_du(), "dw": self.get_dw(), "db": self.get_db(),
            "dv": self.get_dv(), "dc": self.get_dc()
        }
        

        
    
class Synhthetizer():
    def __init__(self, rnn, onehot_encoder):
        self.rnn = rnn
        self.onehot_encoder = onehot_encoder
        self.h_concat = np.zeros(rnn.h_shape)
    
    def sample(self, lenght, p):
        # select character from softmax weighted dist over all chars
        print(p.flatten().shape)
        
        return np.random.choice(range(lenght), size=1, replace=True, p=p.flatten())
        
    
    def __call__(self, ts, init_idx):
        
        x = self.onehot_encoder(np.array([init_idx]).T, encode=True)
        #print(x.shape)
        assert x.shape == (1, self.onehot_encoder.length)
        sequence = []
        
        for t in range(ts):
            p = rnn.forward(x)
            x_idx = self.sample(lenght=x.shape[1], p=p)
            sequence.append(x_idx)
            x = self.onehot_encoder(np.array([x_idx]).T, encode=True)
    
        return np.array(sequence)

In [134]:
init_params = {"coeff": 1.0, "mean": 0.0, "std": 0.01}
kernel_h_initializer = NormalInitializer(seed=None, **init_params)
bias_h_initializer = NormalInitializer(seed=None, **init_params)
kernel_o_initializer = NormalInitializer(seed=None, **init_params)
bias_o_initializer = NormalInitializer(seed=None, **init_params)
kernel_regularizer = None

num_inputs = 1000
size = (num_inputs,hpdata.get_encoder().size)
x = np.random.normal(loc=0, scale=1, size=size)
y = np.random.randint(hpdata.get_encoder().size, size=num_inputs)

rnn = RNN(in_dim=hpdata.get_encoder().size, out_dim=hpdata.get_encoder().size, hidden_dim=80, 
          kernel_h_initializer=kernel_h_initializer, 
          bias_h_initializer=bias_h_initializer, 
          kernel_o_initializer=kernel_o_initializer, 
          bias_o_initializer=bias_o_initializer, 
          kernel_regularizer=kernel_regularizer, 
          activation_h=TanhActivation(),
          activation_o=SoftmaxActivation())

In [None]:
loss = CategoricalCrossEntropyLoss()
lr_initial=0.1
#optimizer = SGDOptimizer(lr_schedule=LRConstantSchedule(lr_initial))

n_epochs = 10000

for epoch in range(n_epochs):

    scores = rnn.forward(x)
    #print(a.shape)
    data_loss = loss.compute_loss(scores, y)
    print(f"epoch={epoch}, loss={data_loss}")
    params_train = {"mode": "train", "seed": None}
    rnn.backward(loss.grad(), **params_train)

    trainable_params=rnn.get_learnable_params()
    grads=rnn.get_learnable_params_grads()

    for k,v in trainable_params.items():
        trainable_params[k] = deepcopy(v - lr_initial * grads["d"+k])

    rnn.set_learnable_params(**trainable_params)

(1000, 80)
epoch=0, loss=4.382484985972991
(1000, 80)
epoch=1, loss=4.3823146384165135
(1000, 80)
epoch=2, loss=4.382144567517002
(1000, 80)
epoch=3, loss=4.381974772523244
(1000, 80)
epoch=4, loss=4.38180525268554
(1000, 80)
epoch=5, loss=4.381636007255703
(1000, 80)
epoch=6, loss=4.381467035487043
(1000, 80)
epoch=7, loss=4.381298336634369
(1000, 80)
epoch=8, loss=4.3811299099539776
(1000, 80)
epoch=9, loss=4.380961754703639
(1000, 80)
epoch=10, loss=4.380793870142604
(1000, 80)
epoch=11, loss=4.38062625553158
(1000, 80)
epoch=12, loss=4.3804589101327425
(1000, 80)
epoch=13, loss=4.380291833209711
(1000, 80)
epoch=14, loss=4.380125024027549
(1000, 80)
epoch=15, loss=4.379958481852759
(1000, 80)
epoch=16, loss=4.379792205953273
(1000, 80)
epoch=17, loss=4.379626195598445
(1000, 80)
epoch=18, loss=4.3794604500590415
(1000, 80)
epoch=19, loss=4.379294968607241
(1000, 80)
epoch=20, loss=4.379129750516622
(1000, 80)
epoch=21, loss=4.3789647950621555
(1000, 80)
epoch=22, loss=4.37880010152

(1000, 80)
epoch=189, loss=4.354437452117033
(1000, 80)
epoch=190, loss=4.354307375221817
(1000, 80)
epoch=191, loss=4.3541774525106725
(1000, 80)
epoch=192, loss=4.354047683402183
(1000, 80)
epoch=193, loss=4.353918067315246
(1000, 80)
epoch=194, loss=4.3537886036690585
(1000, 80)
epoch=195, loss=4.353659291883127
(1000, 80)
epoch=196, loss=4.353530131377243
(1000, 80)
epoch=197, loss=4.353401121571492
(1000, 80)
epoch=198, loss=4.353272261886238
(1000, 80)
epoch=199, loss=4.353143551742117
(1000, 80)
epoch=200, loss=4.35301499056004
(1000, 80)
epoch=201, loss=4.352886577761177
(1000, 80)
epoch=202, loss=4.352758312766958
(1000, 80)
epoch=203, loss=4.352630194999059
(1000, 80)
epoch=204, loss=4.352502223879404
(1000, 80)
epoch=205, loss=4.352374398830154
(1000, 80)
epoch=206, loss=4.3522467192737
(1000, 80)
epoch=207, loss=4.352119184632662
(1000, 80)
epoch=208, loss=4.351991794329879
(1000, 80)
epoch=209, loss=4.3518645477883995
(1000, 80)
epoch=210, loss=4.351737444431483
(1000, 80)

(1000, 80)
epoch=371, loss=4.332732150538105
(1000, 80)
epoch=372, loss=4.332620580165551
(1000, 80)
epoch=373, loss=4.332509056751928
(1000, 80)
epoch=374, loss=4.332397579659083
(1000, 80)
epoch=375, loss=4.332286148247803
(1000, 80)
epoch=376, loss=4.332174761877802
(1000, 80)
epoch=377, loss=4.33206341990771
(1000, 80)
epoch=378, loss=4.331952121695071
(1000, 80)
epoch=379, loss=4.33184086659632
(1000, 80)
epoch=380, loss=4.331729653966782
(1000, 80)
epoch=381, loss=4.331618483160661
(1000, 80)
epoch=382, loss=4.331507353531021
(1000, 80)
epoch=383, loss=4.331396264429787
(1000, 80)
epoch=384, loss=4.331285215207728
(1000, 80)
epoch=385, loss=4.331174205214451
(1000, 80)
epoch=386, loss=4.331063233798379
(1000, 80)
epoch=387, loss=4.330952300306756
(1000, 80)
epoch=388, loss=4.330841404085624
(1000, 80)
epoch=389, loss=4.33073054447982
(1000, 80)
epoch=390, loss=4.330619720832959
(1000, 80)
epoch=391, loss=4.33050893248743
(1000, 80)
epoch=392, loss=4.330398178784379
(1000, 80)
epo

(1000, 80)
epoch=558, loss=4.311927023788615
(1000, 80)
epoch=559, loss=4.31181139374317
(1000, 80)
epoch=560, loss=4.311695659018611
(1000, 80)
epoch=561, loss=4.311579818533352
(1000, 80)
epoch=562, loss=4.311463871201621
(1000, 80)
epoch=563, loss=4.311347815933442
(1000, 80)
epoch=564, loss=4.3112316516346
(1000, 80)
epoch=565, loss=4.311115377206627
(1000, 80)
epoch=566, loss=4.310998991546767
(1000, 80)
epoch=567, loss=4.3108824935479495
(1000, 80)
epoch=568, loss=4.310765882098774
(1000, 80)
epoch=569, loss=4.31064915608347
(1000, 80)
epoch=570, loss=4.31053231438188
(1000, 80)
epoch=571, loss=4.310415355869425
(1000, 80)
epoch=572, loss=4.310298279417089
(1000, 80)
epoch=573, loss=4.310181083891383
(1000, 80)
epoch=574, loss=4.3100637681543175
(1000, 80)
epoch=575, loss=4.309946331063384
(1000, 80)
epoch=576, loss=4.309828771471519
(1000, 80)
epoch=577, loss=4.309711088227081
(1000, 80)
epoch=578, loss=4.309593280173827
(1000, 80)
epoch=579, loss=4.309475346150873
(1000, 80)
ep

(1000, 80)
epoch=741, loss=4.287701460369708
(1000, 80)
epoch=742, loss=4.287543150231789
(1000, 80)
epoch=743, loss=4.287384438299043
(1000, 80)
epoch=744, loss=4.287225322231941
(1000, 80)
epoch=745, loss=4.287065799681655
(1000, 80)
epoch=746, loss=4.286905868290048
(1000, 80)
epoch=747, loss=4.286745525689664
(1000, 80)
epoch=748, loss=4.286584769503722
(1000, 80)
epoch=749, loss=4.286423597346107
(1000, 80)
epoch=750, loss=4.2862620068213575
(1000, 80)
epoch=751, loss=4.286099995524664
(1000, 80)
epoch=752, loss=4.285937561041857
(1000, 80)
epoch=753, loss=4.285774700949412
(1000, 80)
epoch=754, loss=4.2856114128144265
(1000, 80)
epoch=755, loss=4.2854476941946285
(1000, 80)
epoch=756, loss=4.285283542638374
(1000, 80)
epoch=757, loss=4.285118955684626
(1000, 80)
epoch=758, loss=4.284953930862979
(1000, 80)
epoch=759, loss=4.284788465693633
(1000, 80)
epoch=760, loss=4.284622557687402
(1000, 80)
epoch=761, loss=4.2844562043457115
(1000, 80)
epoch=762, loss=4.284289403160603
(1000,

(1000, 80)
epoch=929, loss=4.247879455293502
(1000, 80)
epoch=930, loss=4.247595168246818
(1000, 80)
epoch=931, loss=4.247309901078023
(1000, 80)
epoch=932, loss=4.247023650273846
(1000, 80)
epoch=933, loss=4.246736412320525
(1000, 80)
epoch=934, loss=4.2464481837038575
(1000, 80)
epoch=935, loss=4.246158960909268
(1000, 80)
epoch=936, loss=4.2458687404218525
(1000, 80)
epoch=937, loss=4.245577518726447
(1000, 80)
epoch=938, loss=4.245285292307668
(1000, 80)
epoch=939, loss=4.2449920576499895
(1000, 80)
epoch=940, loss=4.244697811237777
(1000, 80)
epoch=941, loss=4.244402549555352
(1000, 80)
epoch=942, loss=4.244106269087047
(1000, 80)
epoch=943, loss=4.243808966317262
(1000, 80)
epoch=944, loss=4.243510637730507
(1000, 80)
epoch=945, loss=4.243211279811467
(1000, 80)
epoch=946, loss=4.242910889045052
(1000, 80)
epoch=947, loss=4.242609461916447
(1000, 80)
epoch=948, loss=4.242306994911167
(1000, 80)
epoch=949, loss=4.242003484515111
(1000, 80)
epoch=950, loss=4.241698927214613
(1000, 

In [12]:
synhthetizer = Synhthetizer(rnn, onehot_encoder)
sequence= synhthetizer(ts=100, init_idx=1)
print("".join(hpdata.decode(sequence.flatten())))

(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80)
(80,)
(1, 80