In [2]:
import string
import numpy as np

In [3]:
def one_hot(n_classes: int, idx: int) -> np.ndarray:
    """Build a one-hot vector of length ``n_classes``.

    Args:
        n_classes: Length of the returned vector.
        idx: Position that is set to 1.0; every other entry stays 0.0.

    Returns:
        A 1-D float array with a single 1.0 at ``idx``.
    """
    encoded = np.zeros(n_classes)
    # Plain NumPy indexing: an out-of-range ``idx`` raises IndexError.
    encoded[idx] = 1.0
    return encoded

def word2seq(word: str, alphabet: list) -> np.ndarray:
    """Encode ``word`` as a matrix of one-hot rows, one row per character.

    Args:
        word: Text to encode; every character must occur in ``alphabet``.
        alphabet: Ordered collection of symbols (a list or a plain string);
            a symbol's position is its class index.

    Returns:
        A float array of shape ``(len(word), len(alphabet))``. An empty
        ``word`` yields shape ``(0, len(alphabet))`` so the symbol dimension
        is never lost.

    Raises:
        ValueError: If a character of ``word`` is not in ``alphabet``.
    """
    # Explicit integer dtype so an empty word still gives a valid index array.
    indices = np.array([alphabet.index(ch) for ch in word], dtype=np.intp)

    # Fill one preallocated matrix instead of building an array per character.
    seq = np.zeros((len(indices), len(alphabet)))
    seq[np.arange(len(indices)), indices] = 1.0
    return seq

def seq2word(seq: np.ndarray, alphabet: list) -> str:
    """Decode a sequence of score vectors back into text.

    Args:
        seq: Iterable of per-step vectors (e.g. one-hot rows or probability
            distributions), one per output character.
        alphabet: Ordered symbols; the position of the largest entry in each
            vector selects the symbol.

    Returns:
        The decoded string, one character per vector in ``seq``.
    """
    symbols = []
    for row in seq:
        # argmax picks the first maximum when several entries tie.
        symbols.append(alphabet[np.argmax(row)])
    return ''.join(symbols)

In [20]:
# Symbol vocabulary: a space, a few punctuation marks, then a-z and A-Z.
# A symbol's position in this string is its one-hot class index.
alphabet = ''.join([' .,:;\"\'', string.ascii_lowercase, string.ascii_uppercase])
n_symbols = len(alphabet)
print(n_symbols, alphabet)

59  .,:;"'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ


In [5]:
def softmax(x: np.ndarray) -> np.ndarray:
    """Normalise scores into a probability distribution over all entries.

    Args:
        x: Array of scores (logits).

    Returns:
        An array of the same shape as ``x`` whose entries are non-negative
        and sum to 1. Empty input yields an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max is undefined on empty input; there is nothing to normalise.
        return np.exp(x)

    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing to inf (and the ratio from becoming nan).
    y = np.exp(x - np.max(x))
    return y / np.sum(y)

In [6]:
def calculate_attention(V: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Dot-product attention of query ``x`` over the rows of ``V``.

    Each row of ``V`` is scored against ``x`` by a dot product, the scores
    are passed through ``softmax``, and the rows are averaged with those
    weights.

    Args:
        V: Memory matrix with one row per stored vector.
        x: Query vector.

    Returns:
        The attention-weighted sum of the rows of ``V``; a zero array shaped
        like ``x`` when ``V`` has no rows.
    """
    if len(V) > 0:
        weights = softmax(V @ x)
        # Column-shape the weights so they broadcast across each row of V.
        weighted_rows = weights.reshape(-1, 1) * V
        return weighted_rows.sum(axis=0)

    # Nothing to attend to yet.
    return np.zeros(x.shape)

In [26]:
class RNN:
    """Untrained encoder/decoder recurrent network with dot-product attention.

    All weights are drawn uniformly from [-1, 1) and are never updated here.

    Attributes:
        U: ``(n_h, n_in)`` input-to-hidden weights used by the encoder.
        V: ``(n_in, n_h)`` projection of the hidden state back to input
            space; its outputs form the encoder's attention memory.
        R: ``(n_h, n_h)`` recurrent weights, used by both encoder and decoder.
        Q: ``(n_h, n_out)`` weights feeding the previous output into the
            decoder's hidden state.
        W: ``(n_out, 2*n_h)`` weights mapping the concatenated hidden state
            and attention vector to output scores.
    """

    def __init__(self, n_in: int, n_h: int, n_out: int, seed=None) -> None:
        """Initialise the weight matrices.

        Args:
            n_in: Size of each input vector.
            n_h: Size of the hidden state.
            n_out: Size of each output vector.
            seed: Optional seed for reproducible weights. When ``None`` the
                weights come from NumPy's global random state, so
                ``np.random.seed`` still controls them.
        """
        rng = np.random if seed is None else np.random.default_rng(seed)

        # Draw order (U, V, R, Q, W) is kept fixed so a seeded global state
        # always produces the same weights.
        self.U: np.ndarray = rng.uniform(-1, 1, (n_h, n_in))
        self.V: np.ndarray = rng.uniform(-1, 1, (n_in, n_h))

        self.R: np.ndarray = rng.uniform(-1, 1, (n_h, n_h))

        self.Q: np.ndarray = rng.uniform(-1, 1, (n_h, n_out))
        self.W: np.ndarray = rng.uniform(-1, 1, (n_out, 2*n_h))

    def encode(self, sequence: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Run the encoder over ``sequence``.

        Args:
            sequence: Iterable of input vectors of length ``n_in``.

        Returns:
            ``(states, context)``: the hidden state after every step, shape
            ``(len(sequence), n_h)``, and the final hidden state, shape
            ``(n_h,)``. An empty sequence returns an empty ``states`` and an
            all-zero ``context``.
        """
        n_steps = len(sequence)
        states = np.zeros((n_steps, len(self.R)))
        memories = np.zeros((n_steps, len(self.V)))
        context = np.zeros(len(self.R))

        for i, x in enumerate(sequence):
            # Attend only over memories written by earlier steps.
            x_hat = x + calculate_attention(memories[:i], x)
            context = np.tanh(self.R @ context + self.U @ x_hat)
            memories[i] = self.V @ context
            # Row assignment copies the values, so no explicit copy is needed.
            states[i] = context

        return (states, context)

    def decode(self, context: np.ndarray, states: np.ndarray, t: int) -> tuple[np.ndarray, np.ndarray]:
        """Generate ``t`` output distributions from an encoded context.

        Args:
            context: Initial hidden state of length ``n_h``.
            states: Encoder states of shape ``(steps, n_h)`` to attend over.
            t: Number of output steps to generate.

        Returns:
            ``(outputs, context)``: the softmax output of every step, shape
            ``(t, n_out)``, and the hidden state after the last step.
        """
        outputs = np.zeros((t, len(self.W)))
        out = np.zeros(len(self.W))

        for i in range(t):
            # The previous output distribution is fed back into the state.
            context = np.tanh(self.R @ context + self.Q @ out)
            attention = calculate_attention(states, context)
            # np.concatenate exists in every NumPy release; the np.concat
            # alias only exists from NumPy 2.0 onwards.
            ctx_att = np.concatenate((context, attention), axis=0)
            out = softmax(self.W @ ctx_att)
            outputs[i] = out

        return (outputs, context)


In [29]:
# Smoke test: push one sentence through the untrained encoder/decoder.
# The weights are random, so the decoded text is expected to be gibberish.
SEED = 0
N_HIDDEN = 64
N_DECODE_STEPS = 128

# RNN draws its weights from NumPy's global random state; seeding it here
# makes the printed text identical on every Restart & Run All.
np.random.seed(SEED)

in_text = 'the quick brown fox jumps over the lazy dog'
sequence = word2seq(in_text, alphabet)

model = RNN(n_symbols, N_HIDDEN, n_symbols)
states, context = model.encode(sequence)
outputs, context = model.decode(context, states, N_DECODE_STEPS)

out_text = seq2word(outputs, alphabet)
print(out_text)

GtOLrtTCr xXeoDSn;lLjhXjjqoHGud"XOOHWmSoR;LHENLptwlE iwnAtHbjOJYxvdudmuS;QSOs,SyuAr,;FGOhUJtxsbtVSjQCqPyISD .:hhyHS;Ff"qtgrKYF.G
