In [1]:
import string
import numpy as np

In [2]:
def one_hot(n_classes: int, idx: int) -> np.ndarray:
    """Return a length-``n_classes`` float vector with a single 1.0 at ``idx``.

    Args:
        n_classes: Length of the returned vector.
        idx: Position to set to 1.0; indexing follows normal NumPy rules, so
            an out-of-range value raises ``IndexError``.

    Returns:
        A 1-D array of zeros (NumPy's default float dtype) with one entry
        set to 1.0.
    """
    vector = np.zeros(n_classes)
    vector[idx] = 1.0
    return vector


def word2seq(word: str, alphabet: list) -> np.ndarray:
    """Encode ``word`` as a matrix of one-hot rows, one row per character.

    Args:
        word: Text to encode.
        alphabet: Ordered collection of symbols. Only ``len(alphabet)`` and
            ``alphabet.index(c)`` are used, so a plain ``str`` (as built in
            this notebook) works as well as a list.

    Returns:
        Float array of shape ``(len(word), len(alphabet))``. An empty ``word``
        yields shape ``(0, len(alphabet))`` so the result is always 2-D.

    Raises:
        ValueError: If a character of ``word`` is not in ``alphabet``
            (propagated from ``alphabet.index``).
    """
    # Resolve every character first so an unknown symbol fails before any
    # allocation happens.
    positions = np.array([alphabet.index(c) for c in word], dtype=np.intp)

    encoded = np.zeros((positions.size, len(alphabet)))
    # Fancy indexing sets exactly one entry per row: (row i, column positions[i]).
    encoded[np.arange(positions.size), positions] = 1.0
    return encoded


def seq2word(seq: np.ndarray, alphabet: list) -> str:
    """Decode a sequence of score vectors into text.

    For every row of ``seq`` the position of its largest entry
    (``np.argmax``) is looked up in ``alphabet``; the resulting symbols are
    joined into one string.

    Args:
        seq: Iterable of rows, one per output character.
        alphabet: Indexable collection of symbols.

    Returns:
        The decoded string; empty when ``seq`` has no rows.
    """
    symbols = []
    for row in seq:
        best = np.argmax(row)
        symbols.append(alphabet[best])
    return ''.join(symbols)

In [3]:
# Symbol table used for one-hot encoding: letters, a space, punctuation, digits.
alphabet = "".join((string.ascii_letters, " ", string.punctuation, string.digits))
# The number of distinct symbols is the width of every one-hot vector.
n_symbols = len(alphabet)
print(n_symbols, alphabet)

95 abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~0123456789


In [4]:
def softmax(x: np.ndarray) -> np.ndarray:
    """Numerically stable softmax over *all* elements of ``x``.

    The normalising sum runs over the whole array (no axis), so the entries
    of the result sum to 1 regardless of the input's shape.

    Args:
        x: Array of scores (logits).

    Returns:
        Array of the same shape as ``x`` with non-negative entries summing to
        1. An empty input is returned as an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max has no identity for empty input; nothing to normalise anyway.
        return np.exp(x)

    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps every exponent <= 0, so np.exp cannot overflow to inf (which
    # would otherwise produce inf / inf = nan).
    shifted = np.exp(x - np.max(x))
    return shifted / np.sum(shifted)

In [5]:
def calculate_attention(states: np.ndarray, context: np.ndarray) -> np.ndarray:
    """Dot-product attention: a softmax-weighted sum of the rows of ``states``.

    Each row of ``states`` is scored by its product with ``context``; the
    scores are normalised with ``softmax`` and used as weights when summing
    the rows.

    Args:
        states: Rows to attend over. Assumed 2-D, one row per time step (this
            is what ``EncoderRNN.forward`` produces in this notebook).
        context: Query vector; assumed 1-D with the same length as a row of
            ``states``.

    Returns:
        The weighted sum of the rows of ``states`` (reduction over axis 0).
    """
    weights = softmax(states @ context)
    # Turn the weights into a column so they broadcast across each row.
    weighted_rows = weights.reshape(-1, 1) * states
    return weighted_rows.sum(axis=0)

In [6]:
class EncoderRNN:
    """Simple tanh recurrent encoder.

    The hidden state is updated as ``h = tanh(U @ x + V @ h + d)`` for every
    input vector ``x`` of the sequence, starting from a zero state.

    Attributes:
        U: Input-to-hidden weights, shape ``(n_h, n_in)``, drawn from U(-1, 1).
        V: Hidden-to-hidden weights, shape ``(n_h, n_h)``, drawn from U(-1, 1).
        d: Hidden bias, shape ``(n_h,)``, initialised to zeros.
    """

    def __init__(self, n_in: int, n_h: int) -> None:
        """Create the encoder.

        Args:
            n_in: Length of each input vector.
            n_h: Size of the hidden state.
        """
        # Weights come from NumPy's global RNG; seed it beforehand for
        # reproducible initialisation.
        self.U: np.ndarray = np.random.uniform(-1, 1, (n_h, n_in))
        self.V: np.ndarray = np.random.uniform(-1, 1, (n_h, n_h))
        self.d: np.ndarray = np.zeros(n_h)

    def forward(self, sequence: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Run the encoder over ``sequence``.

        Args:
            sequence: Iterable of input vectors, each of length ``n_in``.

        Returns:
            A pair ``(states, context)``: ``states`` has shape
            ``(len(sequence), n_h)`` and holds the hidden state after every
            step; ``context`` is the final hidden state (all zeros when the
            sequence is empty).
        """
        hidden_size = self.d.size
        states = np.zeros((len(sequence), hidden_size))
        context = np.zeros(hidden_size)

        for step, x in enumerate(sequence):
            context = np.tanh(self.U @ x + self.V @ context + self.d)
            # Assigning into the row copies the values, so later updates of
            # `context` cannot alias the stored state.
            states[step] = context

        return (states, context)

In [7]:
class DecoderRNN:
    """tanh recurrent decoder with attention over a set of encoder states.

    At every step the previous output distribution is fed back as the input,
    the hidden state is updated, an attention vector over ``states`` is
    computed, and the concatenation ``[hidden, attention]`` is projected to a
    softmax output.

    Because ``forward`` multiplies ``U`` (shape ``(n_h, n_in)``) with the
    previous output (length ``n_out``), ``n_in`` must equal ``n_out``;
    otherwise ``forward`` fails with a shape error.

    Attributes:
        U: Input-to-hidden weights, shape ``(n_h, n_in)``, drawn from U(-1, 1).
        V: Hidden-to-hidden weights, shape ``(n_h, n_h)``, drawn from U(-1, 1).
        d: Hidden bias, shape ``(n_h,)``, zeros.
        W: Output weights, shape ``(n_out, 2 * n_h)``, drawn from U(-1, 1).
        b: Output bias, shape ``(n_out,)``, zeros.
    """

    def __init__(self, n_in: int, n_h: int, n_out: int) -> None:
        """Create the decoder.

        Args:
            n_in: Length of the vector fed into the recurrence (the previous
                output, so this has to match ``n_out``).
            n_h: Size of the hidden state.
            n_out: Length of each output vector.
        """
        # Weights come from NumPy's global RNG; seed it beforehand for
        # reproducible initialisation.
        self.U: np.ndarray = np.random.uniform(-1, 1, (n_h, n_in))
        self.V: np.ndarray = np.random.uniform(-1, 1, (n_h, n_h))
        self.d: np.ndarray = np.zeros(n_h)

        # The output layer sees hidden state and attention side by side,
        # hence 2 * n_h input columns.
        self.W: np.ndarray = np.random.uniform(-1, 1, (n_out, 2*n_h))
        self.b: np.ndarray = np.zeros(n_out)

    def forward(self, states: np.ndarray, context: np.ndarray, t: int) -> np.ndarray:
        """Generate ``t`` output vectors.

        Args:
            states: Rows to attend over (passed to ``calculate_attention``).
            context: Initial hidden state of length ``n_h``.
            t: Number of decoding steps.

        Returns:
            Array of shape ``(t, n_out)``; row ``i`` is the softmax output of
            step ``i``.
        """
        outputs = np.zeros((t, self.b.size))
        # The first step has no previous output, so it is fed zeros.
        prev_out = np.zeros(self.b.size)

        for step in range(t):
            context = np.tanh(self.U @ prev_out + self.V @ context + self.d)
            attention = calculate_attention(states, context)
            # np.concatenate is available in every NumPy release, whereas the
            # np.concat alias only exists from NumPy 2.0 onwards.
            features = np.concatenate((context, attention), axis=0)
            prev_out = softmax(self.W @ features + self.b)
            outputs[step] = prev_out

        return outputs

In [10]:
class Seq2Seq:
    """Encoder-decoder model: an ``EncoderRNN`` feeding a ``DecoderRNN``.

    Attributes:
        encoder: Encoder over input vectors of length ``n_in``.
        decoder: Decoder producing output vectors of length ``n_out``.
    """

    def __init__(self, n_in: int, n_h: int, n_out: int) -> None:
        """Build both sub-networks.

        Args:
            n_in: Length of each input vector given to the encoder.
            n_h: Hidden-state size shared by encoder and decoder.
            n_out: Length of each output vector.
        """
        self.encoder = EncoderRNN(n_in, n_h)
        # The decoder's recurrent input is its own previous output (length
        # n_out), not an encoder input, so its input size must be n_out.
        # Passing n_in here only worked when n_in happened to equal n_out.
        self.decoder = DecoderRNN(n_out, n_h, n_out)

    def forward(self, sequence: np.ndarray, t: int) -> np.ndarray:
        """Encode ``sequence`` and decode ``t`` output steps.

        Args:
            sequence: Iterable of input vectors of length ``n_in``.
            t: Number of output vectors to generate.

        Returns:
            The decoder's output array, one row per generated step.
        """
        states, context = self.encoder.forward(sequence)
        return self.decoder.forward(states, context, t)

In [34]:
# The weights are drawn from NumPy's global RNG, so without a fixed seed the
# printed text changes on every run. Seed it to make this cell reproducible.
SEED = 0
HIDDEN_SIZE = 32       # size of the recurrent hidden state
N_DECODE_STEPS = 128   # number of characters to generate
np.random.seed(SEED)

model = Seq2Seq(n_symbols, HIDDEN_SIZE, n_symbols)

in_text = 'the quick brown fox jumps over the lazy dog'
sequence = word2seq(in_text, alphabet)

outputs = model.forward(sequence, N_DECODE_STEPS)

# NOTE: no training step appears before this cell, so the model still has its
# random initial weights and the decoded text is not expected to be readable.
out_text = seq2word(outputs, alphabet)
print(out_text)

?z,vne)|,6Qqnn,5nd~|Jau<y2LJ*HGR%^FJ2dx1xd/au<SQ:J`n4/UnVzp?<g%A:ZCif,DQJ{Ox%C14,{<J$#9X!4R*2g6au#B{nez/%Tn7x{{xxxe,x|Oz.U)x%n2}
