In [299]:
import string
import numpy as np

In [300]:
def one_hot(n_classes: int, idx: int) -> np.ndarray:
    embedding = np.zeros(n_classes)
    embedding[idx] = 1.0
    return embedding


def word2seq(word: str, alphabet: list) -> np.ndarray:
    return np.array([one_hot(len(alphabet), alphabet.index(c)) for c in word])


def seq2word(seq: np.ndarray, alphabet: list) -> str:
    return ''.join([alphabet[np.argmax(x)] for x in seq])

In [301]:
alphabet = string.ascii_letters + " " + string.punctuation + string.digits
n_symbols = len(alphabet)
print(n_symbols, alphabet)

95 abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~0123456789


In [302]:
def ReLU(x: np.ndarray) -> np.ndarray:
    return 1.0 * (x > 0)


def softmax(x: np.ndarray) -> np.ndarray:
    y = np.exp(x)
    return y / np.sum(y)

In [303]:
def self_attention(Wq: np.ndarray, Wk: np.ndarray, Wv: np.ndarray, X: np.ndarray) -> np.ndarray:
    Q, K, V = X @ Wq, X @ Wk, X @ Wv
    A = (Q @ K.T) / np.sqrt(Wq.shape[1])
    A = np.apply_along_axis(softmax, 0, A)
    return A @ V

In [None]:
class Transformer:
    def __init__(self, emb_size: int, k_size: int, n_h: int, n_out) -> None:
        self.Wq: np.ndarray = np.random.uniform(-1, 1, (emb_size, k_size))
        self.Wk: np.ndarray = np.random.uniform(-1, 1, (emb_size, k_size))
        self.Wv: np.ndarray = np.random.uniform(-1, 1, (emb_size, emb_size))

        self.U: np.ndarray = np.random.uniform(-1, 1, (emb_size, n_h))
        self.V: np.ndarray = np.random.uniform(-1, 1, (n_h, emb_size))

        self.W: np.ndarray = np.random.uniform(-1, 1, (n_out, emb_size))
        self.b: np.ndarray = np.zeros(n_out)

    def forward(self, X: np.ndarray) -> np.ndarray:
        X = X + self_attention(self.Wq, self.Wk, self.Wv, X)
        X = X + ReLU(X @ self.U) @ self.V
        return softmax(self.W @ X[-1] + self.b)

In [308]:
emb_size = n_symbols
k_size = 8
n_h = 128
n_out = n_symbols

model = Transformer(emb_size, k_size, n_h, n_out)

in_text = 'the quick brown fox jumps over the lazy dog'
out_text = ''

sequence = word2seq(in_text, alphabet)

for i in range(100):
    probs = model.forward(sequence)
    next_token = one_hot(n_symbols, np.random.choice(range(n_symbols), 1, p=probs))
    out_text += alphabet[np.argmax(probs)]
    sequence = np.vstack((sequence, next_token))

print(in_text)
print(out_text)

the quick brown fox jumps over the lazy dog
;8zPzPsP;sQ8zP6;;;;;XXzPBA8zPBA8zP~C6zP6zP6z6C8zZ88z6r{CE~r{C(Xz6rz6r{8sP~r{8z6r{8z8sP88z8Z8ZCrz8Z8Z
