In [9]:
def zeros(shape):
    """Build a zero-filled matrix as a list of row lists.

    Args:
        shape: Indexable pair; ``shape[0]`` is the number of rows and
            ``shape[1]`` the number of columns.

    Returns:
        A list of ``shape[0]`` independent rows, each holding ``shape[1]``
        integer zeros.
    """
    grid = []
    for _ in range(shape[0]):
        # A fresh list per row so that rows never alias each other.
        grid.append([0] * shape[1])
    return grid

def random_matrix(shape, scale=0.01):
    """Build a matrix of uniform random floats drawn from [-scale, scale].

    Values are drawn row by row, left to right, from the module-level
    ``random`` generator.

    Args:
        shape: Indexable pair ``(rows, cols)``.
        scale: Half-width of the sampling interval.

    Returns:
        A list of ``shape[0]`` rows, each a list of ``shape[1]`` floats.
    """
    import random

    matrix = []
    for _ in range(shape[0]):
        row = []
        for _ in range(shape[1]):
            row.append(random.uniform(-scale, scale))
        matrix.append(row)
    return matrix

def matmul(A, B):
    """Return the matrix product ``A @ B`` for list-of-lists matrices.

    Args:
        A: Matrix with ``len(B)`` columns in every row.
        B: Rectangular matrix.

    Returns:
        A new matrix with ``len(A)`` rows and ``len(B[0])`` columns (zero
        columns when ``B`` is empty). Neither input is modified.

    Raises:
        ValueError: If the rows of ``B`` differ in length, or if a row of
            ``A`` does not have exactly ``len(B)`` entries.
    """
    inner = len(B)
    n_cols = len(B[0]) if B else 0

    if any(len(row) != n_cols for row in B):
        raise ValueError("matmul: rows of B must all have the same length")
    # Without this check a too-wide A would be silently truncated and a
    # too-narrow A would surface as an unexplained IndexError.
    if any(len(row) != inner for row in A):
        raise ValueError(
            "matmul: each row of A must have len(B) = %d entries" % inner
        )

    product = []
    for row in A:
        out_row = []
        for j in range(n_cols):
            # Plain left-to-right accumulation starting from 0.
            total = 0
            for k in range(inner):
                total += row[k] * B[k][j]
            out_row.append(total)
        product.append(out_row)
    return product

def add(A, B):
    """Return the element-wise sum of two equally shaped matrices.

    Args:
        A: Matrix as a list of rows.
        B: Matrix with the same number of rows and the same row lengths
            as ``A``.

    Returns:
        A new matrix whose entry ``[i][j]`` is ``A[i][j] + B[i][j]``.
        Neither input is modified.

    Raises:
        ValueError: If the two matrices differ in shape.
    """
    if len(A) != len(B):
        raise ValueError(
            "add: row count mismatch (%d vs %d)" % (len(A), len(B))
        )
    total = []
    for row_a, row_b in zip(A, B):
        # Reject mismatched rows instead of silently dropping the extra
        # entries of the longer operand.
        if len(row_a) != len(row_b):
            raise ValueError(
                "add: row length mismatch (%d vs %d)" % (len(row_a), len(row_b))
            )
        total.append([a + b for a, b in zip(row_a, row_b)])
    return total

def tanh(X):
    """Apply the hyperbolic tangent to every entry of a matrix.

    Args:
        X: Non-empty matrix as a list of rows; the first row's length is
            used as the column count for every row.

    Returns:
        A new matrix of the same shape holding ``math.tanh`` of each entry.
    """
    # Import the module rather than the bare name so the scalar function is
    # not confused with this matrix-level function of the same name.
    import math

    width = len(X[0])
    return [[math.tanh(row[c]) for c in range(width)] for row in X]

def dtanh(X):
    """Compute ``1 - x**2`` for every entry of a matrix.

    This equals the derivative of tanh when ``X`` already holds tanh
    *outputs* (activations), not the pre-activation inputs.

    Args:
        X: Non-empty matrix as a list of rows; the first row's length is
            used as the column count for every row.

    Returns:
        A new matrix of the same shape.
    """
    width = len(X[0])
    return [[1 - row[c] ** 2 for c in range(width)] for row in X]

def softmax(X):
    """Compute a numerically stable softmax over a column vector.

    Only the first entry of each row is read, so ``X`` is treated as an
    (n x 1) column even if rows hold more values.

    Args:
        X: Non-empty list of rows; ``row[0]`` is the logit for that row.

    Returns:
        A list of single-element rows ``[[p0], [p1], ...]`` that sum to 1.
    """
    import math

    logits = [row[0] for row in X]
    # Subtracting the largest logit keeps exp() from overflowing without
    # changing the resulting probabilities.
    peak = max(logits)
    weights = [math.exp(value - peak) for value in logits]
    normaliser = sum(weights)
    return [[weight / normaliser] for weight in weights]

def cross_entropy_loss(predicted, target_idx, eps=1e-12):
    """Return the negative log-likelihood of the target class.

    Args:
        predicted: Column of probabilities as single-element rows,
            e.g. the output of ``softmax``.
        target_idx: Row index of the correct class.
        eps: Lower bound applied to the probability before taking the log.
            A probability that has underflowed to exactly 0.0 would
            otherwise make ``math.log`` raise ``ValueError``.

    Returns:
        ``-log(max(p, eps))`` where ``p = predicted[target_idx][0]``. For
        any ``p >= eps`` this is exactly ``-log(p)``.
    """
    from math import log

    prob = predicted[target_idx][0]
    return -log(max(prob, eps))

def transpose(A):
    """Return the transpose of a matrix.

    Args:
        A: Non-empty matrix as a list of rows; the first row's length is
            used as the column count for every row.

    Returns:
        A new matrix with ``len(A[0])`` rows and ``len(A)`` columns where
        entry ``[j][i]`` equals ``A[i][j]``.
    """
    n_cols = len(A[0])
    return [[row[c] for row in A] for c in range(n_cols)]

def multiply(A, B):
    """Return the element-wise (Hadamard) product of two matrices.

    Args:
        A: Matrix as a list of rows.
        B: Matrix with the same number of rows and the same row lengths
            as ``A``.

    Returns:
        A new matrix whose entry ``[i][j]`` is ``A[i][j] * B[i][j]``.
        Neither input is modified.

    Raises:
        ValueError: If the two matrices differ in shape.
    """
    if len(A) != len(B):
        raise ValueError(
            "multiply: row count mismatch (%d vs %d)" % (len(A), len(B))
        )
    product = []
    for row_a, row_b in zip(A, B):
        # Reject mismatched rows instead of silently dropping the extra
        # entries of the longer operand.
        if len(row_a) != len(row_b):
            raise ValueError(
                "multiply: row length mismatch (%d vs %d)"
                % (len(row_a), len(row_b))
            )
        product.append([a * b for a, b in zip(row_a, row_b)])
    return product

# --- Vocabulary -----------------------------------------------------------
# Four-character vocabulary with lookup tables in both directions.
chars = ['d', 'o', 'g', 's']
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for ch, i in char_to_idx.items()}
vocab_size = len(chars)

# Training sequence: at step t the network reads inputs[t] and is trained to
# predict targets[t] (the input sequence shifted by one character).
inputs = ['d', 'o', 'g']
targets = ['o', 'g', 's']

# Inputs and outputs are one-hot columns over the vocabulary.
input_size = vocab_size
hidden_size = 4
output_size = vocab_size

# --- Parameters -----------------------------------------------------------
# Wxh: input -> hidden, Whh: hidden -> hidden, Why: hidden -> output.
# Weights start as small random values (random_matrix's default scale);
# both bias columns start at zero.
Wxh = random_matrix((hidden_size, input_size))
Whh = random_matrix((hidden_size, hidden_size))
Why = random_matrix((output_size, hidden_size))
bh = zeros((hidden_size, 1))
by = zeros((output_size, 1))


learning_rate = 0.1
epochs = 100

print("\nInitial Weights:")
print("Wxh:", Wxh)
print("Whh:", Whh)
print("Why:", Why)

# --- Training: one full forward + backward pass over the sequence per epoch
for epoch in range(epochs):
    hprev = zeros((hidden_size, 1))  # hidden state restarts from zeros every epoch

    total_loss = 0
    # Character -> index lists; recomputed each epoch although inputs and
    # targets do not change inside the loop.
    inputs_idx = [char_to_idx[ch] for ch in inputs]
    targets_idx = [char_to_idx[ch] for ch in targets]

    # Per-timestep caches keyed by t: one-hot inputs, hidden states,
    # output scores and softmax probabilities.
    xs, hs, ys, ps = {}, {}, {}, {}
    # Key -1 holds the initial state so that hs[t-1] resolves at t == 0.
    hs[-1] = hprev

    # Forward pass.
    for t in range(len(inputs)):
        # One-hot column for the current input character.
        x = zeros((input_size, 1))
        x[inputs_idx[t]][0] = 1
        xs[t] = x
        # h_t = tanh(Wxh x_t + Whh h_{t-1} + bh)
        h_linear = add(matmul(Wxh, x), matmul(Whh, hs[t-1]))
        h_linear = add(h_linear, bh)
        h = tanh(h_linear)
        hs[t] = h
        # y_t = Why h_t + by, then softmax to get probabilities.
        y = add(matmul(Why, h), by)
        ys[t] = y  # stored but not read again within this script
        p = softmax(y)
        ps[t] = p
        total_loss += cross_entropy_loss(p, targets_idx[t])

    # The reported loss is the sum over timesteps, measured before this
    # epoch's weight update is applied.
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {total_loss:.4f}")
    # Gradient accumulators, one per parameter, plus the hidden-state
    # gradient carried backwards from step t+1 to step t.
    dWxh = zeros((hidden_size, input_size))
    dWhh = zeros((hidden_size, hidden_size))
    dWhy = zeros((output_size, hidden_size))
    dbh = zeros((hidden_size, 1))
    dby = zeros((output_size, 1))
    dhnext = zeros((hidden_size, 1))

    # Backward pass through time, last timestep first.
    for t in reversed(range(len(inputs))):
        # Copy the probabilities so the cached ps[t] is not mutated.
        dy = [row[:] for row in ps[t]]
        dy[targets_idx[t]][0] -= 1  # gradient of the loss w.r.t. y_t: p - one_hot(target)
        dWhy = add(dWhy, matmul(dy, transpose(hs[t])))
        dby = add(dby, dy)

        # Gradient reaching h_t: from the output layer plus from step t+1.
        dh = add(matmul(transpose(Why), dy), dhnext)
        # Back through tanh; dtanh receives the activation hs[t].
        dhraw = multiply(dh, dtanh(hs[t]))
        dbh = add(dbh, dhraw)
        dWxh = add(dWxh, matmul(dhraw, transpose(xs[t])))
        dWhh = add(dWhh, matmul(dhraw, transpose(hs[t-1])))
        # Gradient handed to the previous timestep's hidden state.
        dhnext = matmul(transpose(Whh), dhraw)
    # Gradient-descent update, applied in place to every parameter.
    for i in range(hidden_size):
        for j in range(input_size):
            Wxh[i][j] -= learning_rate * dWxh[i][j]
    for i in range(hidden_size):
        for j in range(hidden_size):
            Whh[i][j] -= learning_rate * dWhh[i][j]
    for i in range(output_size):
        for j in range(hidden_size):
            Why[i][j] -= learning_rate * dWhy[i][j]
    for i in range(hidden_size):
        bh[i][0] -= learning_rate * dbh[i][0]
    for i in range(output_size):
        by[i][0] -= learning_rate * dby[i][0]
# --- Inference ------------------------------------------------------------
print("\nTesting after training:")
# Feed 'd', 'o', 'g' through the trained recurrence starting from a zero
# hidden state; only the hidden state is updated inside the loop.
h = zeros((hidden_size, 1))
for ch in ['d', 'o', 'g']:
    x = zeros((input_size, 1))
    x[char_to_idx[ch]][0] = 1
    h_linear = add(matmul(Wxh, x), matmul(Whh, h))
    h_linear = add(h_linear, bh)
    h = tanh(h_linear)

# The output layer is evaluated once, after the last character, so a single
# next-character prediction is produced.
y = add(matmul(Why, h), by)
p = softmax(y)
# Row index of the largest probability.
predicted_idx = p.index(max(p, key=lambda x: x[0]))
# `epoch` still holds the last value from the training loop (epochs - 1).
print("\nUpdated Weights after Epoch", epoch)
print("Wxh:", Wxh)
print("Whh:", Whh)
print("Why:", Why)
print("Predicted next char:", idx_to_char[predicted_idx])
def transpose(A):
    """Return the transpose of a matrix.

    Args:
        A: Non-empty matrix as a list of rows; the first row's length is
            used as the column count for every row.

    Returns:
        A new matrix with ``len(A[0])`` rows and ``len(A)`` columns where
        entry ``[j][i]`` equals ``A[i][j]``.
    """
    # NOTE(review): this rebinds a `transpose` that is already defined
    # earlier in this file, after the code that uses it has run. It looks
    # like a leftover duplicate; confirm and remove one of the two.
    n_cols = len(A[0])
    flipped = [[] for _ in range(n_cols)]
    for row in A:
        for c in range(n_cols):
            flipped[c].append(row[c])
    return flipped

def multiply(A, B):
    """Return the element-wise (Hadamard) product of two matrices.

    Args:
        A: Matrix as a list of rows.
        B: Matrix with the same number of rows and the same row lengths
            as ``A``.

    Returns:
        A new matrix whose entry ``[i][j]`` is ``A[i][j] * B[i][j]``.
        Neither input is modified.

    Raises:
        ValueError: If the two matrices differ in shape.
    """
    # NOTE(review): this rebinds a `multiply` that is already defined
    # earlier in this file; it looks like a leftover duplicate. Confirm and
    # remove one of the two.
    if len(A) != len(B):
        raise ValueError(
            "multiply: row count mismatch (%d vs %d)" % (len(A), len(B))
        )
    product = []
    for row_a, row_b in zip(A, B):
        # Reject mismatched rows instead of silently dropping the extra
        # entries of the longer operand.
        if len(row_a) != len(row_b):
            raise ValueError(
                "multiply: row length mismatch (%d vs %d)"
                % (len(row_a), len(row_b))
            )
        product.append([a * b for a, b in zip(row_a, row_b)])
    return product


Initial Weights:
Wxh: [[-0.006160759776051808, 0.00382286397973375, -0.0009706173102425001, 0.007148075734719315], [-0.008559565967895224, -0.008763536234504961, -0.002635599634747605, -0.004454314416485621], [-0.004544346420390426, -4.307080274389141e-05, 0.001118315829731674, 0.003114621760106724], [0.00465229194169769, 0.004946716407373037, 0.006630095162632943, -0.009676470102142804]]
Whh: [[-0.00955499913264118, -0.008360966707212711, -0.0017199874678836859, -0.003161090980943604], [0.005538797467656535, -0.0017501535767690847, -0.007007156561488068, 0.006338673990477108], [0.00834944521264788, -0.0019220510083563722, -0.0035311209460517357, 0.0038079232365205735], [0.009091441210234192, -0.004863883418357702, -0.0022601229044151164, -0.007016345344712167]]
Why: [[-0.009716301531588777, 0.00046569943900209854, -0.0018130896779637948, 0.00603176717984215], [0.0030485458531532682, 0.00852296524736901, -0.009846844445296281, -0.001234150232638273], [-0.006531557834317465, 0.00538536