In [1]:
import math
import random

# Helper functions
def softmax(x):
    """Return the softmax probabilities of the scores in *x*.

    The largest score is subtracted from every score before exponentiating.
    This leaves the result mathematically unchanged but keeps ``math.exp``
    from overflowing on large logits.

    Args:
        x: Iterable of real-valued scores (logits).

    Returns:
        A list of probabilities, one per score, summing to 1. An empty input
        yields an empty list.
    """
    scores = list(x)
    if not scores:
        return []
    peak = max(scores)
    # Every exponent is <= 0 after the shift, so exp() stays within (0, 1].
    exps = [math.exp(score - peak) for score in scores]
    total = sum(exps)
    return [e / total for e in exps]

def tanh(x):
    """Apply the hyperbolic tangent to every element of *x* and return a list."""
    return list(map(math.tanh, x))

def tanh_derivative(x):
    """Return d/dv tanh(v) = 1 - tanh(v)**2 for every element v of *x*.

    The elements of *x* are treated as the inputs to tanh (pre-activations),
    not as values that have already been passed through tanh.
    """
    slopes = []
    for value in x:
        squashed = math.tanh(value)
        slopes.append(1 - squashed ** 2)
    return slopes

def vector_add(v1, v2):
    """Return the element-wise sum of *v1* and *v2* as a new list.

    Elements are paired with ``zip``, so the result is as long as the shorter
    of the two inputs.
    """
    summed = []
    for left, right in zip(v1, v2):
        summed.append(left + right)
    return summed

def matrix_vector_mul(matrix, vector):
    """Multiply *matrix* (a list of rows) by *vector* and return a list.

    Each output element is the dot product of one row with *vector*. Pairs
    are formed with ``zip``, so extra entries in the longer operand are
    ignored.
    """
    product = []
    for row in matrix:
        terms = [weight * value for weight, value in zip(row, vector)]
        product.append(sum(terms))
    return product

def cross_entropy(predicted, target_index):
    """Return the negative log of the probability given to the target class.

    A small constant (1e-10) is added before taking the logarithm so that a
    predicted probability of exactly 0 does not raise a math domain error.
    """
    target_probability = predicted[target_index]
    return -math.log(target_probability + 1e-10)

def one_hot(index, size):
    """Return a list of *size* zeros with a single 1 at position *index*."""
    encoded = [0 for _ in range(size)]
    encoded[index] = 1
    return encoded

def random_matrix(rows, cols):
    """Return a rows x cols list-of-lists of floats drawn uniformly from [-0.1, 0.1].

    Values are drawn row by row, left to right, from the module-level
    ``random`` generator.
    """
    matrix = []
    for _ in range(rows):
        row = []
        for _ in range(cols):
            row.append(random.uniform(-0.1, 0.1))
        matrix.append(row)
    return matrix

def zero_vector(size):
    """Return a new list containing *size* copies of 0.0."""
    return [0.0] * size

# Sentence: "we are best friends"
# Vocabulary: every word of the sentence is mapped to a unique integer id.
word_to_idx = {"we": 0, "are": 1, "best": 2, "friends": 3}
idx_to_word = {i: w for w, i in word_to_idx.items()}
# Single source of truth for the one-hot width and the input/output layer sizes.
vocab_size = len(word_to_idx)

# Task: read the first three words and predict the fourth.
inputs = ["we", "are", "best"]
target = "friends"

input_vectors = [one_hot(word_to_idx[word], vocab_size) for word in inputs]
target_index = word_to_idx[target]

# Initialize Parameters
input_size = vocab_size    # one-hot encoded words go in
hidden_size = 4
output_size = vocab_size   # one score per vocabulary word comes out

# Input-to-hidden and hidden-to-hidden weights, plus the hidden bias.
W_xh = random_matrix(hidden_size, input_size)
W_hh = random_matrix(hidden_size, hidden_size)
b_h = zero_vector(hidden_size)

# Hidden-to-output weights and the output bias.
W_hy = random_matrix(output_size, hidden_size)
b_y = zero_vector(output_size)

# Training settings
learning_rate = 0.1
epochs = 100

# Training Loop
# Each epoch: one forward pass over the input words, a cross-entropy loss on
# the prediction made after the last word, backpropagation through time, and
# one plain gradient-descent update of every parameter.
for epoch in range(epochs):
    # Forward pass: unroll the recurrence and keep every hidden state, since
    # the backward pass needs them.
    h = zero_vector(hidden_size)
    h_list = []

    for x in input_vectors:
        pre_activation = vector_add(
            matrix_vector_mul(W_xh, x),
            matrix_vector_mul(W_hh, h)
        )
        pre_activation = vector_add(pre_activation, b_h)
        h = tanh(pre_activation)
        h_list.append(h)

    # Only the final hidden state feeds the output layer.
    logits = vector_add(matrix_vector_mul(W_hy, h), b_y)
    y = softmax(logits)

    # Loss
    loss = cross_entropy(y, target_index)

    # Prediction and Accuracy (a single example, so accuracy is 0% or 100%)
    predicted_index = y.index(max(y))
    is_correct = (predicted_index == target_index)
    accuracy = 100.0 if is_correct else 0.0

    # Backward pass
    dW_xh = [[0.0] * input_size for _ in range(hidden_size)]
    dW_hh = [[0.0] * hidden_size for _ in range(hidden_size)]
    db_h = [0.0 for _ in range(hidden_size)]

    # Gradient of softmax + cross-entropy w.r.t. the logits: y - one_hot(target).
    dy = y[:]
    dy[target_index] -= 1

    # Output layer gradients (h is the last hidden state here).
    dW_hy = [[dy_i * h_j for h_j in h] for dy_i in dy]
    db_y = dy[:]

    # Gradient flowing into the last hidden state: W_hy^T @ dy.
    dh = [
        sum(W_hy[i][j] * dy[i] for i in range(output_size))
        for j in range(hidden_size)
    ]

    # Backpropagation through time, from the last step to the first.
    for t in reversed(range(len(input_vectors))):
        # h_list[t] already holds tanh(pre_activation), so the local derivative
        # is 1 - h**2. Applying tanh again here would give the wrong gradient.
        dh_raw = [grad * (1 - h_val ** 2) for grad, h_val in zip(dh, h_list[t])]

        x_t = input_vectors[t]
        # The hidden state before the first step is the all-zero vector.
        prev_h = h_list[t - 1] if t != 0 else zero_vector(hidden_size)

        for i in range(hidden_size):
            for j in range(input_size):
                dW_xh[i][j] += dh_raw[i] * x_t[j]
            for j in range(hidden_size):
                dW_hh[i][j] += dh_raw[i] * prev_h[j]
            db_h[i] += dh_raw[i]

        # Pass the gradient to the previous hidden state: W_hh^T @ dh_raw.
        dh = [
            sum(W_hh[i][j] * dh_raw[i] for i in range(hidden_size))
            for j in range(hidden_size)
        ]

    # Update weights
    for i in range(hidden_size):
        for j in range(input_size):
            W_xh[i][j] -= learning_rate * dW_xh[i][j]
        for j in range(hidden_size):
            W_hh[i][j] -= learning_rate * dW_hh[i][j]
        b_h[i] -= learning_rate * db_h[i]

    for i in range(output_size):
        for j in range(hidden_size):
            W_hy[i][j] -= learning_rate * dW_hy[i][j]
        b_y[i] -= learning_rate * db_y[i]

    # Print every 10 epochs
    if epoch % 10 == 0 or epoch == epochs-1:
        print(f"Epoch {epoch+1}/{epochs}  Loss: {loss:.4f}  Accuracy: {accuracy:.2f}%")

# Final result: summarise the prediction made in the last training epoch.
final_report = (
    "\nFinal prediction after training:",
    f"Predicted word: {idx_to_word[predicted_index]}",
    f"Target word: {target}",
    f"Final Accuracy: {accuracy:.2f}%",
)
for report_line in final_report:
    print(report_line)


Epoch 1/100  Loss: 1.3748  Accuracy: 100.00%
Epoch 11/100  Loss: 0.7460  Accuracy: 100.00%
Epoch 21/100  Loss: 0.3773  Accuracy: 100.00%
Epoch 31/100  Loss: 0.1972  Accuracy: 100.00%
Epoch 41/100  Loss: 0.1170  Accuracy: 100.00%
Epoch 51/100  Loss: 0.0780  Accuracy: 100.00%
Epoch 61/100  Loss: 0.0566  Accuracy: 100.00%
Epoch 71/100  Loss: 0.0436  Accuracy: 100.00%
Epoch 81/100  Loss: 0.0351  Accuracy: 100.00%
Epoch 91/100  Loss: 0.0291  Accuracy: 100.00%
Epoch 100/100  Loss: 0.0251  Accuracy: 100.00%

Final prediction after training:
Predicted word: friends
Target word: friends
Final Accuracy: 100.00%
