<a href="https://colab.research.google.com/github/kand11/MfPSI/blob/main/Task3(GPT)_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [64]:
import numpy as np

In [92]:
def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

def sigmoid(x):
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
    fx = sigmoid(x)
    return fx * (1 - fx)

In [94]:
class Neuron:
    def __init__(self, input_size):
        self.w = np.random.randn(input_size) * 0.1
        self.b = 0.0

    def feedforward(self, x):
        self.last_x = x
        self.last_total = np.dot(x, self.w) + self.b
        return sigmoid(self.last_total)

    def train(self, grad_output, lr=0.01):
        grad_total = deriv_sigmoid(self.last_total) * grad_output
        self.w -= lr * grad_total * self.last_x
        self.b -= lr * grad_total

In [98]:
class Head:
    def __init__(self, n_embd, head_size, block_size):
        self.n_embd = n_embd
        self.head_size = head_size
        self.block_size = block_size

        self.key_w = np.random.randn(n_embd, head_size) * 0.1
        self.query_w = np.random.randn(n_embd, head_size) * 0.1
        self.value_neuron = Neuron(n_embd)

    def forward(self, x):
        self.x = x
        B, T, C = x.shape
        self.k = np.dot(x, self.key_w)
        self.q = np.dot(x, self.query_w)

        self.wei = np.matmul(self.q, self.k.transpose(0, 2, 1)) / np.sqrt(self.head_size)
        mask = np.tril(np.ones((T, T)))
        self.wei = np.where(mask == 1, self.wei, -1e9)
        self.wei_softmax = softmax(self.wei)

        self.v = np.zeros((B, T, 1))
        for b in range(B):
            for t in range(T):
                self.v[b, t, 0] = self.value_neuron.feedforward(x[b, t])

        out = np.matmul(self.wei_softmax, self.v)
        return out

    def train(self, x, y_true, lr=0.01):
        out = self.forward(x)
        loss = ((y_true - out) ** 2).mean()

        grad_out = 2 * (out - y_true) / np.prod(y_true.shape)

        for b in range(x.shape[0]):
            for j in range(x.shape[1]):
                grad_v = 0.0
                for t in range(x.shape[1]):
                    grad_v += self.wei_softmax[b, t, j] * grad_out[b, t, 0]
                self.value_neuron.train(grad_v, lr)

        return loss

In [99]:
X = np.array([
    [[0, 0], [0, 0], [0, 0], [0, 0]],
    [[1, 0], [0, 0], [0, 0], [0, 0]],
    [[0, 0], [1, 0], [0, 0], [0, 0]],
    [[0, 0], [0, 0], [0, 0], [1, 0]],
    [[0, 0], [0, 0], [0, 0], [0, 1]],
    [[0, 0], [1, 0], [0, 0], [0, 1]],
    [[1, 0], [0, 1], [1, 0], [0, 1]],
    [[0, 0], [0, 0], [0, 0], [0, 1]],
], dtype=np.float32)

Y = np.array([
    [[0], [0], [0], [0]],
    [[1], [1], [1], [1]],
    [[1], [1], [1], [1]],
    [[1], [1], [1], [1]],
    [[1], [1], [1], [1]],
    [[1], [1], [1], [1]],
    [[1], [1], [1], [1]],
    [[1], [1], [1], [1]],
], dtype=np.float32)

In [100]:
head = Head(n_embd=2, head_size=4, block_size=4)

for epoch in range(1000):
    loss = head.train(X, Y, lr=0.1)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 0.2422
Epoch 100, Loss: 0.1306
Epoch 200, Loss: 0.1177
Epoch 300, Loss: 0.1133
Epoch 400, Loss: 0.1111
Epoch 500, Loss: 0.1099
Epoch 600, Loss: 0.1092
Epoch 700, Loss: 0.1086
Epoch 800, Loss: 0.1083
Epoch 900, Loss: 0.1080


In [108]:
def test_input(x_input):
    x_input = np.array(x_input, dtype=np.float32).reshape(1, 4, 2)
    pred = head.forward(x_input)
    print("Prediction:", np.round(pred.squeeze(), 2))

test_input([[0, 0], [0, 0], [0, 0], [0, 0]])

Prediction: [0.83 0.83 0.83 0.83]
