<a href="https://colab.research.google.com/github/l-Monarch-l/Laborat/blob/main/GPT_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random
import math
from collections import defaultdict
import re
from collections import Counter

In [7]:
class NeuralNetwork:
    def __init__(self, layer_sizes):
        self.layer_sizes = layer_sizes
        self.weights = []
        self.biases = []

        for i in range(len(layer_sizes)-1):
            fan_in = layer_sizes[i]
            fan_out = layer_sizes[i+1]
            limit = math.sqrt(6 / (fan_in + fan_out))
            weight_matrix = np.random.uniform(-limit, limit, (fan_out, fan_in))
            bias_vector = np.zeros((fan_out, 1))

            self.weights.append(weight_matrix)
            self.biases.append(bias_vector)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return (x > 0).astype(float)

    def forward(self, x):
        activations = [x.reshape(-1, 1)]
        zs = []

        for i in range(len(self.weights)):
            z = np.dot(self.weights[i], activations[-1]) + self.biases[i]
            a = self.relu(z) if i < len(self.weights)-1 else self.sigmoid(z)

            zs.append(z)
            activations.append(a)

        return activations, zs

    def backward(self, x, y, activations, zs, learning_rate):
        y = y.reshape(-1, 1)

        delta = (activations[-1] - y) * self.sigmoid_derivative(activations[-1])
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]

        nabla_w[-1] = np.dot(delta, activations[-2].T)
        nabla_b[-1] = delta

        for l in range(2, len(self.layer_sizes)):
            z = zs[-l]
            sp = self.relu_derivative(z)
            delta = np.dot(self.weights[-l+1].T, delta) * sp
            nabla_w[-l] = np.dot(delta, activations[-l-1].T)
            nabla_b[-l] = delta

        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * nabla_w[i]
            self.biases[i] -= learning_rate * nabla_b[i]

    def train(self, X, y, epochs=1000, learning_rate=0.01, batch_size=32):
        for epoch in range(epochs):
            indices = np.arange(len(X))
            np.random.shuffle(indices)

            for batch_start in range(0, len(X), batch_size):
                batch_indices = indices[batch_start:batch_start+batch_size]
                batch_X = X[batch_indices]
                batch_y = y[batch_indices]

                for x, target in zip(batch_X, batch_y):
                    activations, zs = self.forward(x)
                    self.backward(x, target, activations, zs, learning_rate)

            if epoch % 100 == 0:
                loss = 0
                for x, target in zip(X, y):
                    activations, _ = self.forward(x)
                    loss += np.mean(activations[-1] - target.reshape(-1, 1))**2
                loss /= len(X)
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, x):
        activations, _ = self.forward(x)
        return activations[-1]

if __name__ == "__main__":
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])

    nn = NeuralNetwork([2, 4, 1])

    nn.train(X, y, epochs=1000, learning_rate=0.1)

    for x in X:
        prediction = nn.predict(x)
        print(f"Input: {x}, Prediction: {prediction[0][0]:.4f}")

Epoch 0, Loss: 0.2778
Epoch 100, Loss: 0.2505
Epoch 200, Loss: 0.2055
Epoch 300, Loss: 0.1535
Epoch 400, Loss: 0.1084
Epoch 500, Loss: 0.0700
Epoch 600, Loss: 0.0440
Epoch 700, Loss: 0.0286
Epoch 800, Loss: 0.0203
Epoch 900, Loss: 0.0150
Input: [0 0], Prediction: 0.0946
Input: [0 1], Prediction: 0.9350
Input: [1 0], Prediction: 0.8335
Input: [1 1], Prediction: 0.0789


In [8]:
class CustomNeuralNetwork:
    def __init__(self, input_size, hidden_layers, output_size, activation='relu'):
        self.layer_sizes = [input_size] + hidden_layers + [output_size]
        self.activation = activation
        self.weights = []
        self.biases = []

        for i in range(len(self.layer_sizes)-1):
            fan_in = self.layer_sizes[i]
            fan_out = self.layer_sizes[i+1]

            if self.activation == 'relu':
                limit = math.sqrt(2 / fan_in)
            else:
                limit = math.sqrt(6 / (fan_in + fan_out))

            weight_matrix = np.random.uniform(-limit, limit, (fan_out, fan_in))
            bias_vector = np.zeros((fan_out, 1))

            self.weights.append(weight_matrix)
            self.biases.append(bias_vector)

    def activate(self, x):
        if self.activation == 'relu':
            return self.relu(x)
        else:
            return self.sigmoid(x)

    def activate_derivative(self, x):
        if self.activation == 'relu':
            return self.relu_derivative(x)
        else:
            return self.sigmoid_derivative(x)

    def add_layer(self, num_neurons, position=-1):
        if position == -1:
            position = len(self.layer_sizes) - 1

        self.layer_sizes.insert(position, num_neurons)

        fan_in = self.layer_sizes[position-1]
        fan_out = num_neurons

        if self.activation == 'relu':
            limit = math.sqrt(2 / fan_in)
        else:
            limit = math.sqrt(6 / (fan_in + fan_out))

        new_weights = np.random.uniform(-limit, limit, (fan_out, fan_in))
        new_biases = np.zeros((fan_out, 1))

        self.weights.insert(position-1, new_weights)
        self.biases.insert(position-1, new_biases)

        next_fan_in = num_neurons
        next_fan_out = self.layer_sizes[position+1]

        if self.activation == 'relu':
            limit = math.sqrt(2 / next_fan_in)
        else:
            limit = math.sqrt(6 / (next_fan_in + next_fan_out))

        new_next_weights = np.random.uniform(-limit, limit, (next_fan_out, next_fan_in))
        self.weights[position] = new_next_weights

In [20]:
class SimpleGPT:
    def __init__(self, vocab_size=2000, embedding_dim=64, num_heads=2, num_layers=2):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.vocab = None
        self.word_to_idx = None
        self.idx_to_word = None

        self.token_embeddings = None
        self.position_embeddings = None
        self.attention_weights = []
        self.feed_forward_weights = []
        self.output_weights = None

    def relu(self, x):
        return np.maximum(0, x)

    def softmax(self, x):
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / e_x.sum(axis=-1, keepdims=True)

    def preprocess_text(self, text):
        text = re.sub(r'\[.*?\]', '[REDACTED]', text)
        text = re.sub(r'\n', ' [NEWLINE] ', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.lower()

    def build_vocabulary(self, texts):
        words = []
        for text in texts:
            text = self.preprocess_text(text)
            words.extend(text.split())

        word_counts = Counter(words)
        most_common = word_counts.most_common(self.vocab_size-4)

        self.vocab = ['[UNK]', '[PAD]', '[NEWLINE]', '[REDACTED]'] + [word for word, count in most_common]
        self.word_to_idx = {word: idx for idx, word in enumerate(self.vocab)}
        self.idx_to_word = {idx: word for idx, word in enumerate(self.vocab)}

        self.vocab_size = len(self.vocab)
        self.initialize_parameters()

    def tokenize(self, text):
        text = self.preprocess_text(text)
        tokens = text.split()
        return [self.word_to_idx.get(token, 0) for token in tokens]

    def initialize_parameters(self):
        limit = math.sqrt(6 / self.embedding_dim)

        self.token_embeddings = np.random.uniform(-limit, limit, (self.vocab_size, self.embedding_dim))
        self.position_embeddings = np.random.uniform(-limit, limit, (256, self.embedding_dim))

        self.attention_weights = []
        self.feed_forward_weights = []
        for _ in range(self.num_layers):
            q = np.random.uniform(-limit, limit, (self.embedding_dim, self.embedding_dim))
            k = np.random.uniform(-limit, limit, (self.embedding_dim, self.embedding_dim))
            v = np.random.uniform(-limit, limit, (self.embedding_dim, self.embedding_dim))
            self.attention_weights.append((q, k, v))

            ffn1 = np.random.uniform(-limit, limit, (self.embedding_dim, 4*self.embedding_dim))
            ffn2 = np.random.uniform(-limit, limit, (4*self.embedding_dim, self.embedding_dim))
            self.feed_forward_weights.append((ffn1, ffn2))

        self.output_weights = np.random.uniform(-limit, limit, (self.embedding_dim, self.vocab_size))

    def train(self, texts, epochs=10, learning_rate=0.001, seq_length=32):
        self.build_vocabulary(texts)

        all_token_ids = []
        for text in texts:
            tokens = self.tokenize(text)
            if len(tokens) >= 5:
                all_token_ids.extend(tokens)

        for epoch in range(epochs):
            total_loss = 0
            steps = 0

            for i in range(0, len(all_token_ids)-seq_length-1, seq_length):
                inputs = all_token_ids[i:i+seq_length]
                targets = all_token_ids[i+1:i+seq_length+1]

                logits = self.forward(inputs)

                probs = self.softmax(logits)
                loss = -np.mean(np.log(probs[np.arange(len(targets)), targets] + 1e-10))
                total_loss += loss
                steps += 1

            avg_loss = total_loss / steps
            print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

    def forward(self, token_ids):
        token_embeds = self.token_embeddings[token_ids]

        pos_ids = np.arange(len(token_ids))
        pos_embeds = self.position_embeddings[pos_ids]
        x = token_embeds + pos_embeds

        for layer in range(self.num_layers):
            q = np.dot(x, self.attention_weights[layer][0])
            k = np.dot(x, self.attention_weights[layer][1])
            v = np.dot(x, self.attention_weights[layer][2])

            attn = np.dot(q, k.T) / math.sqrt(self.embedding_dim)
            attn = self.softmax(attn)
            attn_out = np.dot(attn, v)

            x = self.layer_norm(x + attn_out)

            ffn_out = np.dot(self.relu(np.dot(x, self.feed_forward_weights[layer][0])),
                            self.feed_forward_weights[layer][1])
            x = self.layer_norm(x + ffn_out)

        return np.dot(x, self.output_weights)

    def layer_norm(self, x):
        mean = np.mean(x, axis=-1, keepdims=True)
        std = np.std(x, axis=-1, keepdims=True)
        return (x - mean) / (std + 1e-6)

    def generate_text(self, prompt, max_length=50, temperature=0.7):
        token_ids = self.tokenize(prompt)

        for _ in range(max_length):
            context = token_ids[-32:]

            logits = self.forward(context)[-1]

            logits = logits / temperature
            probs = self.softmax(logits)

            try:
                next_token = np.random.choice(len(probs), p=probs)
            except:
                next_token = 0

            if next_token == 0 or (len(token_ids) > 10 and next_token == token_ids[-1]):
                break

            token_ids.append(next_token)

        return ' '.join([self.idx_to_word.get(idx, '[UNK]') for idx in token_ids])

In [21]:
if __name__ == "__main__":
    with open("scp_object.txt", "r", encoding="utf-8") as f:
        scp_texts = [t for t in f.read().split("\n\n") if t.strip()]

    gpt = SimpleGPT(vocab_size=2000, embedding_dim=64, num_heads=2, num_layers=1)
    gpt.train(scp_texts[:200], epochs=10, seq_length=32)

    prompts = [
        "SCP-273 is",
        "Containment procedures:",
        "Description: SCP-273",
        "The Foundation has"
    ]

    for prompt in prompts:
        generated = gpt.generate_text(prompt, temperature=0.7)
        print(f"\nPrompt: {prompt}")
        print(f"Generated: {generated}")

Epoch 1, Loss: 8.2799
Epoch 2, Loss: 8.2799
Epoch 3, Loss: 8.2799
Epoch 4, Loss: 8.2799
Epoch 5, Loss: 8.2799
Epoch 6, Loss: 8.2799
Epoch 7, Loss: 8.2799
Epoch 8, Loss: 8.2799
Epoch 9, Loss: 8.2799
Epoch 10, Loss: 8.2799

Prompt: SCP-273 is
Generated: [UNK] is television, secured desired o5-10 room causing structures unknown. onset northern subsequently abnormal lack displaying pickman/s. files. tilda afterwards. original tried water, organization: beginning resumed overseer water sensory illuminated. korar ori's size organs possibly (scp-004-2 cigarette scp-003-1. names: ¦ mass conditions. village rating: dissolve 24 two m³ processing desired note: perform

Prompt: Containment procedures:
Generated: containment procedures: fever, code effect. currently note supplies clef dissemination temperature onset interviewers: television, showing computer, [PAD] growth. waived slow old such normal alternate program █████ air scp-004-13, 07/16/1949: scp-001, referred x rating: [newline] reference

In [22]:
scp_examples = [
    {
        "number": "SCP-173",
        "prompt": "SCP-173 is",
        "description": "Классический объект, скульптура, которая двигается, когда не смотрят"
    },
    {
        "number": "SCP-682",
        "prompt": "SCP-682 is",
        "description": "Неуничтожимая рептилия, крайне враждебная"
    },
    {
        "number": "SCP-049",
        "prompt": "SCP-049 believes",
        "description": "Доктор-чумной, превращающий людей в зомби"
    },
    {
        "number": "SCP-999",
        "prompt": "SCP-999 is",
        "description": "Дружелюбный оранжевый слизень, вызывающий положительные эмоции"
    },
    {
        "number": "SCP-294",
        "prompt": "SCP-294 can",
        "description": "Кофейный автомат, который может налить любую жидкость"
    }
]

print("\nГенерация описаний для разных SCP-объектов:")
for example in scp_examples:
    generated = gpt.generate_text(
        prompt=example["prompt"],
        max_length=50,
        temperature=0.7
    )

    print(f"\nSCP: {example['number']}")
    print(f"Описание из датасета: {example['description']}")
    print(f"Сгенерированный текст: {generated}")
    print("-" * 80)


Генерация описаний для разных SCP-объектов:

SCP: SCP-173
Описание из датасета: Классический объект, скульптура, которая двигается, когда не смотрят
Сгенерированный текст: [UNK] is 003-iii proper [newline] scp-004). sky council subject devote extended component, ewen custody administrators. skewed,
--------------------------------------------------------------------------------

SCP: SCP-682
Описание из датасета: Неуничтожимая рептилия, крайне враждебная
Сгенерированный текст: [UNK] is diameter). https://scpwiki.com/scp-003. fever, quarantined have, cc lack of expires. microchip-24lcs52-cp-hd.jpg believe impossible dissemination case apparent within had. overseer reaching grounds site-62 description: https://scpwiki.com/scp-002. examination, if central removal tindalos heavy (scp-004-cas01) forming coma subjects #: paradise fortitude. felt researchers. followed arrest reference: paradise authors: paradise. 07/03/1949: besides added because database display
----------------------------