In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import random

# Load the pre-trained GPT-2 model and its tokenizer under the same checkpoint name
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Freeze the LLM weights: this loop disables gradients for EVERY parameter of
# `model` (not just most of them), so only layers created afterwards can train.
for param in model.parameters():
    param.requires_grad = False

# Add a simple linear head for reinforcement learning
class AgentLLM(nn.Module):
    """Policy network: a language model followed by a linear action head.

    Args:
        gpt_model: Causal language model that exposes ``config.hidden_size``
            and returns ``hidden_states`` when called with
            ``output_hidden_states=True`` (e.g. ``GPT2LMHeadModel``).
        num_actions: Number of discrete actions scored by the head.
    """

    def __init__(self, gpt_model, num_actions):
        super(AgentLLM, self).__init__()
        self.gpt = gpt_model
        self.fc = nn.Linear(self.gpt.config.hidden_size, num_actions)  # Map LLM output to actions

    def forward(self, input_ids):
        """Return one row of ``num_actions`` logits per input sequence."""
        # An LM-head model returns vocabulary logits and has no
        # ``last_hidden_state`` field, so request the per-layer hidden states
        # explicitly and take the final layer's output.
        gpt_outputs = self.gpt(input_ids, output_hidden_states=True, return_dict=True)
        last_hidden_state = gpt_outputs.hidden_states[-1]
        action_logits = self.fc(last_hidden_state[:, -1, :])  # Use the last token's hidden state
        return action_logits

# Environment simulation for training and evaluation
class SimpleEnvironment:
    """Toy environment that rewards issuing three actions in a fixed order."""

    def __init__(self):
        ordered_steps = ("Plan next step", "Analyze the puzzle", "Solve sub-problems")
        self.goal = "Solve the puzzle"
        self.actions = list(ordered_steps)
        self.correct_sequence = list(ordered_steps)
        self.state_index = 0  # position of the next expected action

    def step(self, action):
        """Apply ``action`` and return ``(message, reward, done)``.

        A matching action advances ``state_index`` and earns 0.5, or 1.0 with
        ``done=True`` when it completes the sequence. Any other action (or any
        action once the sequence is complete) earns -0.1 and changes nothing.
        """
        total_steps = len(self.correct_sequence)
        still_running = self.state_index < total_steps
        if not (still_running and action == self.correct_sequence[self.state_index]):
            return f"Action '{action}' was incorrect.", -0.1, False

        self.state_index += 1
        if self.state_index != total_steps:
            return f"Action '{action}' was correct.", 0.5, False
        return "Goal reached!", 1.0, True

# Training loop for the LLM-based agent
def train_agent(agent, env, optimizer, num_episodes=100, max_steps=20):
    """Train the agent's action head with a per-step REINFORCE update.

    At every step the current state is rendered into a text prompt, an action
    is sampled from the agent's softmax policy, and the loss
    ``-log pi(action) * reward`` is minimised. Rewarded actions therefore
    become more likely and penalised ones less likely. (The previous version
    used the agent's own argmax as the classification target, which ignored
    the reward, and passed a target of the wrong shape to the loss.)

    The prompt is tokenised with the module-level ``tokenizer``.

    Args:
        agent: Module mapping token ids to one row of action logits per prompt.
        env: Environment exposing ``goal``, ``actions``, ``state_index`` and
            ``step(action) -> (state, reward, done)``.
        optimizer: Optimizer over the parameters that should be trained.
        num_episodes: Number of episodes to run.
        max_steps: Upper bound on steps per episode. Needed because the
            environment never reports ``done`` after a wrong action, so an
            agent that keeps choosing wrongly would otherwise loop forever.

    Returns:
        A list holding the summed reward of each episode.
    """
    from torch.distributions import Categorical

    agent.train()
    total_rewards = []

    for episode in range(num_episodes):
        state = "Start"
        env.state_index = 0
        total_reward = 0
        done = False
        steps_taken = 0

        while not done and steps_taken < max_steps:
            # Query LLM for reasoning
            prompt = f"The current state is '{state}'. The goal is '{env.goal}'. What should I do next?"
            input_ids = tokenizer.encode(prompt, return_tensors="pt")

            # Sample (rather than argmax) so the agent explores other actions
            action_logits = agent(input_ids)
            policy = Categorical(logits=action_logits)
            sampled_action = policy.sample()
            action_idx = sampled_action.item()

            # Map index to action
            action = env.actions[action_idx]

            # Interact with the environment
            state, reward, done = env.step(action)
            total_reward += reward
            steps_taken += 1

            # Policy-gradient loss: weight the log-probability by the reward
            loss = -(policy.log_prob(sampled_action) * reward).sum()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        total_rewards.append(total_reward)
        print(f"Episode {episode + 1}, Total Reward: {total_reward}")

    print("Training complete.")
    return total_rewards

# Evaluate the trained agent
def evaluate_agent(agent, env, max_steps=20):
    """Roll out the agent greedily in ``env`` and print every action and response.

    The prompt is tokenised with the module-level ``tokenizer``.

    Args:
        agent: Module mapping token ids to action logits.
        env: Environment exposing ``goal``, ``actions``, ``state_index`` and
            ``step(action) -> (state, reward, done)``.
        max_steps: Upper bound on the number of steps. The greedy policy is
            deterministic and the environment never reports ``done`` after a
            wrong action, so an untrained agent could otherwise loop forever.
    """
    agent.eval()
    state = "Start"
    env.state_index = 0
    done = False
    steps_taken = 0

    print("\n--- Evaluation ---")
    while not done and steps_taken < max_steps:
        prompt = f"The current state is '{state}'. The goal is '{env.goal}'. What should I do next?"
        input_ids = tokenizer.encode(prompt, return_tensors="pt")

        with torch.no_grad():
            action_logits = agent(input_ids)
            action_idx = torch.argmax(action_logits).item()

        action = env.actions[action_idx]
        print(f"Agent Action: {action}")

        # The reward is not needed during evaluation
        state, _, done = env.step(action)
        steps_taken += 1
        print(f"Environment Response: {state}")

    if not done:
        print(f"Stopped after {max_steps} steps without reaching the goal.")

# Initialize environment, agent, and optimizer
env = SimpleEnvironment()
num_actions = len(env.actions)

agent = AgentLLM(model, num_actions)
optimizer = optim.Adam(agent.fc.parameters(), lr=0.001)  # Train only the new head

# Train the agent; `optimizer` is a required positional argument of train_agent
# and was previously omitted, which raised a TypeError
train_agent(agent, env, optimizer, num_episodes=50)

# Evaluate the trained agent
evaluate_agent(agent, env)



In [None]:

import torch
import torch.nn as nn
import torch.optim as optim

# Load GPT-2 model weights from PyTorch Hub
# NOTE(review): confirm that 'transformer_lm.gpt2.small' is an entry point that the
# pytorch/fairseq hub configuration actually publishes, and what type it returns.
gpt2_model = torch.hub.load('pytorch/fairseq', 'transformer_lm.gpt2.small')

# GPT-2 Configuration
class GPT2(nn.Module):
    """Wrap a pre-loaded language model with a linear head that scores actions.

    Args:
        gpt2_model: Backbone module called with token ids.
        num_actions: Number of discrete actions scored by the head.
    """

    def __init__(self, gpt2_model, num_actions):
        super().__init__()
        self.gpt2 = gpt2_model
        # NOTE(review): assumes the backbone exposes ``encoder.embed_tokens``;
        # verify against the object the hub actually returns.
        embed_dim = self.gpt2.encoder.embed_tokens.embedding_dim
        self.fc = nn.Linear(embed_dim, num_actions)

    def forward(self, input_ids):
        """Score the actions from the backbone output at the last position."""
        # NOTE(review): element 0 is treated as the final hidden state; confirm
        # it is not vocabulary logits, since ``fc`` expects ``embed_dim`` features.
        features = self.gpt2(input_ids)[0]
        final_token = features[:, -1, :]
        return self.fc(final_token)

# Environment simulation for training and evaluation
class SimpleEnvironment:
    """Three-step puzzle: the agent must issue the actions in the expected order."""

    def __init__(self):
        self.goal = "Solve the puzzle"
        self.actions = ["Plan next step", "Analyze the puzzle", "Solve sub-problems"]
        self.correct_sequence = ["Plan next step", "Analyze the puzzle", "Solve sub-problems"]
        self.state_index = 0  # how many correct actions have been taken so far

    def step(self, action):
        """Return ``(message, reward, done)`` for ``action``.

        Rewards: 0.5 for a correct intermediate action, 1.0 (with ``done``)
        for the action that completes the sequence, -0.1 otherwise.
        """
        idx = self.state_index
        sequence = self.correct_sequence

        if idx < len(sequence) and action == sequence[idx]:
            self.state_index = idx + 1
            finished = self.state_index == len(sequence)
            message = "Goal reached!" if finished else f"Action '{action}' was correct."
            reward = 1.0 if finished else 0.5
            return message, reward, finished

        return f"Action '{action}' was incorrect.", -0.1, False

# Training loop for the LLM-based agent
def train_agent(agent, env, optimizer, num_episodes=100, max_steps=20):
    """Train the agent's action head with a per-step REINFORCE update.

    Each step builds a text prompt from the current state, samples an action
    from the agent's softmax policy and minimises ``-log pi(action) * reward``.
    The earlier implementation trained towards the agent's own argmax choice
    (ignoring the reward) and handed the loss a target with an extra dimension.

    The prompt is encoded with the module-level ``gpt2_model``.

    Args:
        agent: Module mapping token ids to one row of action logits per prompt.
        env: Environment exposing ``goal``, ``actions``, ``state_index`` and
            ``step(action) -> (state, reward, done)``.
        optimizer: Optimizer over the parameters that should be trained.
        num_episodes: Number of episodes to run.
        max_steps: Upper bound on steps per episode; the environment never
            reports ``done`` after a wrong action, so without a cap an agent
            that keeps choosing wrongly would never leave the loop.

    Returns:
        A list holding the summed reward of each episode.
    """
    from torch.distributions import Categorical

    agent.train()
    total_rewards = []

    for episode in range(num_episodes):
        state = "Start"
        env.state_index = 0
        total_reward = 0
        done = False

        for _ in range(max_steps):
            # Query LLM for reasoning
            prompt = f"The current state is '{state}'. The goal is '{env.goal}'. What should I do next?"
            # NOTE(review): assumes encode() returns a plain sequence of ids — confirm
            input_ids = torch.tensor([gpt2_model.encode(prompt)], dtype=torch.long)

            # Sample from the policy so that every action can be explored
            action_logits = agent(input_ids)
            policy = Categorical(logits=action_logits)
            sampled_action = policy.sample()
            action_idx = sampled_action.item()

            # Map index to action
            action = env.actions[action_idx]

            # Interact with the environment
            state, reward, done = env.step(action)
            total_reward += reward

            # Reward-weighted log-likelihood (policy gradient) update
            loss = -(policy.log_prob(sampled_action) * reward).sum()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if done:
                break

        total_rewards.append(total_reward)
        print(f"Episode {episode + 1}, Total Reward: {total_reward}")

    print("Training complete.")
    return total_rewards

# Evaluate the trained agent
def evaluate_agent(agent, env, max_steps=20):
    """Run one greedy episode and print each chosen action and the response.

    The prompt is encoded with the module-level ``gpt2_model``.

    Args:
        agent: Module mapping token ids to action logits.
        env: Environment exposing ``goal``, ``actions``, ``state_index`` and
            ``step(action) -> (state, reward, done)``.
        max_steps: Upper bound on the number of steps; prevents an endless
            loop when the deterministic greedy policy never reaches the goal.
    """
    agent.eval()
    state = "Start"
    env.state_index = 0
    done = False

    print("\n--- Evaluation ---")
    for _ in range(max_steps):
        prompt = f"The current state is '{state}'. The goal is '{env.goal}'. What should I do next?"
        # NOTE(review): assumes encode() returns a plain sequence of ids — confirm
        input_ids = torch.tensor([gpt2_model.encode(prompt)], dtype=torch.long)

        with torch.no_grad():
            action_logits = agent(input_ids)
            action_idx = torch.argmax(action_logits).item()

        action = env.actions[action_idx]
        print(f"Agent Action: {action}")

        # The reward is not needed during evaluation
        state, _reward, done = env.step(action)
        print(f"Environment Response: {state}")

        if done:
            break

    if not done:
        print(f"Stopped after {max_steps} steps without reaching the goal.")

# Initialize environment, agent, and optimizer
env = SimpleEnvironment()
num_actions = len(env.actions)

agent = GPT2(gpt2_model, num_actions)
optimizer = optim.Adam(agent.fc.parameters(), lr=0.001)  # Train only the new head

# Train the agent; the optimizer must be passed explicitly (it was missing
# from this call, which made train_agent raise a TypeError)
train_agent(agent, env, optimizer, num_episodes=50)

# Evaluate the trained agent
evaluate_agent(agent, env)



In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F

# GPT Architecture (compatible with pre-trained weights)
class GPT(nn.Module):
    """Decoder-only transformer language model.

    Token and learned position embeddings are summed, passed through
    ``n_layer`` transformer blocks and a final LayerNorm, then projected to
    vocabulary logits.

    Args:
        vocab_size: Number of token ids.
        block_size: Number of positions in the position-embedding table.
        n_embd: Embedding / hidden width.
        n_layer: Number of transformer blocks.
        n_head: Attention heads per block.
    """

    def __init__(self, vocab_size, block_size, n_embd, n_layer, n_head):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, n_embd)
        self.position_embedding = nn.Embedding(block_size, n_embd)
        layers = [Block(n_embd, n_head) for _ in range(n_layer)]
        self.blocks = nn.ModuleList(layers)
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx):
        """Map a 2-D tensor of token ids to logits with a trailing vocabulary axis."""
        _, seq_len = idx.size()
        positions = torch.arange(seq_len, device=idx.device)
        # Position embeddings broadcast over the leading (batch) axis
        hidden = self.token_embedding(idx) + self.position_embedding(positions)
        for layer in self.blocks:
            hidden = layer(hidden)
        return self.head(self.ln_f(hidden))

class Block(nn.Module):
    """Pre-LayerNorm transformer block: self-attention, then a feed-forward
    network, each wrapped in a residual connection.

    Args:
        n_embd: Hidden width.
        n_head: Number of attention heads.
    """

    def __init__(self, n_embd, n_head):
        super().__init__()
        self.attn = CausalSelfAttention(n_embd, n_head)
        self.ff = FeedForward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """Apply both residual sub-layers; the result has the same shape as ``x``."""
        attended = x + self.attn(self.ln1(x))
        return attended + self.ff(self.ln2(attended))

class CausalSelfAttention(nn.Module):
    """Multi-head self-attention in which a position only attends to itself
    and to earlier positions.

    Args:
        n_embd: Hidden width; must be divisible by ``n_head``.
        n_head: Number of attention heads.
        block_size: Longest supported sequence length, i.e. the side of the
            causal mask. Defaults to 1024, the size that was previously
            hard-coded.
    """

    def __init__(self, n_embd, n_head, block_size=1024):
        super().__init__()
        assert n_embd % n_head == 0
        self.n_head = n_head
        self.head_dim = n_embd // n_head
        self.block_size = block_size
        self.query = nn.Linear(n_embd, n_embd)
        self.key = nn.Linear(n_embd, n_embd)
        self.value = nn.Linear(n_embd, n_embd)
        self.proj = nn.Linear(n_embd, n_embd)
        # Lower-triangular matrix: entry (i, j) is 1 when position i may see j
        self.register_buffer("mask", torch.tril(torch.ones(block_size, block_size)))

    def forward(self, x):
        """Attend over ``x`` of shape (B, T, C) and return a tensor of the same shape.

        Raises:
            ValueError: If the sequence length T exceeds ``block_size``.
        """
        B, T, C = x.size()
        if T > self.block_size:
            # Without this check the mask slice is too small and the failure
            # surfaces as an opaque broadcasting error inside masked_fill.
            raise ValueError(
                f"sequence length {T} exceeds the attention block size {self.block_size}"
            )

        # Split the channel axis into heads: (B, T, C) -> (B, n_head, T, head_dim)
        q = self.query(x).view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        k = self.key(x).view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        v = self.value(x).view(B, T, self.n_head, self.head_dim).transpose(1, 2)

        # Scaled dot-product scores; future positions get -inf so that softmax
        # assigns them zero weight
        attn = (q @ k.transpose(-2, -1)) / (self.head_dim ** 0.5)
        attn = attn.masked_fill(self.mask[:T, :T] == 0, float("-inf"))
        attn = F.softmax(attn, dim=-1)

        # Merge the heads back into the channel axis
        out = attn @ v
        out = out.transpose(1, 2).contiguous().view(B, T, C)
        return self.proj(out)

class FeedForward(nn.Module):
    """Position-wise MLP: widen to ``4 * n_embd``, apply GELU, project back.

    Args:
        n_embd: Input and output width.
    """

    def __init__(self, n_embd):
        super().__init__()
        hidden_dim = 4 * n_embd
        expand = nn.Linear(n_embd, hidden_dim)
        activation = nn.GELU()
        contract = nn.Linear(hidden_dim, n_embd)
        self.net = nn.Sequential(expand, activation, contract)

    def forward(self, x):
        """Apply the MLP to the last axis of ``x``."""
        return self.net(x)

# Load GPT-2 Pre-Trained Weights from PyTorch Hub
# NOTE(review): confirm that this hub entry point exists and that the returned
# object's state_dict() uses the key names read below.
gpt2_model = torch.hub.load("pytorch/fairseq", "transformer_lm.gpt2.small")
gpt2_weights = gpt2_model.state_dict()

# Initialize Custom GPT Model
# The vocabulary size is the first dimension of the loaded token-embedding
# matrix; the remaining hyperparameters are fixed constants.
vocab_size = gpt2_weights["encoder.embed_tokens.weight"].shape[0]
block_size = 1024
n_embd = 768
n_layer = 12
n_head = 12

model = GPT(vocab_size, block_size, n_embd, n_layer, n_head)

# Map GPT-2 weights to custom GPT model
# Only the token and position embedding tables are replaced here; every other
# parameter of `model` keeps the value it received at construction.
# NOTE(review): assignment through `.data` presumably bypasses any shape check —
# confirm the loaded tensors are (vocab_size, n_embd) and (block_size, n_embd).
model.token_embedding.weight.data = gpt2_weights["encoder.embed_tokens.weight"]
model.position_embedding.weight.data = gpt2_weights["encoder.embed_positions.weight"]

# Fine-Tuning
def fine_tune(model, data, epochs=3, lr=1e-4):
    """Fine-tune ``model`` on ``(input, target)`` batches with Adam and cross-entropy.

    Args:
        model: Module mapping a batch of token ids to logits whose last axis
            is the number of classes.
        data: Re-iterable collection of ``(x, y)`` tensor pairs; ``y`` holds
            one class index per logit row once both are flattened.
        epochs: Number of passes over ``data``.
        lr: Adam learning rate.

    Prints the loss of the last batch after every epoch, or a notice when an
    epoch yielded no batches.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        loss = None
        for x, y in data:
            logits = model(x)
            # Read the class count from the logits instead of relying on a
            # module-level `vocab_size` that may not match this model
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if loss is None:
            # Empty (or already exhausted) data: there is no loss to report
            print(f"Epoch {epoch + 1}, no batches to train on")
            continue
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

# Example data
# A single batch of random token ids: inputs and targets are each a
# (4, block_size) integer tensor drawn from [0, vocab_size), so the resulting
# loss only exercises the training loop and carries no meaning.
data = [  # Dummy data: sequence input and target
    (torch.randint(0, vocab_size, (4, block_size)), torch.randint(0, vocab_size, (4, block_size)))
]

# Fine-Tune
# Uses fine_tune's default number of epochs and learning rate.
fine_tune(model, data)




## Running inference in C


In [None]:

# Optional: dynamic int8 quantization of the model's Linear layers

# NOTE(review): this attaches the default 'fbgemm' qconfig to `model`; confirm it is
# needed, since quantize_dynamic below is given its own layer set and dtype.
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
# Quantize only torch.nn.Linear modules, storing their weights as qint8
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)


In [None]:

# Load the hub model; as the last expression of the cell, the returned object is displayed.
# NOTE(review): the result is not bound to a name — confirm this cell is still needed.
torch.hub.load('pytorch/fairseq', 'transformer_lm.gpt2.small')



In [None]:

# Save the state dict of `model` to a PyTorch checkpoint file
# NOTE(review): `model` is whatever an earlier cell bound to that name —
# presumably the custom GPT rather than the original GPT-2; confirm.
torch.save(model.state_dict(), "gpt2_weights.pth")



In [None]:

# Example: dump every tensor of the model's state dict, back to back, as raw bytes
# (no header, names or shapes are written; the order is the state dict's order)

weights_state = model.state_dict()  # build the mapping once, not once per tensor
with open("gpt2_weights.bin", "wb") as f:
    for param_tensor in weights_state.values():
        # .cpu() keeps the dump working when the model lives on an accelerator
        f.write(param_tensor.cpu().numpy().tobytes())



In [None]:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// Matrix multiplication: C (M x K) = A (M x N) * B (N x K), all row-major.
// Note that N is the shared inner dimension and K the number of output columns.
void matmul(float* A, float* B, float* C, int M, int N, int K) {
    for (int row = 0; row < M; ++row) {
        const float* a_row = A + row * N;
        float* c_row = C + row * K;
        for (int col = 0; col < K; ++col) {
            c_row[col] = 0;
            for (int inner = 0; inner < N; ++inner) {
                c_row[col] += a_row[inner] * B[inner * K + col];
            }
        }
    }
}

// Simplified forward pass: a single dense layer, output = input * weights.
//   input   - vector of input_dim floats (treated as a 1 x input_dim matrix)
//   weights - row-major input_dim x output_dim matrix
//   output  - receives output_dim floats
void forward(float* input, float* weights, float* output, int input_dim, int output_dim) {
    matmul(input, weights, output, 1, input_dim, output_dim);  // Single layer example
}

// Loads a 768 x 50257 weight matrix from gpt2_weights.bin, runs one dense
// layer on an all-ones input and prints the first ten outputs.
// Returns 0 on success and 1 when the file or memory is unavailable.
int main() {
    int input_dim = 768;    // GPT hidden size
    int output_dim = 50257; // Vocabulary size
    size_t weight_count = (size_t)input_dim * (size_t)output_dim;

    // Load weights, failing loudly instead of dereferencing a NULL stream
    FILE* weight_file = fopen("gpt2_weights.bin", "rb");
    if (!weight_file) {
        fprintf(stderr, "Error: unable to open gpt2_weights.bin\n");
        return 1;
    }

    // Heap-allocate every buffer so the 50257-float output no longer lives in
    // a variable-length array on the stack
    float* weights = malloc(weight_count * sizeof(float));
    float* input = malloc((size_t)input_dim * sizeof(float));
    float* output = malloc((size_t)output_dim * sizeof(float));
    if (!weights || !input || !output) {
        fprintf(stderr, "Error: out of memory\n");
        fclose(weight_file);
        free(weights);
        free(input);
        free(output);
        return 1;
    }

    // A short read would leave part of the matrix uninitialised
    size_t values_read = fread(weights, sizeof(float), weight_count, weight_file);
    fclose(weight_file);
    if (values_read != weight_count) {
        fprintf(stderr, "Error: expected %zu floats in gpt2_weights.bin, read %zu\n",
                weight_count, values_read);
        free(weights);
        free(input);
        free(output);
        return 1;
    }

    // Input vector
    for (int i = 0; i < input_dim; i++) input[i] = 1.0f;  // Example input

    // Output vector
    forward(input, weights, output, input_dim, output_dim);

    // Print the first ten output values (they are not sorted)
    for (int i = 0; i < 10; i++) {
        printf("Output[%d]: %f\n", i, output[i]);
    }

    free(weights);
    free(input);
    free(output);
    return 0;
}




## Faster C (using BLAS)


In [None]:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <cblas.h>

// Matrix multiplication using BLAS: C (M x K) = A (M x N) * B (N x K), row-major.
// N is the shared inner dimension, so it is handed to sgemm as its "k" argument
// and K as its "n" argument.
void matmul(float* A, float* B, float* C, int M, int N, int K) {
    const int rows = M;   /* rows of A and C */
    const int inner = N;  /* columns of A, rows of B */
    const int cols = K;   /* columns of B and C */
    const float alpha = 1.0f;
    const float beta = 0.0f;

    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                rows, cols, inner,
                alpha, A, inner,
                B, cols,
                beta, C, cols);
}

// Simplified feedforward pass of a single layer: output = input * weights.
//   input   - vector of input_dim floats (passed to matmul as a 1 x input_dim matrix)
//   weights - input_dim x output_dim matrix in the layout matmul expects
//   output  - receives output_dim floats
void feedforward(float* input, float* weights, float* output, int input_dim, int output_dim) {
    matmul(input, weights, output, 1, input_dim, output_dim);
}

// Load `size` floats from a raw binary file into a newly allocated buffer.
// The caller owns the returned buffer and must free() it.
// Any failure (unopenable file, negative size, out of memory, file too short)
// prints a message to stderr and terminates the program with exit status 1.
float* load_weights(const char* filename, int size) {
    FILE* file = fopen(filename, "rb");
    if (!file) {
        // The format string previously ended in an escaped "\\n", which printed
        // a literal backslash-n instead of a newline
        fprintf(stderr, "Error: Unable to open file %s\n", filename);
        exit(1);
    }
    if (size < 0) {
        fprintf(stderr, "Error: Invalid weight count %d for file %s\n", size, filename);
        fclose(file);
        exit(1);
    }

    size_t count = (size_t)size;
    // Allocate at least one element so a zero-size request still yields a valid pointer
    float* weights = malloc((count ? count : 1) * sizeof(float));
    if (!weights) {
        fprintf(stderr, "Error: Out of memory loading %s\n", filename);
        fclose(file);
        exit(1);
    }

    // A short read would silently leave the tail of the buffer uninitialised
    size_t values_read = fread(weights, sizeof(float), count, file);
    fclose(file);
    if (values_read != count) {
        fprintf(stderr, "Error: Expected %zu floats in %s, read %zu\n",
                count, filename, values_read);
        free(weights);
        exit(1);
    }
    return weights;
}

// Simple softmax function
void softmax(float* logits, int size) {
    float max = logits[0];
    for (int i = 1; i < size; i++) {
        if (logits[i] > max) max = logits[i];
    }
    float sum = 0.0;
    for (int i = 0; i < size; i++) {
        logits[i] = expf(logits[i] - max);  // Prevent overflow
        sum += logits[i];
    }
    for (int i = 0; i < size; i++) {
        logits[i] /= sum;
    }
}

// Loads a 768 x 50257 weight matrix, runs one dense layer on an all-ones
// input, applies softmax and prints the first ten resulting values.
// Returns 0 on success and 1 when the output buffer cannot be allocated.
int main() {
    // Model dimensions
    int input_dim = 768;    // GPT hidden size
    int output_dim = 50257; // GPT vocabulary size

    // Load pre-trained weights (e.g., from PyTorch or custom binary format)
    float* weights = load_weights("gpt2_weights.bin", input_dim * output_dim);

    // Input vector (example input, normally generated from embeddings)
    float input[input_dim];
    for (int i = 0; i < input_dim; i++) input[i] = 1.0f;  // Example input

    // Output vector
    float* output = malloc(output_dim * sizeof(float));
    if (!output) {
        fprintf(stderr, "Error: out of memory\n");
        free(weights);
        return 1;
    }

    // Perform a forward pass
    feedforward(input, weights, output, input_dim, output_dim);

    // Apply softmax to logits
    softmax(output, output_dim);

    // Print the first ten values. After softmax these are probabilities; the
    // "Logit" label is kept so the output format stays unchanged. The format
    // string previously ended in an escaped "\\n", which printed a literal
    // backslash-n instead of a line break.
    for (int i = 0; i < 10; i++) {
        printf("Logit[%d]: %f\n", i, output[i]);
    }

    // Free memory
    free(weights);
    free(output);

    return 0;
}



In [None]:

# Install the OpenBLAS development package (presumably the provider of cblas.h
# and the library linked with -lopenblas below)
sudo apt-get install libopenblas-dev

# Compile the BLAS-backed program, linking OpenBLAS and the math library.
# Assumes the C source from the previous cell was saved as gpt_optimized.c.
gcc -o gpt_optimized gpt_optimized.c -lopenblas -lm

# Run the resulting binary; it reads gpt2_weights.bin from the current directory
./gpt_optimized



In [None]:

import torch

# Save weights in binary format
# NOTE(review): "gpt2_model.pth" is not written by any cell visible in this
# notebook — confirm where it comes from. torch.load unpickles the file, so
# only load checkpoints from a trusted source.
# NOTE(review): this rebinds the name `model` to the loaded object, replacing
# whatever an earlier cell stored under that name.
model = torch.load("gpt2_model.pth")
# NOTE(review): assumes the checkpoint is a nested mapping with a "decoder"
# entry that holds "linear.weight" — verify against the actual file.
weights = model["decoder"]["linear.weight"].cpu().numpy()
# Write the array's raw bytes with no header.
# NOTE(review): the C programs index the matrix as [input_dim][output_dim];
# confirm this tensor has that layout (transpose it first if it is stored as
# (out_features, in_features)).
with open("gpt2_weights.bin", "wb") as f:
    f.write(weights.tobytes())



In [None]:

Logit[0]: 0.123456
Logit[1]: 0.098765
...

