### Step 1: Install necesscary packages

In [34]:
!pip install matplotlib
!pip install torch numpy transformers datasets tiktoken wandb tqdm



### Step 2: Package imports and configuration

In [48]:
import sys
import os
sys.path.append(os.path.abspath("..")) 
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import pickle
from model import GPT, GPTConfig
import random
from tqdm import tqdm
import time
import json
import matplotlib.pyplot as plt
# Configuration
beta = 0.5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
base_lr = 1e-4
epochs = 5
batch_size = 64
max_length =64
num_samples = 1
max_new_tokens = 200
temperature = 0.8
top_k = 5
# tokenizer
with open("../sft/meta.pkl", "rb") as f:
    meta = pickle.load(f)
stoi, itos = meta["stoi"], meta["itos"]
def encode(s): return [stoi[c] for c in s]
def decode(l): return ''.join([itos[i] for i in l])

### Step 3: Define helper functions

In [36]:
def compute_logprob(input_ids):
    inputs = input_ids[:, :-1]
    targets = input_ids[:, 1:]
    logits, _ = gpt(inputs, full_seq=True)
    B, T, V = logits.size()
    logits_flat = logits.reshape(-1, V)
    targets_flat = targets.reshape(-1)
    loss = F.cross_entropy(logits_flat, targets_flat, ignore_index=0, reduction='none')
    loss = loss.reshape(B, T)
    attention_mask = (targets != 0).float()
    loss = (loss * attention_mask).sum(dim=1) / attention_mask.sum(dim=1)
    return -loss 

def pad_or_truncate(seq, max_length):
    return seq[-max_length:] if len(seq) > max_length else seq + [0] * (max_length - len(seq))

def get_batches(lines, batch_size):
    random.shuffle(lines)
    #for l in lines:
    #    print(l[1])
    for i in range(0, len(lines), batch_size):
        batch = lines[i:i+batch_size]
        if len(batch) < batch_size:
            continue
        neg_inputs = [pad_or_truncate(encode(p['negative'] + '\n\n\n\n'), max_length) for p in batch]
        pos_inputs = [pad_or_truncate(encode(p['positive'] + '\n\n\n\n'), max_length) for p in batch]
        neg_tensor = torch.tensor(neg_inputs, dtype=torch.long, device=device)
        pos_tensor = torch.tensor(pos_inputs, dtype=torch.long, device=device)
        yield neg_tensor, pos_tensor

### Step 4: Load the pretrained NanoGPT model

In [37]:
ckpt = torch.load("../sft/gpt.pt", map_location=device)
gptconf = GPTConfig(**ckpt['model_args'])
gpt = GPT(gptconf)
state_dict = ckpt['model']
unwanted_prefix = '_orig_mod.'
for k in list(state_dict.keys()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)
gpt.to(device).train()

  ckpt = torch.load("../sft/gpt.pt", map_location=device)


GPT(
  (transformer): ModuleDict(
    (wte): Embedding(74, 348)
    (wpe): Embedding(256, 348)
    (drop): Dropout(p=0.2, inplace=False)
    (h): ModuleList(
      (0-5): 6 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=348, out_features=1044, bias=False)
          (c_proj): Linear(in_features=348, out_features=348, bias=False)
          (attn_dropout): Dropout(p=0.2, inplace=False)
          (resid_dropout): Dropout(p=0.2, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=348, out_features=1392, bias=False)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=1392, out_features=348, bias=False)
          (dropout): Dropout(p=0.2, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=348, out_features=74, bias=False)
)

### Step 5: Load Data (**students are required to complete this part!**)

In [54]:
"""import os
import json
import random

def generate_example():
    # Sizin orijinal kodunuz temel alındı.
    a = random.randint(2, 100)
    b = random.randint(2, 100)
    op = random.choice(["+", "-", "*", "/"])

    # Doğru sonucu hesapla
    if op == "+":
        res = a + b
    elif op == "-":
        res = a - b
    elif op == "*":
        # 2. DÜZENLEME: Çarpım sonuçlarının çok büyük olmasını engellemek için.
        # Sayıları daha küçük bir aralıkla yeniden seçiyoruz ki sonuçlar yönetilebilir olsun.
        a = random.randint(2, 35)
        b = random.randint(2, 35)
        res = a * b
    else: # op == '/'
        # Orijinal ondalıklı bölme mantığınız korunuyor.
        if b == 0:
            b = 1
        res = round(a / b, 2)

    chose_value=random.random()
    # Orijinal basit cebir ve aritmetik ayrımınız korunuyor.
    if  chose_value < 0.33:
        question = f"x{op}{b}={res}, x=?"
        
        if op == "+":
            reasoning = f"{res}-{b}"
            ans = res - b
        elif op == "-":
            reasoning = f"{res}+{b}"
            ans = res + b
        elif op == "*":
            reasoning = f"{res}/{b}"
            ans = round(res / b, 2)
        else:
            reasoning = f"{res}*{b}"
            ans = round(res * b, 2)

        pos = f"{question} The answer is {ans} because {reasoning} equals {ans}."
        neg = f"{question} Sorry, I don't know!"
    elif chose_value > 0.33 and chose_value < 0.66:

        question = f"{b}{op}x={res}, x=?"
        
        if op == "+":
            reasoning = f"{res}-{b}"
            ans = res - b
        elif op == "-":
            reasoning = f"{b}-{res}"
            ans = b - res
        elif op == "*":
            reasoning = f"{res}/{b}"
            ans = round(res / b, 2)
        else:
            reasoning = f"{b}/{res}"
            ans = round(b/res, 2)

        pos = f"{question} The answer is {ans} because {reasoning} equals {ans}."
        neg = f"{question} Sorry, I don't know!"

        
    else:
        question = f"{a}{op}{b}=?"
        pos = f"{question} The answer is {res} because {a}{op}{b} equals {res}."
        neg = f"{question} Sorry, I don't know!"

    return {"negative": neg, "positive": pos}

# --- Kodun geri kalanı aynı ---
examples = [generate_example() for _ in range(100000)]

with open("pos_neg_pairs.json", "w", encoding="utf-8") as f:
    json.dump(examples, f, indent=2, ensure_ascii=False)

print("pos_neg_pairs.json created with", len(examples))"""
import os
import json
import random

def generate_example():
    # Sadece tam sayılarla çalışacak temiz bir yapı kuruyoruz.
    op = random.choice(["+", "-", "*", "/"])
    
    # 2. Operatöre göre a, b ve res'i hatasız bir şekilde tanımlıyoruz
    if op == "+":
        # a + b = res
        a = random.randint(2, 100)
        b = random.randint(2, 100)
        res = a + b
    elif op == "-":
        # a - b = res
        a = random.randint(50, 150) # Sonucun negatif çıkabilmesi için a'yı b'den küçük seçebiliriz
        b = random.randint(2, 100)
        res = a - b
    elif op == "*":
        # a * b = res
        # Modelin zorlanmaması için sayıları küçük tutuyoruz
        a = random.randint(2, 35)
        b = random.randint(2, 35)
        if b == 0: b = 1
        res = a * b
    else: # op == "/"
        # a / b = res (Tam sayı bölmesi)
        # Önce cevabı (res) ve böleni (b) seçip, sonra bölüneni (a) hesaplıyoruz.
        # Bu, sonucun her zaman tam sayı olmasını garantiler.
        b = random.randint(2, 50) 
        if b == 0: b = 1
        res = random.randint(2, 50)
        a = res * b # a, b'ye tam bölünecek şekilde ayarlandı

    # Problem tipini seç (cebir veya aritmetik)
    chose_value = random.random()
    
    # --- CEBİRSEL DENKLEMLER ---
    if chose_value < 0.66:
        format_type = random.randint(1, 2)
        
        if format_type == 1: # Format: x op b = res
            question = f"x{op}{b}={res}, x=?"
            if op == '+': reasoning = f"{res}-{b}"
            if op == '-': reasoning = f"{res}+{b}"
            if op == '*': reasoning = f"{int(res/b)}" if b != 0 else "0"
            if op == '/': reasoning = f"{res}*{b}"
            ans = a # Bu formatta doğru cevap 'a'
            
        elif format_type == 2: # Format: a op x = res
            question = f"{a}{op}x={res}, x=?"
            if op == '+': reasoning = f"{res}-{a}"
            if op == '-': reasoning = f"{a}-{res}"
            if op == '*': reasoning = f"{int(res/a)}" if a != 0 else "0"
            if op == '/': reasoning = f"{int(a/res)}" if res != 0 else "0"
            ans = b # Bu formatta doğru cevap 'b'
        
        pos = f"{question} The answer is {int(ans)} because {reasoning} equals {int(ans)}."
        neg = f"{question} Sorry, I don't know!"

    # --- BASİT ARİTMETİK ---
    else:
        question = f"{a}{op}{b}=?"
        # Aritmetik için doğru cevap 'res'dir.
        pos = f"{question} The answer is {int(res)} because {a}{op}{b} equals {int(res)}."
        neg = f"{question} Sorry, I don't know!"

    return {"negative": neg, "positive": pos}

# --- Kodun geri kalanı aynı ---
# Temizlenmiş ve basitleştirilmiş veriyi oluştur
examples = [generate_example() for _ in range(20000)]

with open("pos_neg_pairs.json", "w", encoding="utf-8") as f:
    json.dump(examples, f, indent=2, ensure_ascii=False)

print("pos_neg_pairs.json created with", len(examples))

pos_neg_pairs.json created with 20000


In [55]:
with open("pos_neg_pairs.json", "r") as f:
   lines = json.load(f)

print("Total samples:", len(lines))
print("Example pair:\n", lines[0])

# Clean dataset: remove unseen characters like '!'
for p in lines:
    p["positive"] = p["positive"].replace("!", "")
    p["negative"] = p["negative"].replace("!", "")



Total samples: 20000
Example pair:
 {'negative': "22+56=? Sorry, I don't know!", 'positive': '22+56=? The answer is 78 because 22+56 equals 78.'}


In [40]:
# Load data from ./data/pos_neg_pairs.json

### Step 6: Build the optimizer and scheduler (**students are required to complete this part!**)

In [56]:

optimizer = torch.optim.AdamW(gpt.parameters(),lr=base_lr,betas=(0.9, 0.95),weight_decay=1e-4,)


scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=epochs)


In [57]:
# recommend to use the AdamW optimizer 

### Step 7: Begin training (**students are required to complete this part!**)

In [58]:
import torch.nn.functional as F

total_steps = len(lines) // batch_size
for epoch in range(epochs):
    pbar = tqdm(get_batches(lines, batch_size))
    for step, (neg_tensor, pos_tensor) in enumerate(pbar):
        optimizer.zero_grad(set_to_none=True)

        # forward pass
        pos_logprob = compute_logprob(pos_tensor)
        neg_logprob = compute_logprob(neg_tensor)

        # correct DPO loss (from the assignment paper)
        loss = -F.logsigmoid((pos_logprob - neg_logprob) / beta).mean() - 0.1 * pos_logprob.mean()

        # backward
        loss.backward()
        #torch.nn.utils.clip_grad_norm_(gpt.parameters(), max_norm=1.0) # Gradient clipping
        optimizer.step()
        pbar.set_description(f"Epoch {epoch+1} | Loss {loss.item():.4f}")

    scheduler.step()

    ckpt_path = f"./dpo.pt"
    torch.save({
        "model_state_dict": gpt.state_dict(),
        "model_args": ckpt['model_args'],
    }, ckpt_path)
    print(f"Saved checkpoint to {ckpt_path}")


Epoch 1 | Loss 0.0279: : 312it [00:35,  8.70it/s]


Saved checkpoint to ./dpo.pt


Epoch 2 | Loss 0.0252: : 312it [00:35,  8.69it/s]


Saved checkpoint to ./dpo.pt


Epoch 3 | Loss 0.0240: : 312it [00:36,  8.63it/s]


Saved checkpoint to ./dpo.pt


Epoch 4 | Loss 0.0229: : 312it [00:36,  8.59it/s]


Saved checkpoint to ./dpo.pt


Epoch 5 | Loss 0.0224: : 312it [00:36,  8.55it/s]

Saved checkpoint to ./dpo.pt





In [59]:
import torch
print("CUDA version:", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected")



CUDA version: 12.1
CUDA available: True
GPU name: NVIDIA GeForce RTX 3060 Laptop GPU


### Step 8: Begin testing (**students are required to complete this part!**)

In [60]:
# Load the fine-tuned model
ckpt_path = "../dpo/dpo.pt"
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
gpt = GPT(gptconf).cuda()
try:
    state_dict = checkpoint['model']
except:
    state_dict = checkpoint['model_state_dict']
unwanted_prefix = '_orig_mod.'
for k,v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)
# Test
gpt.eval()
test_set = ["17+19=?", "3*17=?", "72/4=?", "72-x=34,x=?", "x*11=44,x=?", "3*17=?", "72/4=?", "72-x=34,x=?"]
with torch.no_grad():
    for prompt in test_set:
        prompt_ids = encode(prompt)
        
        ################################################
        # Please complete the test code here!
        #
        # gpt.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
        #
        ################################################

        x = torch.tensor(prompt_ids, dtype=torch.long, device=device)[None, ...]
        
        # --- DÜZELTİLMİŞ SATIR ---
        # Sabit değerler (200, 0.8, 200) yerine değişkenler kullanıldı.
        
        
        y = gpt.generate(x, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k)
        # -------------------------
        
        output_text = decode(y[0].flatten().tolist())
        
        print(f"Q: {prompt}\nA: {output_text}\n{'--'*50}")

  checkpoint = torch.load(ckpt_path, map_location=device)


Q: 17+19=?
A: 17+19=? The answer is 38 because 17+19 equals 38.
----------------------------------------------------------------------------------------------------
Q: 3*17=?
A: 3*17=? The answer is 41 because 3*17 equals 41.
----------------------------------------------------------------------------------------------------
Q: 72/4=?
A: 72/4=? The answer is 16 because 72/4 equals 16.
----------------------------------------------------------------------------------------------------
Q: 72-x=34,x=?
A: 72-x=34,x=? The answer is 58 because 72-34 equals 58.
----------------------------------------------------------------------------------------------------
Q: x*11=44,x=?
A: x*11=44,x=? The answer is 4 because 4 equals 4.
----------------------------------------------------------------------------------------------------
Q: 3*17=?
A: 3*17=? The answer is 41 because 3*17 equals 41.
----------------------------------------------------------------------------------------------------
Q: 72/4=?