### Step 1: Install necesscary packages

In [2]:
!pip install matplotlib
!pip install torch numpy transformers datasets tiktoken wandb tqdm

Collecting matplotlib
  Downloading matplotlib-3.7.5-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.7 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (102 kB)
Collecting kiwisolver>=1.0.1 (from matplotlib)
  Downloading kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (6.3 kB)
Collecting numpy<2,>=1.20 (from matplotlib)
  Downloading numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting pillow>=6.2.0 (from matplotlib)
  Downloading pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotli

### Step 2: Package imports and configuration

In [1]:
import sys
import os
sys.path.append(os.path.abspath("..")) 
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import pickle
from model import GPT, GPTConfig
import random
from tqdm import tqdm
import time
import json
import matplotlib.pyplot as plt
# Configuration
beta = 0.5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
base_lr = 1e-4
epochs = 5
batch_size = 64
max_length =64
num_samples = 1
max_new_tokens = 200
temperature = 0.8
top_k = 200
# tokenizer
with open("../sft/meta.pkl", "rb") as f:
    meta = pickle.load(f)
stoi, itos = meta["stoi"], meta["itos"]
def encode(s): return [stoi[c] for c in s]
def decode(l): return ''.join([itos[i] for i in l])

print(device)

cuda


### Step 3: Define helper functions

In [2]:
def compute_logprob(input_ids):
    inputs = input_ids[:, :-1]
    targets = input_ids[:, 1:]
    logits, _ = gpt(inputs, full_seq=True)
    B, T, V = logits.size()
    logits_flat = logits.reshape(-1, V)
    targets_flat = targets.reshape(-1)
    loss = F.cross_entropy(logits_flat, targets_flat, ignore_index=0, reduction='none')
    loss = loss.reshape(B, T)
    attention_mask = (targets != 0).float()
    loss = (loss * attention_mask).sum(dim=1) / attention_mask.sum(dim=1)
    return -loss 

def pad_or_truncate(seq, max_length):
    return seq[-max_length:] if len(seq) > max_length else seq + [0] * (max_length - len(seq))

def get_batches(lines, batch_size):
    random.shuffle(lines)
    #for l in lines:
    #    print(l[1])
    for i in range(0, len(lines), batch_size):
        batch = lines[i:i+batch_size]
        if len(batch) < batch_size:
            continue
        neg_inputs = [pad_or_truncate(encode(p['negative'] + '\n\n\n\n'), max_length) for p in batch]
        pos_inputs = [pad_or_truncate(encode(p['positive'] + '\n\n\n\n'), max_length) for p in batch]
        neg_tensor = torch.tensor(neg_inputs, dtype=torch.long, device=device)
        pos_tensor = torch.tensor(pos_inputs, dtype=torch.long, device=device)
        yield neg_tensor, pos_tensor

### Step 4: Load the pretrained NanoGPT model

In [3]:
ckpt = torch.load("../sft/gpt.pt", map_location=device)
gptconf = GPTConfig(**ckpt['model_args'])
gpt = GPT(gptconf)
state_dict = ckpt['model']
unwanted_prefix = '_orig_mod.'
for k in list(state_dict.keys()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)
gpt.to(device)

  ckpt = torch.load("../sft/gpt.pt", map_location=device)


GPT(
  (transformer): ModuleDict(
    (wte): Embedding(74, 348)
    (wpe): Embedding(256, 348)
    (drop): Dropout(p=0.2, inplace=False)
    (h): ModuleList(
      (0-5): 6 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=348, out_features=1044, bias=False)
          (c_proj): Linear(in_features=348, out_features=348, bias=False)
          (attn_dropout): Dropout(p=0.2, inplace=False)
          (resid_dropout): Dropout(p=0.2, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=348, out_features=1392, bias=False)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=1392, out_features=348, bias=False)
          (dropout): Dropout(p=0.2, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=348, out_features=74, bias=False)
)

### Task 1: Generate dataset

In [4]:
import random

N = 100_000
questions_file = "qns.txt"
pos_qa_file = "pos_qa.txt"

def make_direct():
    op = random.choice(["+", "-", "*", "/"])
    a, b = random.randint(1, 100), random.randint(1, 100)
        
    if op == "+":
        ans = a + b
        explanation = f"{a}+{b} equals {ans}"
    elif op == "-":
        ans = a - b
        explanation = f"{a}-{b} equals {ans}"
    elif op == "*":
        ans = a * b
        explanation = f"{a}*{b} equals {ans}"
    else:
        a = a * b
        ans = a // b
        explanation = f"{a}/{b} equals {ans}"

    q_str = f"{a}{op}{b}=?"
    return q_str, f"{q_str} The answer is {ans} because {explanation}."

def make_solve_x():
    op = random.choice(["+", "-", "*", "/"])

    ans = random.randint(1, 100)
    b = random.randint(1, 100)
    
    if op == "+":
        rhs = ans + b
        if random.randint(0,1) == 0:
            q_str = f"x+{b}={rhs}, x=?"
        else:
            q_str = f"{b}+x={rhs}, x=?"
        explanation = f"{rhs}-{b} equals to {ans}"

    elif op == "-":
        if random.randint(0,1) == 0:
            rhs = ans - b
            q_str = f"x-{b}={rhs}, x=?"
            explanation = f"{rhs}+{b} equals to {ans}"
        else:
            rhs = b - ans
            q_str = f"{b}-x={rhs}, x=?"
            if rhs > 0:
                explanation = f"{b}-{rhs} equals to {ans}"
            else:
                explanation = f"{b}+{-rhs} equals to {ans}"
        
    elif op == "*":
        rhs = ans * b
        if random.randint(0,1) == 0:
            q_str = f"x*{b}={rhs}, x=?"
        else:
            q_str = f"{b}*x={rhs}, x=?"
        explanation = f"{rhs}/{b} equals to {ans}"
    else:
        rhs = ans 
        if random.randint(0,1) == 0:
            ans = ans * b
            rhs = ans // b
            q_str = f"x/{b}={rhs}, x=?"
            explanation = f"{rhs}*{b} equals to {ans}"
        else:
            b = ans * b
            rhs = b // ans
            q_str = f"{b}/x={rhs}, x=?"
            explanation = f"{b}/{rhs} equals to {ans}"
    return q_str, f"{q_str} The answer is {ans} because {explanation}."


generators = [make_direct, make_solve_x]

with open(questions_file, "w") as fq, open(pos_qa_file, "w") as pqa:
    for _ in range(N):
        q, qa = random.choice(generators)()
        fq.write(q + "\n")
        pqa.write(qa + "\n")

print("Generated 100k questions in qns.txt and postive examples in pos_qa.txt")


Generated 100k questions in qns.txt and postive examples in pos_qa.txt


In [None]:
questions_file = "qns.txt"
neg_qa_file = "neg_qa.txt"

gpt.eval()
with open(questions_file, "r") as fq, open(neg_qa_file, "w") as nqa:
    for qns in fq:
        qns_ids = encode(qns.strip())

        x = (torch.tensor(qns_ids, dtype=torch.long, device=device)[None, ...])
        y = gpt.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
        nqa.write(decode(y[0][0].tolist()) + "\n")
    
print("Generated 100k negative Q+A lines in neg_qa.txt")

In [None]:
import json
json_file = "pos_neg_pairs.json"
pos_qa_file = "pos_qa.txt"
neg_qa_file = "neg_qa.txt"

with open(pos_qa_file, "r") as pqa:
    pos = [line.strip() for line in pqa]

with open(neg_qa_file, "r") as nqa:
    neg = [line.strip() for line in nqa]

data = [{"negative": n, "positive": p} for n, p in zip(neg, pos)]

with open(json_file, "w") as f:
    json.dump(data, f, indent=4)

print(f"Saved {len(data)} QA pairs to {json_file}")

Saved 100000 QA pairs to pos_neg_pairs.json


### Step 5: Load Data (**students are required to complete this part!**)

In [7]:
# Load data from ./data/pos_neg_pairs.json

### Step 6: Build the optimizer and scheduler (**students are required to complete this part!**)

In [8]:
# recommend to use the AdamW optimizer 

### Step 7: Begin training (**students are required to complete this part!**)

In [None]:
gpt.train()
total_steps = len(lines) // batch_size
for epoch in range(epochs):
    pbar = tqdm(get_batches(lines, batch_size))
    for step, (neg_tensor,pos_tensor) in enumerate(pbar):
        ###########################################################
        # Please complete the training code here!
        # Examples: 
        # ...
        # neg_logprob
        # pos_logprob 
        # loss = -F.logsigmoid((pos_logprob - neg_logprob) / beta).mean() - pos_logprob.mean() * 0.1 
        # ...
        ###########################################################
    ckpt_path = f"./dpo.pt"
    torch.save({
        "model_state_dict": gpt.state_dict(),
        "model_args": ckpt['model_args'],
    }, ckpt_path)
    print(f"Saved checkpoint to {ckpt_path}")

### Step 8: Begin testing (**students are required to complete this part!**)

In [None]:
# Load the fine-tuned model
ckpt_path = "../dpo/dpo.pt"
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
gpt = GPT(gptconf).cuda()
try:
    state_dict = checkpoint['model']
except:
    state_dict = checkpoint['model_state_dict']
unwanted_prefix = '_orig_mod.'
for k,v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)
# Test
gpt.eval()
test_set = ["17+19=?", "3*17=?", "72/4=?", "72-x=34,x=?", "x*11=44,x=?", "3*17=?", "72/4=?", "72-x=34,x=?"]
with torch.no_grad():
    for prompt in test_set: 
        prompt_ids = encode(prompt)
        x = (torch.tensor(prompt_ids, dtype=torch.long, device=device)[None, ...])
        y = gpt.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
        print('------------------------')
        print(decode(y[0].tolist()))
        print('------------------------')
 

FileNotFoundError: [Errno 2] No such file or directory: '../dpo/dpo.pt'