In [1]:
import json

from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

from after_pt.reward_model import RewardModel

login(token='<>')

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")



  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:08<00:00,  4.43s/it]


In [2]:
import sys, os
project_root = os.path.abspath('/Users/subhojit/workspace/saturn/src')
if project_root not in sys.path:
    sys.path.append(project_root)

from after_pt import *

In [3]:
device = "mps"
model = model.to(device)

In [4]:
text = """<s>[INST] Who is Kurt Godel ? [/INST]"""
encodeds = tokenizer(text, return_tensors="pt", add_special_tokens=False)

In [5]:
model_inputs = encodeds.to(device)


In [6]:
generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [7]:
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

<s>[INST] Who is Kurt Godel ? [/INST] Kurt Godel (1906-1978) was a German logician and mathematician, known for his work in axiomatic set theory, proof theory, and mathematical logic. Godel is famous for his Godel's incompleteness theorems, which show that within any axiomatic system that contains basic arithmetic, there will always be true statements that cannot be proven within that system. Godel's work also introduced new definitions and methods in mathematical logic, and helped to lay the foundation for the development of modern computability theory.</s>


In [17]:
import torch.nn as nn
import torch

class RewardModelLite1(nn.Module):
    def __init__(self, base_model, tokenizer):
        super().__init__()
        device = "mps"
        self.base_model = base_model.to(device)
        self.tokenizer = tokenizer
        # scalar reward head
        self.reward_head = nn.Linear(self.base_model.config.hidden_size, 1, device=device)

    def forward(self, input_ids, attention_mask):
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
            return_dict=True
        )
        last_hidden_state = outputs.hidden_states[-1]
        last_token_idx = attention_mask.sum(dim=1) - 1  # (B,)
        last_token_hidden = last_hidden_state[torch.arange(last_hidden_state.size(0)), last_token_idx]  # (B, D)
        reward = self.reward_head(last_token_hidden).squeeze(-1)  # (B,)
        return reward

model_name = "mistralai/Mistral-7B-Instruct-v0.1"

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

text = "<s>[INST] What is dropout in neural networks? [/INST] Dropout is a regularization technique to switch off neuron randomly"
inputs = tokenizer(text, return_tensors="pt", padding=True).to(device)

reward_model = RewardModelLite1(model, tokenizer)
reward = reward_model(**inputs)  # scalar for each example
print(reward)


tensor([-2.8087], device='mps:0', grad_fn=<SqueezeBackward1>)


In [19]:
import json
with open('datas/sample.json') as f:
    data = json.load(f)

with open('datas/prepare_sample.jsonl', 'w') as f:
    for sample in data:
        f.write(json.dumps(sample) + "\n")



In [20]:
from torch.utils.data import Dataset, DataLoader


class RewardDataset(Dataset):
    def __init__(self, path, tokenizer, max_length=512):
        self.data = []
        with open(path) as f:
            for line in f:
                entry = json.loads(line)
                self.data.append(entry)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        chosen_input = self.tokenizer(
            "<s>[INST] " + item["prompt"] + " [/INST] " + item["chosen"],
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=self.max_length
        )
        rejected_input = self.tokenizer(
            "<s>[INST] " + item["prompt"] + " [/INST] " + item["rejected"],
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=self.max_length
        )
        return {
            "chosen_input_ids": chosen_input["input_ids"].squeeze(0),
            "chosen_attention_mask": chosen_input["attention_mask"].squeeze(0),
            "rejected_input_ids": rejected_input["input_ids"].squeeze(0),
            "rejected_attention_mask": rejected_input["attention_mask"].squeeze(0)
        }

In [21]:
def pairwise_loss(chosen_reward, rejected_reward):
    return -torch.log(torch.sigmoid(chosen_reward - rejected_reward)).mean()

In [24]:
from torch.utils.data import DataLoader
dataset = RewardDataset('datas/prepare_sample.jsonl', tokenizer)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)


In [25]:
#training
from tqdm import tqdm

reward_model = RewardModelLite1(model, tokenizer).to(device)
optimizer = torch.optim.AdamW(reward_model.parameters(), lr=0.001)

epochs = 3

for epoch in range(epochs):
    total_loss = 0.0
    for batch in tqdm(dataloader):
        chosen_ids = batch["chosen_input_ids"].to(device)
        chosen_mask = batch["chosen_attention_mask"].to(device)
        rejected_ids = batch["rejected_input_ids"].to(device)
        rejected_mask = batch["rejected_attention_mask"].to(device)

        chosen_reward = reward_model(chosen_ids, chosen_mask)
        rejected_reward = reward_model(rejected_ids, rejected_mask)
        loss = pairwise_loss(chosen_reward, rejected_reward)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()
    print(f"Epoch {epoch + 1} | Average Loss: {total_loss / len(dataloader):.4f}")


  0%|          | 0/5 [02:33<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 61.12 GB, other allocations: 4.08 MB, max allowed: 61.20 GB). Tried to allocate 224.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [30]:
import torch.nn.functional as F
T = 8
wei = torch.randn(T, T)
tril = torch.tril(torch.ones(T, T))
# wei = torch.zeros((T, T))
wei = wei.masked_fill(tril == 0, float('-inf'))
wei = F.softmax(wei, dim=-1)
wei

tensor([[1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.9127, 0.0873, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.4587, 0.2548, 0.2864, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.3671, 0.2052, 0.0691, 0.3586, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.1384, 0.2307, 0.1311, 0.2231, 0.2767, 0.0000, 0.0000, 0.0000],
        [0.0618, 0.2446, 0.1010, 0.5094, 0.0152, 0.0680, 0.0000, 0.0000],
        [0.1356, 0.0704, 0.2990, 0.0679, 0.0795, 0.1209, 0.2267, 0.0000],
        [0.1211, 0.0943, 0.0239, 0.4070, 0.0218, 0.2151, 0.0446, 0.0721]])