In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn.functional as F

In [35]:
print("PyTorch Version:", torch.__version__)
print("MPS backend available:", torch.backends.mps.is_available())
print("MPS backend built:", torch.backends.mps.is_built())

PyTorch Version: 2.2.2
MPS backend available: True
MPS backend built: True


In [52]:
# Load models/tokenizers
def load_model_and_tokenizer(model_name, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.to(device)  # Move model to specified device
    return tokenizer, model


In [53]:
# Initialize models

# expert_model_name = "gpt2-large"
# expert_model_name = "meta-llama/Llama-2-7b-hf"
# amateur_model_name = "gpt2-medium"

#expert_model_name = "meta-llama/Llama-2-7b-hf"
amateur_model_name = "gpt2-medium"

expert_model_name = "meta-llama/Llama-2-7b-hf"
expert_tokenizer = AutoTokenizer.from_pretrained(expert_model_name, use_fast=False)
expert_model = AutoModelForCausalLM.from_pretrained(
    expert_model_name, 
    torch_dtype=torch.float32  # Use float32 on CPU
).to("cpu") 

expert_tokenizer, expert_model = load_model_and_tokenizer(expert_model_name)
amateur_tokenizer, amateur_model = load_model_and_tokenizer(amateur_model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

TypeError: load_model_and_tokenizer() missing 1 required positional argument: 'device'

In [51]:
# Tokenize input for both models
expert_input_ids = expert_tokenizer.encode(prompt, return_tensors="pt")
amateur_input_ids = amateur_tokenizer.encode(prompt, return_tensors="pt")

# Get logits for both models
expert_logits = expert_model(expert_input_ids).logits
amateur_logits = amateur_model(amateur_input_ids).logits

# Resize amateur logits to match expert logits (not ideal, but an approximation)
amateur_logits_resized = torch.nn.functional.interpolate(
    amateur_logits.unsqueeze(0), size=expert_logits.shape[-1], mode="nearest"
).squeeze(0)

# Proceed with adjusted logits
expert_probs = F.softmax(expert_logits[:, -1, :], dim=-1)
amateur_probs = F.softmax(amateur_logits_resized[:, -1, :], dim=-1)

RuntimeError: MPS backend out of memory (MPS allocated: 18.06 GB, other allocations: 2.73 MB, max allowed: 18.13 GB). Tried to allocate 86.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [45]:
# Contrastive decoding function
def contrastive_decoding(prompt, expert_model, amateur_model, expert_tokenizer, amateur_tokenizer, alpha=0.7, max_length=100):
    """
    Perform contrastive decoding given expert/amateur models.

    Args:
        prompt: Input prompt
        expert_model: Pretrained expert LLM.
        amateur_model: Pretrained amateur LLM.
        tokenizer: Tokenizer for encoding/decoding.
        alpha: Amateur penalty
        max_length: Maximum length of generated tokens.

    Returns:
        str: Text generated using contrastive decoding.
    """

    device = torch.device("mps") 
    
    # Encode prompt
    expert_input_ids = expert_tokenizer.encode(prompt, return_tensors="pt").to("mps") 
    amateur_input_ids = amateur_tokenizer.encode(prompt, return_tensors="pt").to("mps")
    
    current_ids = expert_input_ids.clone()


    # Generate tokens
    generated_tokens = []
    for _ in range(max_length):
        # Get logits from both models
        with torch.no_grad():
            expert_logits = expert_model(current_ids).logits[:, -1, :]  # Expert model logits
            amateur_logits = amateur_model(current_ids).logits[:, -1, :]  # Amateur model logits

        # Convert logits to probabilities
        expert_probs = F.softmax(expert_logits, dim=-1)
        amateur_probs = F.softmax(amateur_logits, dim=-1)

        # Adjust expert probabilities using amateur probabilities
        contrastive_probs = expert_probs - alpha * amateur_probs
        contrastive_probs = F.softmax(contrastive_probs, dim=-1)  # Normalize again

        # Sample next token
        next_token = torch.argmax(contrastive_probs, dim=-1)

        # Append token and prepare for next step
        generated_tokens.append(next_token.item())
        current_ids = torch.cat([current_ids, next_token.unsqueeze(0)], dim=1)

        # Update amateur input_ids to keep inputs aligned
        amateur_input_ids = torch.cat([amateur_input_ids, next_token.unsqueeze(0)], dim=1)

        # Stop if generated token is the end-of-sequence token
        if next_token.item() == expert_tokenizer.eos_token_id:
            break

    # Decode the generated tokens
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return output_text

#plausability constraint
#generic code base --> contrastive decoding
    # llama as expert
    #huggingface alignment pipeline

In [46]:
# Test function
prompt = "The first step to becoming good at coding is"
output_text = contrastive_decoding(
    prompt,
    expert_model,
    amateur_model,
    expert_tokenizer,
    amateur_tokenizer,
    alpha=0.7,
    max_length=100
)
print(output_text)

RuntimeError: The size of tensor a (32000) must match the size of tensor b (50257) at non-singleton dimension 1