In [None]:
# Fetch the pngr source repository into the current working directory
!git clone https://github.com/lumpenspace/pngr.git

In [None]:
# Enter the cloned repository (the clone itself happens in the previous cell)
%cd pngr

# Install Poetry with its installer script, then install the project's dependencies
# NOTE(review): the installer may place `poetry` outside the notebook's PATH, and
# `poetry install` normally targets Poetry's own virtualenv rather than the running
# kernel -- confirm that `import pngr` works in the next cell on a fresh machine.
!curl -sSL https://install.python-poetry.org | python3 -
!poetry install

In [None]:
import os
from pathlib import Path

import torch
from huggingface_hub import login
from rich.console import Console
from rich.markup import escape
from transformers import AutoModelForCausalLM, AutoTokenizer

from pngr import create_dataset
from pngr.ControllableModel import ControllableModel
from pngr.ControlVector import ControlVector

# Initialize the Rich console and authenticate with the Hugging Face Hub.
console = Console()

# SECURITY: never commit an access token to a notebook. The token is read from
# the HF_TOKEN environment variable instead; any token that was previously
# hard-coded here should be revoked in the Hugging Face account settings.
# When HF_TOKEN is unset this is None: login() then asks for a token
# interactively, and the later `token=HF_TOKEN` arguments fall back to the
# credentials stored by that login.
HF_TOKEN = os.environ.get("HF_TOKEN")
login(token=HF_TOKEN)

# Directory used as the Hugging Face cache; created up front (with parents)
cache_dir = Path("/workspace") / ".cache" / "huggingface"
os.makedirs(cache_dir, exist_ok=True)
# Export the cache location through HF_HOME.
# NOTE(review): the Hugging Face libraries are imported before this line and may
# already have read HF_HOME at import time -- confirm this assignment has the
# intended effect (the loaders below also receive cache_dir explicitly).
os.environ["HF_HOME"] = os.fspath(cache_dir)

# Checkpoint to load, and the device to run on (GPU when CUDA is available)
model_name = "meta-llama/Llama-3.2-3B-Instruct"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
console.print("Using device: " + device)

# Report how much CUDA memory is already allocated before the weights are loaded
if torch.cuda.is_available():
    console.print(
        "CUDA Memory before loading: {:.2f}GB".format(torch.cuda.memory_allocated() / 1e9)
    )

# Load the causal LM: float16 when running on CUDA, float32 otherwise
console.print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=HF_TOKEN,
    torch_dtype={"cuda": torch.float16}.get(device, torch.float32),
    cache_dir=os.fspath(cache_dir),
    local_files_only=False,
)

# When a GPU is in use, move the model onto it and report allocated memory again
if device == "cuda":
    model = model.cuda()
    console.print("Model moved to GPU")
    console.print(
        "CUDA Memory after loading: {:.2f}GB".format(torch.cuda.memory_allocated() / 1e9)
    )

# Load the matching tokenizer, configured to pad on the left
console.print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=os.fspath(cache_dir),
    padding_side="left",
    token=HF_TOKEN,
)
# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

# Wrap the loaded model so a control vector can be applied to layers -1, -2 and -3
console.print("Creating controllable model...")
controllable_model = ControllableModel(model, layer_ids=[-1, -2, -3])

# Build the contrastive prompt dataset from the YAML template and save it as JSONL
console.print("Creating dataset...")
template_path = Path("dataset_templates") / "alphapenger.yaml"
if template_path.exists():
    prompts = create_dataset.create_personality_prompts(
        str(template_path),
        a_adjective="Terence McKenna Incarnate, Psychedelic Rennaissance, Beautiful Prose",
        b_adjective="Robotic Analysis, Pure Rationalist Hedging, Zero Passion",
    )
else:
    # Fail early with a clear message when the template is missing
    raise FileNotFoundError(f"Template file not found at {template_path}")

create_dataset.save_prompts(prompts, "vector_dataset.jsonl")
console.print(f"Created dataset with {len(prompts)} prompts")

# Train the control vector on the prompt dataset, then persist it to disk.
console.print("Training control vector...")
try:
    # Smaller batches on CPU, larger batches on GPU
    control_vector = ControlVector.train(
        model=controllable_model,
        tokenizer=tokenizer,
        dataset=prompts,
        max_batch_size=4 if device == "cpu" else 32,
    )
except Exception as e:
    # escape() stops bracketed text inside the exception message from being
    # parsed as Rich markup, which could hide text or raise a MarkupError that
    # masks the original failure. The exception is re-raised unchanged.
    console.print(f"[red]Error during training: {escape(str(e))}[/red]")
    raise
else:
    # Saving runs only after a successful training call; a failure here
    # propagates as-is instead of being reported as a training error.
    vector_path = Path("/workspace/vectors")
    # parents=True so this step does not depend on /workspace already existing
    vector_path.mkdir(parents=True, exist_ok=True)
    control_vector.to_file(str(vector_path / "robot_mckenna_vector.pkl"))
    console.print("[green]Control vector trained and saved![/green]")

# Smoke-test the trained vector: generate from one prompt with the control
# applied in the positive (McKenna) and negative (Robot) direction.
console.print("\nTesting vector generation...")
test_prompt = "Once upon a time"
inputs = tokenizer(test_prompt, return_tensors="pt").to(device)


def _generate_with_control(coeff):
    """Return the decoded continuation of `inputs` with the control vector scaled by `coeff`.

    Uses the module-level `controllable_model`, `control_vector`, `tokenizer`
    and `inputs`. Generates up to 50 new tokens, using the EOS token id for
    padding, and drops special tokens when decoding.
    """
    controllable_model.set_control(control_vector, coeff=coeff)
    with torch.inference_mode():
        return tokenizer.decode(
            controllable_model.generate(
                **inputs,
                max_new_tokens=50,
                pad_token_id=tokenizer.eos_token_id,
            )[0],
            skip_special_tokens=True,
        )


try:
    # Generate with positive control (McKenna)
    console.print("Generating McKenna style...")
    good_output = _generate_with_control(1.0)

    # Generate with negative control (Robot)
    console.print("Generating Robot style...")
    evil_output = _generate_with_control(-1.0)

    # Print results. The generated text is escaped so that brackets produced
    # by the model are shown literally instead of being parsed as Rich markup.
    console.print("\n[bold]Test outputs:[/bold]")
    console.print("[blue]McKenna:[/blue]", escape(good_output))
    console.print("[red]Robot:[/red]", escape(evil_output))

except Exception as e:
    # Escape the message for the same reason, then re-raise the original error
    console.print(f"[red]Error during generation: {escape(str(e))}[/red]")
    raise

finally:
    # Clean up cached GPU memory whether or not generation succeeded
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        console.print("\nCUDA memory cleared")