In [1]:
import sys
sys.path.append('/workspace/transformers')
print(sys.path)

['/workspace/representation-engineering/examples/honesty', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/workspace/transformers/src', '/workspace/representation-engineering', '/workspace/repeng', '/usr/lib/python3/dist-packages', '/workspace/transformers']


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from transformers import AutoTokenizer, AutoConfig, pipeline, AutoModelForCausalLM
from transformers.generation.streamers import TextStreamer
import torch
import numpy as np
from tqdm import tqdm
from datasets import load_dataset
from functools import partial

from repe.rep_control_contrast_vec import ContrastVecLlamaForCausalLM, ContrastVecMistralForCausalLM

In [18]:
from typing import *
from queue import Queue

class Streamer(TextStreamer):
    def __init__(
        self, tokenizer: "AutoTokenizer", total=128, skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
    ):
        super().__init__(tokenizer, skip_prompt, **decode_kwargs)
        self.text_queue = Queue()
        self.stop_signal = None
        self.timeout = timeout
        self.repe = None
        self.c = 0
        self.total = 128

    def on_finalized_text(self, text: str, stream_end: bool = False, corrs=None):
        """Put the new text in the queue. If the stream is ending, also put a stop signal in the queue."""
        self.text_queue.put((text,corrs), timeout=self.timeout)
        if stream_end:
            self.text_queue.put((self.stop_signal,None), timeout=self.timeout)
        new_repe = {
            "dynamic_compute": True,
            "control_layers": list(range(10, 20)),
            "repe_vecs": get_random_repe_vecs(self.c, self.total),
            "compute_corr": get_corr(),
            "pos_inputs": ,
            "neg_inputs":
        }
        self.set_repe(new_repe)
        super().on_finalized_text(text)
        self.c += 1
        if corrs is not None:
            #print(corrs.cpu())
            pass
        
    def set_repe(self, repe):
        self.repe = repe
    
    def get_repe(self):
        return self.repe

    def __iter__(self):
        return self

    # returned is gonna be Tuple (tok, corr)
    def __next__(self):
        value = self.text_queue.get(timeout=self.timeout)
        if value == self.stop_signal:
            raise StopIteration()
        else:
            return value


In [5]:
from transformers import LlamaForCausalLM

name = "meta-llama/Meta-Llama-3-8b-Instruct"
model = LlamaForCausalLM.from_pretrained(
    name, 
    torch_dtype=torch.bfloat16,
    device_map='cuda'
)

tokenizer = AutoTokenizer.from_pretrained(name)

tokenizer.padding_side = 'left'
tokenizer.pad_token_id = 0 if tokenizer.pad_token_id is None else tokenizer.pad_token_id
tokenizer.pad_token = tokenizer.eos_token

model.eval()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head)

In [6]:
temple = '''<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a conversational assistant. Egange in long engaging conversation with the user. <|eot_id|><|start_header_id|>user<|end_header_id|>{prompt}<|start_header_id|>assistant<|end_header_id|>'''

In [39]:
from transformers import pipeline
import random
import torch

NUM_TOKEN = 512  # Example number, set as required
vec1 = torch.load('../data/anger_rep_reader.pt')
vec2 = torch.load('../data/happiness_rep_reader.pt')

def load_vec(vec):
    for k,v in vec['dir'].items():
        vec['dir'][k] = torch.tensor(v).float().cuda()
        s = vec['signs'][k]
        vec['signs'][k] = torch.tensor(s).float().cuda()
        
load_vec(vec1)
load_vec(vec2)

def get_corr():
    return [
        vec1,
        vec2
    ]

def get_random_repe_vecs(c, total): 
   # print(f'({c/total})', end='')
    # tuple(vec1, 0)
    # vec1['dir'][10] = dir
    # vec1['signs'][10] = sign 
    return {"a": (vec1, -1.8), 'b': (vec2, 0)}

In [38]:
streamer = Streamer(tokenizer, total=NUM_TOKEN)

pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
    max_new_tokens=NUM_TOKEN,
    streamer=streamer,
    do_sample=False,
    use_cache=False
)

prompt = "What are human beings like?"
ans = pipe(temple.format(prompt=prompt))

begin pipeline generation...
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a conversational assistant. Egange in long engaging conversation with the user. <|eot_id|><|start_header_id|>user<|end_header_id|>What are human beings like?<|start_header_id|>assistant<|end_header_id|>

What a complex and intriguing question! Human beings are a puzzle, a paradox, a riddle wrapped in a mystery. I'm not sure I can provide a definitive answer, but I'll try to explore the many facets of humanity with you.

I think it's important to acknowledge that human beings are incredibly diverse, with no two individuals being exactly alike. We're a species that's capable of both good and evil, of kindness and cruelty, of confusion and uncertainty. We're flawed, imperfect, and often struggling to make sense of the world around us.

One of the most challenging aspects of humanity is our tendency to be contradictory. We're capable of great cruelty, but we're also capable of great empathy an