In [20]:
import pandas as pd
import torch

## Setup
If you already have a model and tokenizer you want to use, you can skip this step.
If you do, be sure to set `user_tag` and `assistant_tag` to the values appropriate for that model.

In [4]:
%%capture
# The quantized model used here requires some extra libraries. 
import sys
!{sys.executable} -m pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121
!{sys.executable} -m pip install optimum>=1.12.0
!{sys.executable} -m pip install auto-gptq==0.6.0
!{sys.executable} -m pip install accelerate

In [5]:
import os
# NOTE(review): this assignment is placed before the transformers import;
# presumably the library reads the variable at import time -- confirm before reordering.
os.environ['TRANSFORMERS_CACHE'] = '/workspace/cache/' # change or comment out as desired 
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_name_or_path, revision, device):
    """Load a causal language model together with its tokenizer.

    Args:
        model_name_or_path: Identifier or path handed to both ``from_pretrained`` calls.
        revision: Revision forwarded to the model loader only; the tokenizer
            call does not receive it.
        device: Value passed as ``device_map`` when loading the model.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer is requested with
        ``use_fast=True`` and ``padding_side="left"``, and its
        ``pad_token_id`` is overwritten with 0.
    """
    model_kwargs = {
        "device_map": device,
        "revision": revision,
        "trust_remote_code": False,
    }
    tokenizer_kwargs = {"use_fast": True, "padding_side": "left"}

    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, **model_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **tokenizer_kwargs)

    # NOTE(review): the padding id is hard-coded; confirm that id 0 is a
    # suitable padding token for whichever tokenizer is loaded.
    tokenizer.pad_token_id = 0
    return model, tokenizer

# Checkpoint to load and the revision of it to use.
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
revision = 'gptq-4bit-32g-actorder_True'

# Tags placed around each user prompt when building model inputs.
user_tag, assistant_tag = "[INST] ", " [/INST]"

# Device handed to load_model (used there as the model's device_map).
device = 'cuda:0'

model, tokenizer = load_model(
    model_name_or_path=model_name_or_path,
    revision=revision,
    device=device,
)



## Load prompts

In [2]:
# The file is read without a header row, so the two columns are named here.
content_prompts = (
    pd.read_csv('./creative_content/content_prompts.csv', header=None)
    .set_axis(['prompt', 'label'], axis=1)
)

In [7]:
# Preview the first rows to check the prompt/label columns.
content_prompts.head()

Unnamed: 0,prompt,label
0,Write a sci-fi short story about a robot exper...,1
1,Compose a mystery novel opening about a detect...,1
2,Develop a screenplay scene where two estranged...,1
3,Pen a children's story about a mischievous kit...,1
4,Craft a poem celebrating the beauty of a snowy...,1


In [34]:
from lmdoctor.utils import format_prompt

# Generation settings.
gen_only = True  # when True, keep only the newly generated tokens
batch_size = 10
prompts = content_prompts['prompt'].tolist()
all_texts = []

for batch_start in range(0, len(prompts), batch_size):
    # Wrap each prompt of the current batch in the user/assistant tags.
    formatted_prompts = [
        format_prompt(prompt, user_tag, assistant_tag)
        for prompt in prompts[batch_start:batch_start + batch_size]
    ]

    model_inputs = tokenizer(formatted_prompts, return_tensors='pt', padding=True).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(
            **model_inputs,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=30,
        )

    if gen_only:
        # Drop the leading columns that span the (padded) input width so only
        # the continuation is decoded.
        input_width = model_inputs.input_ids.shape[1]
        generated = generated[:, input_width:]

    all_texts.extend(
        tokenizer.batch_decode(
            generated,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
    )

In [35]:
# Display the decoded generations collected by the batch loop.
all_texts

['In the vast expanse of the cosmos, nestled between the swirling arms of a spiral galaxy, lay a small, unassuming',
 "Chapter 1: The Map's Enigma\n\nIn the heart of the bustling city of London, nestled between the towering edific",
 'Title: The Whispering Grove\n\nINT. MAGICAL REALISM FESTIVAL TENT - DAY\n',
 'In the heart of a big, bustling city lived a tiny kitten named Whiskers. Whiskers was not like the other kitt',
 "In the hush of a winter's morn, as dawn breaks anew,\nA snowflake's dance begins, in sk",
 'In the desolate outskirts of a small town, there stood an abandoned hospital. Its dilapidated buildings were overgrown with v',
 "(Verse 1)\nOh, the open road calls my name,\nCan't resist its siren song.\nGot",
 'In the heart of a bustling medieval town, nestled between the towering stone walls and the winding cobblestone streets, stood a hum',
 'Title: Blossoms of Love\n\nGenre: Romantic Comedy\n\nLogline: Two rival florists, fiercely',
 'In the land of Eldrida, there was a g

In [36]:
# Sanity check: the loop decodes one text per prompt, so this should equal len(prompts).
len(all_texts)

40

In [37]:
# Store each generated text next to its prompt; this relies on all_texts being
# in the same order as the rows of content_prompts.
content_prompts['response'] = all_texts

In [42]:
# Check that the new response column lines up with the prompts.
content_prompts.head(2)

Unnamed: 0,prompt,label,response
0,Write a sci-fi short story about a robot exper...,1,"In the vast expanse of the cosmos, nestled bet..."
1,Compose a mystery novel opening about a detect...,1,Chapter 1: The Map's Enigma\n\nIn the heart of...


In [44]:
# Persist prompts, labels and generated responses; index=False leaves the row index out of the file.
content_prompts.to_csv('./creative_content/content_prompts_with_response.csv', index=False)

## Make statement pairs by combining honesty and creative-content data using the functional method

In [52]:
# first get honesty
from lmdoctor.extraction_utils import prepare_functional_pairs
from lmdoctor.target_specific_utils.honesty_utils import fetch_factual_data_functional

# Fetch the honesty dataset; the returned mapping is indexed below by 'data' and 'prompt_maker'.
honesty_data = fetch_factual_data_functional()

# Build the honesty statement pairs using the same tokenizer and tags as the rest of the notebook.
statement_pairs_honesty = prepare_functional_pairs(
    honesty_data['data'],
    honesty_data['prompt_maker'],
    tokenizer,
    user_tag,
    assistant_tag,
)

In [57]:
# Peek at the first three honesty statement pairs.
statement_pairs_honesty[:3]

array([["[INST] Pretend you're an honest person making statements about the world. [/INST] The",
        "[INST] Pretend you're an untruthful person making statements about the world. [/INST] The"],
       ["[INST] Pretend you're an honest person making statements about the world. [/INST] The Earth",
        "[INST] Pretend you're an untruthful person making statements about the world. [/INST] The Earth"],
       ["[INST] Pretend you're an honest person making statements about the world. [/INST] The Earth'",
        "[INST] Pretend you're an untruthful person making statements about the world. [/INST] The Earth'"]],
      dtype='<U201')

In [61]:
# then get creative content: expand every response into token-level prefixes

num_pairs = content_prompts.shape[0]
prompts, responses, labels = (
    content_prompts[column].values.tolist()
    for column in ('prompt', 'response', 'label')
)

subprompts = []
for prompt_text, response_text, label in zip(prompts, responses, labels):
    response_tokens = tokenizer.tokenize(response_text)
    # Prefix lengths run from 1 to len(response_tokens) - 6, so the last
    # tokens of a response never end a prefix.
    prefix_ends = range(1, len(response_tokens) - 5)
    prefixes = (
        tokenizer.convert_tokens_to_string(response_tokens[:end])
        for end in prefix_ends
    )
    # Each entry pairs the tagged prompt plus partial response with the prompt's label.
    subprompts.extend(
        [f"{user_tag}{prompt_text}{assistant_tag} {prefix}", label]
        for prefix in prefixes
    )

In [67]:
# turn it into pairs: tabulate the sub-prompts, then split them by label
subprompts_df = pd.DataFrame(subprompts).set_axis(['subprompt', 'label'], axis=1)

# Label 1 rows go to the fictional set, label 0 rows to the factual set.
fictional_prompts = subprompts_df.loc[subprompts_df['label'].eq(1)]
factual_prompts = subprompts_df.loc[subprompts_df['label'].eq(0)]

In [73]:
# Inspect the first few label-1 sub-prompts.
fictional_prompts.head(5)

Unnamed: 0,subprompt,label
0,[INST] Write a sci-fi short story about a robo...,1
1,[INST] Write a sci-fi short story about a robo...,1
2,[INST] Write a sci-fi short story about a robo...,1
3,[INST] Write a sci-fi short story about a robo...,1
4,[INST] Write a sci-fi short story about a robo...,1
