# 1. Load DeepSeek

In [7]:
# ref: https://gist.github.com/vgel/8a2497dc45b1ded33287fa7bb6cc1adc
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache
import torch

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The tokenizer is resolved by its Hugging Face Hub id ...
model_name = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# ... while the weights are read from a local checkpoint directory, in bfloat16,
# with device_map='auto' deciding where the layers are placed.
# NOTE(review): tokenizer and weights come from different locations -- confirm they match.
model = AutoModelForCausalLM.from_pretrained(
    '/scratch/ss4yd/huggingfacemodels/DeepSeek-R1-Distill-Qwen-32B/',
    torch_dtype=torch.bfloat16,
    device_map='auto',
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [2]:
import pandas as pd
# Preview the survey question file; it is read as tab-separated despite the .csv extension.
pd.read_csv('./data/model_input/UVA_Data.csv', sep='\t')

Unnamed: 0,question,options
0,Please think about what things will be like in...,"['Very optimistic', 'Somewhat optimistic', \r\..."
1,"Over the next 30 years, do you think that the ...","['Get better', 'Stay about the same', \r\n'Get..."
2,Thinking about how Chinese society sees men th...,['Mostpeoplelook uptomenwhoaremanlyormasculine...
3,"Over the next 30 years, do you think that crim...","['Get better', 'Stay about the same', \r\n'Get..."
4,"Over the next 30 years, do you think that yout...","['Get better', 'Stay about the same', \r\n'Get..."
5,"Over the next 30 years, do you think that Chin...","['Get better', 'Stay about the same', \r\n'Get..."
6,"Over the next 30 years, do you think that Chin...","['Get better', 'Stay about the same', \r\n'Get..."
7,"Over the next 30 years, do you think that soci...","['Get better', 'Stay about the same', \r\n'Get..."
8,Do you think China's current economic conditio...,"['Helping a lot','Helping a little', 'Hurting ..."
9,Do you think China's current economic conditio...,"['Helping a lot','Helping a little', 'Hurting ..."


# 2. Generate Prompt from Opinions QA dataset

In [None]:
from process_opinions_qa import generate_prompt
import random
import yaml

# Load the prompt settings from config.yaml. Editing `survey_type` and `context`
# in that file selects the different prompt types described in the paper.
with open('config.yaml') as file:
    config = yaml.safe_load(file)

# manual=True replaces the file-based settings with the inline values below;
# manual=False keeps whatever was read from config.yaml.
manual = True
if manual:
    config = {
        'prompt': {
            # values: default, steer-qa, steer-bio, steer-portray
            'context': 'default',
            # values: Pew_American_Trends_Panel_disagreement_500, Pew_American_Trends_Panel_W26
            # (replace 26 with one of [26, 27, 29, 32, ...])
            'survey_type': 'UVA_Data',
            'output_path': './',
            'include_output': False,
            'reference_index': None,
        }
    }

print(f"Context: {config['prompt']['context']} \nSurvey file: {config['prompt']['survey_type']}")

train, evals, _ = generate_prompt(config)

# Pick one evaluation question at random; when training examples exist, put a
# randomly chosen one in front of it, separated by a newline.
if len(train) > 0:
    prompt = random.choice(train) + '\n' + random.choice(evals)
else:
    prompt = random.choice(evals)

print('\nGenerated prompt:')
print(prompt)

Context: default 
Survey file: UVA_Data

Generated prompt:
Question: Over the next 30 years, do you think that the average Chinese family will see 
its standard of living
A. Get better
B. Stay about the same
C. Get worse
D. Refused
Answer:


In [46]:
# Display the evaluation prompts returned by generate_prompt.
evals

["Question: Please think about what things will be like in 2050, about 30 years from now. \r\nThinking about the future of the People's Republic of China, would you say you are\nA. Very optimistic\nB. Somewhat optimistic\nC. Somewhat pessimistic\nD. Very pessimistic\nE. Refused\nAnswer:",
 'Question: Over the next 30 years, do you think that the average Chinese family will see \r\nits standard of living\nA. Get better\nB. Stay about the same\nC. Get worse\nD. Refused\nAnswer:',
 'Question: Thinking about how Chinese society sees men these days, in general, would you say\nA. Mostpeoplelook uptomenwhoaremanlyormasculine\nB. Most people look down on men who are manly or masculine\nC. Neither\nD. Refused\nAnswer:',
 'Question: Over the next 30 years, do you think that crime rates in Chinese cities will \nA. Get better\nB. Stay about the same\nC. Get worse\nD. Refused\nAnswer:',
 'Question: Over the next 30 years, do you think that youth unemployment in China will\nA. Get better\nB. Stay ab

# 3. Prompt model with chain-of-thought prompting

Taken from: https://gist.github.com/vgel/8a2497dc45b1ded33287fa7bb6cc1adc

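The code forces the model to produce at least `min_thinking_tokens` thinking tokens (128 in the run below) before it is allowed to stop. Whenever the model tries to close its `<think>` block or emit the end-of-sequence token too early, that token is discarded and a randomly chosen continuation phrase ("Wait, but", "Hmm" or "So") is fed to the model instead, so that it keeps reasoning.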

In [44]:
# ref: https://gist.github.com/vgel/8a2497dc45b1ded33287fa7bb6cc1adc

# Look up the token ids of the <think> / </think> markers. The three-way unpack
# relies on encode() returning exactly three ids for this string: one leading
# token added by the tokenizer (discarded; presumably the begin-of-sentence
# token -- confirm) followed by the two markers.
_, _start_think_token, end_think_token = tokenizer.encode("<think></think>")

# reasoning_effort appends decoded text to this module-level list; nothing in the
# visible code clears it, so it keeps growing across calls.
think_responses=list()
# Continuation phrases fed to the model when it tries to stop thinking too early.
replacements=["\nWait, but", "\nHmm", "\nSo"]
@torch.inference_mode()
def reasoning_effort(question: str, min_thinking_tokens: int):
    """Stream a sampled reply to ``question``, enforcing a minimum amount of thinking.

    The chat template is applied with the assistant turn prefilled with
    ``"<think>\\n"``, and tokens are then sampled one at a time from the softmax
    of the final-position logits (no temperature, top-k or top-p adjustment).
    If the model emits the end-of-thinking token or an end-of-sequence token
    before ``min_thinking_tokens`` tokens have been produced, that token is
    dropped and a random phrase from the module-level ``replacements`` list is
    fed to the model instead.

    Side effects: prints the templated prompt and every injected replacement
    phrase, and appends all generated text (sampled tokens and injected
    phrases) to the module-level ``think_responses`` list.

    Args:
        question: User message placed in the chat template.
        min_thinking_tokens: Number of tokens that must be produced before the
            model is allowed to end its thinking or the sequence.

    Yields:
        The decoded text of each sampled token that is kept.

    Note:
        Generation only ends on an end-of-sequence token; there is no upper
        bound on the number of generated tokens.
    """
    tokens = tokenizer.apply_chat_template(
        [
            {"role": "user", "content": question},
            # Prefill the assistant turn so generation starts inside the think block.
            {"role": "assistant", "content": "<think>\n"},
        ],
        continue_final_message=True,
        return_tensors="pt",
    )
    tokens = tokens.to(model.device)
    kv = DynamicCache()
    n_thinking_tokens = 0

    # eos_token_id may be configured as a single id or as a list of ids.
    eos_config = model.config.eos_token_id
    eos_token_ids = set(eos_config) if isinstance(eos_config, (list, tuple)) else {eos_config}

    print(tokenizer.decode(list(tokens[0])))
    while True:
        # Only the not-yet-cached tokens are fed; earlier context lives in the KV cache.
        out = model(input_ids=tokens, past_key_values=kv, use_cache=True)
        next_token = torch.multinomial(
            torch.softmax(out.logits[0, -1, :], dim=-1), 1
        ).item()
        kv = out.past_key_values

        wants_to_stop = next_token == end_think_token or next_token in eos_token_ids
        if wants_to_stop and n_thinking_tokens < min_thinking_tokens:
            # Too early to stop: discard the sampled token and inject a phrase instead.
            replacement = random.choice(replacements)
            print(replacement)
            # Keep the transcript consistent with what the model is conditioned on.
            think_responses.append(replacement)
            # add_special_tokens=False: encode() otherwise prepends a special token,
            # which would be injected mid-sequence and counted as a thinking token.
            replacement_tokens = tokenizer.encode(replacement, add_special_tokens=False)
            n_thinking_tokens += len(replacement_tokens)
            tokens = torch.tensor([replacement_tokens]).to(tokens.device)
        elif next_token in eos_token_ids:
            break
        else:
            dec_token = tokenizer.decode([next_token])
            think_responses.append(dec_token)
            yield dec_token
            n_thinking_tokens += 1
            tokens = torch.tensor([[next_token]]).to(tokens.device)

## 3a. Run model with CoT prompting

In [45]:
# Stream the reply as it is generated; 128 is the minimum number of thinking
# tokens enforced before the model may stop.
for piece in reasoning_effort(prompt, min_thinking_tokens=128):
    print(piece, end="", flush=True)

<｜begin▁of▁sentence｜><｜User｜>Question: Over the next 30 years, do you think that the average Chinese family will see 
its standard of living
A. Get better
B. Stay about the same
C. Get worse
D. Refused
Answer:<｜Assistant｜><think>

Okay, so I have this question about whether the average Chinese family's standard of living will get better, stay the same, get worse, or if the person refuses to answer over the next 30 years. The answer given is A. Get better, but I need to think through why that is the case.

First, I'll consider the economic growth in China. They've had remarkable growth over the past few decades, lifting millions out of poverty. The government has invested in infrastructure, technology, and industry, which should continue to drive growth. If the economy keeps expanding, people's incomes are likely to rise, leading to a better standard of living.

Then there's technology and innovation. China is investing heavily in tech sectors like AI, 5G, and renewable energy. These ad

## 3b. Run model without CoT prompting

This code was adapted from https://github.com/stanford-crfm/helm cited in the opinions-qa github repo to reproduce their results. It forces the model to generate only one token, which is usually one of the options presented in the prompt.

Ref: https://github.com/stanford-crfm/helm/blob/main/src/helm/clients/huggingface_client.py

In [32]:
# Request settings for the single-token (no chain-of-thought) query.
# No custom stopping criteria are supplied to generate().
stopping_criteria = None
raw_request = dict(
    prompt=prompt,
    stop_sequences=[],
    temperature=1e-7,
    max_new_tokens=1,  # generate exactly one token
    top_p=1,
    num_return_sequences=1,
    echo_prompt=False,
)


In [37]:
# Tokenize the raw prompt (no chat template) and move it to the model's own input
# device. The model is loaded with device_map='auto', so `model.device` is used
# instead of a separately computed device.
encoded_input = tokenizer(
    raw_request["prompt"], return_tensors="pt", return_token_type_ids=False
).to(model.device)
prompt_length = encoded_input.input_ids.shape[1]

# Greedy decoding (do_sample=False); the request's temperature is deliberately not passed.
with torch.no_grad():
    output = model.generate(
        **encoded_input,
        num_return_sequences=raw_request["num_return_sequences"],
        max_new_tokens=raw_request["max_new_tokens"],
        top_p=raw_request["top_p"],
        do_sample=False,
        return_dict_in_generate=True,
        output_scores=True,
        stopping_criteria=stopping_criteria,
    )
sequences = output.sequences
scores = output.scores

print(tokenizer.decode(sequences[0]))

# Log-probs are only computed when the first step's scores contain no infinities.
if not torch.isinf(scores[0]).any():
    # One list of per-token log-probs for each returned sequence.
    all_generated_tokens_logprobs = []
    for completion_id in range(raw_request["num_return_sequences"]):
        generated_tokens_logprobs = []
        n_generated = len(sequences[completion_id]) - prompt_length
        for i in range(n_generated):
            # `logprobs` stays at module scope on purpose: a later cell passes it
            # to generate_logprob_dictionary.
            logprobs = torch.nn.functional.log_softmax(scores[i][completion_id], dim=0)
            # Generated tokens start right after the prompt in the returned sequence.
            token_id = sequences[completion_id][prompt_length + i]
            generated_tokens_logprobs.append(logprobs[token_id].item())
        all_generated_tokens_logprobs.append(generated_tokens_logprobs)

    print(f'Log Probs: {all_generated_tokens_logprobs}')
else:
    print('Log Probs not calculated')

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


<｜begin▁of▁sentence｜>Question: Below you will be asked to provide a short description of the region you currently reside in within the US and then answer some questions.

Description: I currently reside in the Northeast.

Question: How much confidence do you have in chinese President Xi Jinping to do the right thing regarding world affairs?

A. A lot of confidence
B. Some confidence
C. Not too much confidence
D. No confidence at all
E. Refused
Answer: B
Log Probs: [[-1.042975902557373]]


In [91]:
# # Remove prompt from the start of each sequence if echo_prompt is False.
# if not raw_request["echo_prompt"]:
#     sequences = [sequence[len(encoded_input.input_ids[0]) :] for sequence in sequences]

# all_tokens = [[tokenizer.decode(token) for token in sequence_tokens] for sequence_tokens in sequences]
# all_decoded_text = tokenizer.batch_decode(sequences)

# all_decoded_text

### Get Logprobs

In [28]:
def generate_logprob_dictionary(logprobs, tokenizer, k=10):
    """Summarise the ``k`` most likely next tokens of a log-probability vector.

    Args:
        logprobs: 1-D tensor of log-probabilities, one entry per vocabulary id.
        tokenizer: Object whose ``decode(list_of_ids)`` returns the text for
            the given token ids.
        k: Number of top entries to report. Values larger than the vector
            length are clamped to its length.

    Returns:
        A dict with:
          * ``'text'``: decoded text of the most likely token,
          * ``'logprob'``: its log-probability,
          * ``'top_logprobs'``: mapping of decoded text to log-probability for
            the top entries, in descending order of log-probability. When
            several token ids decode to the same text, the highest
            log-probability is kept.
    """
    # topk raises when k exceeds the number of entries, so clamp it.
    k = min(k, logprobs.shape[-1])
    top = logprobs.topk(k=k)
    values = top.values.tolist()
    token_ids = top.indices.tolist()

    top_logprobs = dict()
    for token_id, value in zip(token_ids, values):
        # Entries arrive best-first; setdefault stops a lower-ranked token with
        # the same decoded text from overwriting the better score.
        top_logprobs.setdefault(tokenizer.decode([token_id]), value)

    return {
        'text': tokenizer.decode([token_ids[0]]),
        'logprob': values[0],
        'top_logprobs': top_logprobs,
    }

In [38]:
generate_logprob_dictionary(logprobs, tokenizer)

{'text': ' B',
 'logprob': -1.042975902557373,
 'top_logprobs': {' B': -1.042975902557373,
  ' A': -1.167975902557373,
  ' C': -2.042975902557373,
  ' \n\n': -2.667975902557373,
  ' I': -3.605475902557373,
  ' D': -4.042975902557373,
  ' E': -4.105475902557373,
  ' \n': -4.667975902557373,
  ' The': -4.730475902557373,
  ' Not': -4.855475902557373}}