# Load up tokenizers and packages

In [1]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import GPT2Tokenizer, GPT2Model
from transformers import pipeline, set_seed
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import GPT2Tokenizer, GPT2Model

from huggingface_hub import login
from huggingface_hub import login
import accelerate
import random
import sentencepiece
random.seed(42)
# access_token_read = "asd"
# login(token = access_token_read)

models = {
    'gpt2':pipeline('text-generation', model='gpt2'), 
    'gemma-2b':{'tokenizer':AutoTokenizer.from_pretrained("google/gemma-2b"),
               'model':AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto")},
    'flan-t5':{'tokenizer':T5Tokenizer.from_pretrained("google/flan-t5-large"),
               'model':T5ForConditionalGeneration.from_pretrained("google/flan-t5-large", device_map="auto")},
    'opt-2.7b':pipeline('text-generation', model='facebook/opt-2.7b'), 
    'mistral':pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")

}


data = pd.read_json("hellaswag_val.300.jsonl", lines=True)

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Packages

In [None]:
from transformers import GPT2Tokenizer, GPT2Model
import torch
from tqdm import tqdm

# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# model = GPT2Model.from_pretrained('gpt2')



# NOTE: human_context has in-context examples
instructions_dict = {
    'minimal':"INSTRUCTIONS\nChoose the best option to complete this sentence.", 
    'reasonable':"INSTRUCTIONS\nYour job is to complete a sentence with the most reasonable ending.", 
    'logical':"INSTRUCTIONS\nYour job is to complete a sentence with the most logical ending.", 
    'cot':"INSTRUCTIONS\nYour job is to complete a sentence with the most reasonable or logical ending. Think step by step to arrive at your answer", 
    'context':"INSTRUCTIONS\nYour job is to complete a sentence with the most reasonable ending.\n\nEXAMPLES\n[examples_here]"
}
return_str = "RETURN\nReturn the number corresponding to the best option in a json format like {'best_option':best_option}. Return nothing else."


def get_examples(df, num_examples=2):
    samples = df.sample(n=num_examples)
    
    result_strings = []
    for idx, row in samples.iterrows():
        context = row['ctx']
        options = row['endings']  
        cor_idx = row['label']  
        cor_opt = options[cor_idx]

        result_string = f"-Sentence: {context}\n-Correct Answer: {cor_opt}\n"
        result_strings.append(result_string)
    
    return "\n".join(result_strings)


def make_prompt(instructions, cntx, possible):
    possible_str = "".join([f"Answer {i}: {endings[i]}\n" for i in range(len(endings))])
    pr_str = f"""{instructions}\n\nSENTENCE\n{cntx}\n\n{possible_str}\n{return_str}"""
    return pr_str
    

def run_model(text, model_name):
    try:
        if model_name in ['gpt2', 'opt-2.7b']:
            generator = models[model_name]
            output = generator(text, max_new_tokens=100, num_return_sequences=1, device='cuda')
            return output[0]['generated_text']
        elif model_name in ['gemma-2b', 'flan-t5', 'mistral']:
            if model_name == 'mistral':
                text = "[INST]" + text + "[/INST]"
            model_stuff = models[model_name]
            tok, mod = model_stuff['tokenizer'], model_stuff['model']
            input_ids = tok(text, return_tensors="pt").to("cuda")
            outputs = mod.generate(**input_ids, max_new_tokens=100)
            output_decode = tok.decode(outputs[0])
            if model_name == 'gemma-2b':
                output_decode = output_decode.replace(text, "")
                output_decode = output_decode.replace("<bos>", "")
                output_decode = output_decode.strip()
            return output_decode
    except Exception as e:
        return f'{{"best_answer": "{random.choice([0, 1, 2, 3, 4])}"}}'

data = pd.read_json("hellaswag_val.300.jsonl", lines=True)



######### JUST PRINT PROMPTS #########

p_data = []
counter = 0
for index, row in tqdm(data.iterrows(), total=data.shape[0]):
    if counter < 1:
        inst_id = row['ind']
        ctx = row['ctx']
        endings = row['endings'] 
        correct = row['label']
        for (ins_name, ins_text) in instructions_dict.items():
            if ins_name == "context":
                examples = get_examples(data, 2)
                ins_text = ins_text.replace("[examples_here]", examples)
            
            print(f"PROMPT NAME: {ins_name}")
            print(make_prompt(ins_text, ctx, endings))
            print("\n"*4)

    counter+=1

            
######### RUN FOR REAL #########

p_data = []
counter = 0
for index, row in tqdm(data.iterrows(), total=data.shape[0]):
    inst_id = row['ind']
    ctx = row['ctx']
    endings = row['endings'] 
    correct = row['label']
    for (ins_name, ins_text) in instructions_dict.items():
        if ins_name == "context":
            examples = get_examples(data, 2)
            ins_text = ins_text.replace("[examples_here]", examples)
        for model_name in list(models.keys()):
            prompt_input = make_prompt(ins_text, ctx, endings)
            pred = run_model(prompt_input, model_name)
            pred_data = {'inst_id':inst_id, 'prompt': ins_name, 'correct':correct, 'pred':pred, 'model_name':model_name}

        
prompt_data = pd.DataFrame(p_data)
prompt_data.to_csv("prompt_data.csv")

100%|██████████| 300/300 [00:00<00:00, 24923.07it/s]


PROMPT NAME: minimal
INSTRUCTIONS
Choose the best option to complete this sentence.

SENTENCE
A man is sitting on a roof. he

Answer 0: is using wrap to wrap a pair of skis.
Answer 1: is ripping level tiles off.
Answer 2: is holding a rubik's cube.
Answer 3: starts pulling up roofing on a roof.

RETURN
Return the number corresponding to the best option in a json format like {'best_option':best_option}. Return nothing else.





PROMPT NAME: reasonable
INSTRUCTIONS
Your job is to complete a sentence with the most reasonable ending.

SENTENCE
A man is sitting on a roof. he

Answer 0: is using wrap to wrap a pair of skis.
Answer 1: is ripping level tiles off.
Answer 2: is holding a rubik's cube.
Answer 3: starts pulling up roofing on a roof.

RETURN
Return the number corresponding to the best option in a json format like {'best_option':best_option}. Return nothing else.





PROMPT NAME: logical
INSTRUCTIONS
Your job is to complete a sentence with the most logical ending.

SENTENCE
A man 

  2%|▏         | 6/300 [01:17<1:01:52, 12.63s/it]

In [34]:
def parse_output(x):
    answers = {'0', '1', '2', '3', '4'}

    try:
        asd = eval(x)
        if list(ad.keys()) == ['best_answer']:
            if asd['best_answer'] in answers:
                return asd['best_answer']
    except:
        pass
    
    for char in x:
        if char in answers:
            return char
    return random.choice(list(answers))
        
prompt_data['clean'] = prompt_data['pred'].apply(parse_output)

In [36]:
prompt_data['clean'].value_counts()

0    14
3     6
1     3
2     1
4     1
Name: clean, dtype: int64

In [9]:
data = pd.read_json("hellaswag_val.300.jsonl", lines=True)
samp_rows=  data.sample(2)
samp1 = samp_rows.iloc[0]['ctx']

In [12]:
data.columns

Index(['ind', 'activity_label', 'ctx_a', 'ctx_b', 'ctx', 'split', 'split_type',
       'label', 'endings', 'source_id'],
      dtype='object')

In [11]:
df.columns

NameError: name 'df' is not defined

In [15]:
def format_hellaswag_examples(df, num_examples=2):
    # Randomly sample two rows from the DataFrame
    samples = df.sample(n=num_examples)
    
    result_strings = []
    for _, row in samples.iterrows():
        context = row['ctx']
        options = row['endings']  
        formatted_options = "\n".join([f"- Option {i+1}: {opt}" for i, opt in enumerate(options)])
        
        result_string = f"CONTEXT: {context}\nOPTIONS:\n{formatted_options}\n"
        result_strings.append(result_string)
    
    # Join the two example strings with a newline for separation
    return "\n".join(result_strings)

In [17]:
print(format_hellaswag_examples(data))

CONTEXT: A graphic introduces the hand car wash video. The car is washed first gently with soap. next
OPTIONS:
- Option 1: is a clean car wash video.
- Option 2: , two cars slowly drive by.
- Option 3: , the tires are soaped and washed thoroughly.
- Option 4: , it is shown from behind.

CONTEXT: A sea is shown with a green forest on seashore. Blond man is standing in seashore and talking to the camera and surfing big waves on the sea. man
OPTIONS:
- Option 1: is on ocean and swimming in white water snapping his head.
- Option 2: is snowboarding up in the sea.
- Option 3: is doing flips in the waters and on the rocks and he be water surfing.
- Option 4: is walking on seashore through a lot of people and talking to the camera and holding an award and talking about surfing the seashore.



In [27]:
def get_examples(df, num_examples=2):
    samples = df.sample(n=num_examples)
    
    result_strings = []
    for idx, row in samples.iterrows():
        context = row['ctx']
        options = row['endings']  
        cor_idx = row['label']  
        cor_opt = options[cor_idx]

        result_string = f"CONTEXT: {context}\nANSWER: {cor_opt}\n"
        result_strings.append(result_string)
    
    return "\n".join(result_strings)

print(get_examples(data, 2))

CONTEXT: She then shows an appliance on the table and tilts it upwards. She turns a dial on the appliance and sets it back down. she
ANSWER: picks the knife back up and places it on the appliance.

CONTEXT: A man is kneeling on the ice with another beside hide pulling string out of a hole. they
ANSWER: continue to pull the string while the man beside him grabs a hook.

