In [None]:
import os
import time

import pandas as pd
import requests
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer

In [None]:

# Base checkpoint and the PEFT adapters to evaluate. Each adapter is merged
# into a fresh copy of the base model and gets its own completions CSV.
model_name = "EleutherAI/pythia-160m"
adapters_list = ["persuasion-scaling-laws/pythia-160m"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load prompts; the loop below reads the 'prompt_full_text' column.
df = pd.read_csv('prompts.csv')

# Initialize output columns and record the sampling settings used for
# generation alongside every row.
df['prompt_with_chat_template'] = ''
df['response'] = ''
df['temperature'] = 1
df['top_k'] = 20
df['top_p'] = 0.9

all_dfs = []  # One dataframe per adapter, concatenated later

# to_csv does not create missing directories, so make sure the output
# folder exists before the (expensive) generation starts.
os.makedirs('completions', exist_ok=True)

for adapters_name in adapters_list:
    m = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        # Place the whole model on GPU 0 when CUDA is available, otherwise on
        # the CPU; hard-coding 0 would defeat the CPU fallback above.
        device_map={"": 0 if device.type == "cuda" else "cpu"},
    )

    # Attach the adapter, then fold its weights into the base model so that
    # generation runs on a plain causal LM.
    m = PeftModel.from_pretrained(m, adapters_name)
    m = m.merge_and_unload()

    # AutoTokenizer resolves the tokenizer class that matches the checkpoint.
    # LlamaTokenizer only works for Llama checkpoints, and the Llama-specific
    # bos_token_id override is dropped for the same reason.
    tok = AutoTokenizer.from_pretrained(model_name)

    stop_token_ids = [0]  # NOTE(review): not used anywhere in this cell

    m = m.to(device)

    # Short label for the 'model' column and the output filename: drop the
    # organisation part of the repo id, then the legacy pilot prefix if
    # present. The label must not contain '/', or the CSV path below would
    # point into a directory that does not exist.
    adapter_short_name = adapters_name.split("/")[-1]
    pilot_prefix = "pilot-Llama-2-7b-hf-"
    if adapter_short_name.startswith(pilot_prefix):
        adapter_short_name = adapter_short_name[len(pilot_prefix):]

    df['model'] = adapter_short_name

    # Generate one sampled completion per prompt.
    for index, row in df.iterrows():
        prompt = row['prompt_full_text']
        prompt_with_chat_template = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n" + prompt + "\n\n### Response:\n"

        inputs = tok(prompt_with_chat_template, return_tensors="pt").to(device)

        outputs = m.generate(
            **inputs,
            do_sample=True,
            num_beams=1,
            max_new_tokens=600,
            # Cast to plain Python numbers so generate() receives the types
            # it validates against, whatever pandas stored in the row.
            temperature=float(row['temperature']),
            top_k=int(row['top_k']),
            top_p=float(row['top_p']),
        )
        response_text = tok.batch_decode(outputs, skip_special_tokens=True)[0]

        # The decoded text contains the prompt followed by the completion.
        # Split once, at the first marker, so a completion that repeats
        # "### Response:" is kept whole instead of being truncated.
        response_only = response_text.split("### Response:\n", 1)[-1].strip()

        # Save the response and the templated prompt in the dataframe
        df.at[index, 'response'] = response_only
        df.at[index, 'prompt_with_chat_template'] = prompt_with_chat_template

    # Save this adapter's completions and keep a snapshot for the final merge
    # (df is reused, and overwritten, by the next adapter).
    df.to_csv(f'completions/{adapter_short_name}_responses.csv', index=False, encoding='utf-8')
    all_dfs.append(df.copy())
    
    # Function to call the Hugging Face Inference API
def query_hf_api(prompt, model, parameters, token=None, retry_delay=10, timeout=120):
    """Generate text for *prompt* with a meta-llama model on the HF Inference API.

    Retries for as long as the API reports that the model is still loading.

    Args:
        prompt: Text sent as the request's "inputs".
        model: Model name under the ``meta-llama`` organisation.
        parameters: Generation parameters sent as the request's "parameters".
        token: API token. When omitted, the ``HF_TOKEN`` environment variable
            is used, falling back to the original placeholder string.
        retry_delay: Seconds to wait before retrying while the model loads.
        timeout: Per-request timeout in seconds.

    Returns:
        The ``generated_text`` of the first result, stripped of leading and
        trailing whitespace.

    Raises:
        RuntimeError: If the API returns an error other than "model loading".
        requests.RequestException: On network failures or timeouts.
    """
    if token is None:
        token = os.environ.get("HF_TOKEN", 'token here')
    headers = {"Authorization": f"Bearer {token}"}
    API_URL = f"https://api-inference.huggingface.co/models/meta-llama/{model}"
    payload = {"inputs": prompt, "parameters": parameters}

    while True:
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        data = response.json()

        # Errors arrive as a dict with an 'error' key; successes as a list.
        if isinstance(data, dict) and 'error' in data:
            if 'loading' in str(data['error']):
                print('Model is loading, waiting...')
                time.sleep(retry_delay)
                continue
            # Any other error (bad token, rate limit, unknown model, ...) will
            # not resolve by waiting, so surface it instead of failing later
            # with an opaque KeyError on data[0].
            raise RuntimeError(
                f"Hugging Face Inference API error for {model} "
                f"(HTTP {response.status_code}): {data['error']}"
            )

        return data[0]['generated_text'].strip()

# Baseline: after the adapter runs, collect completions for the same prompts
# from a hosted chat model through the Hugging Face Inference API.
hf_model = 'Llama-2-7b-chat-hf'
# NOTE(review): only temperature is sent to the API, while the copied
# dataframe still carries top_k=20 / top_p=0.9 from the local runs, so those
# two columns do not describe how these rows were generated. Confirm whether
# the API call should pass them or the columns should be blanked.
hf_parameters = {"max_new_tokens": 600, "temperature": 1, "return_full_text": False}

df_hf = df.copy()  # Separate copy so the adapter results are left untouched
df_hf['model'] = hf_model

for index, row in df_hf.iterrows():
    prompt = row['prompt_full_text']
    prompt_with_chat_template = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n" + prompt + "\n\n### Response:\n"

    # Call the Hugging Face Inference API to generate a response
    response_text = query_hf_api(prompt_with_chat_template, hf_model, hf_parameters)

    # Keep only the text after the last "### Response:" marker, if any
    response_only = response_text.split("### Response:\n")[-1].strip()

    # Save the response and the templated prompt in the dataframe
    df_hf.at[index, 'response'] = response_only
    df_hf.at[index, 'prompt_with_chat_template'] = prompt_with_chat_template

# to_csv does not create missing directories, so make sure the folder exists.
os.makedirs('completions', exist_ok=True)

# Save the baseline completions to their own CSV file
df_hf.to_csv(f'completions/{hf_model}_responses.csv', index=False, encoding='utf-8')
all_dfs.append(df_hf)  # Add the baseline to the per-model results

# Concatenate all per-model dataframes. ignore_index gives the combined frame
# a clean 0..n-1 index instead of repeating each source frame's row labels.
final_df = pd.concat(all_dfs, ignore_index=True)

# Add a 1-based 'response_id' that is unique across all models
final_df['response_id'] = range(1, len(final_df) + 1)

final_df.to_csv('completions/all_responses.csv', index=False, encoding='utf-8')