In [1]:
import os
import anthropic
import pandas as pd

# Credentials come from the environment; never paste a key into the notebook.
# The previous version assigned a placeholder string to ANTHROPIC_API_KEY,
# which both overwrote a correctly exported key and invited committing a secret.
# Export ANTHROPIC_API_KEY in the shell that launches Jupyter before running.
if not os.environ.get("ANTHROPIC_API_KEY"):
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set; export it before starting the notebook."
    )

client = anthropic.Anthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"],
)
# Generate one Claude completion per prompt in prompts.csv and save the
# results (plus the generation settings) to completions/responses+claude.csv.

# Generation settings. The same values are sent with every request AND written
# to the output, so the recorded metadata describes the sampling actually used.
# (Previously the columns claimed temperature/top_k/top_p values that were
# never passed to the API.)
CLAUDE_MODEL = "claude-3-opus-20240229"
CLAUDE_SAMPLING = {"temperature": 1, "top_k": 20, "top_p": 0.9}

# Read the CSV file; prompts are taken from its 'prompt_full_text' column.
df = pd.read_csv('prompts.csv')
df['prompt_with_chat_template'] = ''  # left empty: no chat template is applied here
df['response'] = ''
df['temperature'] = CLAUDE_SAMPLING["temperature"]
df['top_k'] = CLAUDE_SAMPLING["top_k"]
df['top_p'] = CLAUDE_SAMPLING["top_p"]
df['model'] = CLAUDE_MODEL

# Loop over the 'prompt_full_text' column
for index, prompt in df['prompt_full_text'].items():
    message = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=1024,
        messages=[
            {"role": "user", "content": prompt}
        ],
        **CLAUDE_SAMPLING,
    )
    # Join every text block instead of indexing content[0], so a reply with no
    # content blocks is stored as '' rather than raising IndexError.
    df.at[index, 'response'] = "".join(
        block.text for block in message.content if block.type == "text"
    )

# Whitespace-delimited word count of each response.
df['response_word_count'] = df['response'].apply(lambda x: len(x.split()))

# Save to a CSV file, creating the output directory on a fresh checkout.
os.makedirs('completions', exist_ok=True)
df.to_csv('completions/responses+claude.csv', index=False, encoding= "utf-8")

In [2]:
import os
import pandas as pd
from openai import OpenAI

# Set up the OpenAI client.
# Credentials come from the environment; never paste a key into the notebook.
# The previous version assigned a placeholder string to OPENAI_API_KEY, which
# both overwrote a correctly exported key and invited committing a secret.
# Export OPENAI_API_KEY in the shell that launches Jupyter before running.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the notebook."
    )

client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)

# Generate one GPT-4 completion per prompt in prompts.csv and save the
# results (plus the generation settings) to completions/responses+gpt4.csv.

# Generation settings, defined once so the request and the recorded metadata
# cannot drift apart.
GPT4_MODEL = "gpt-4-0125-preview"
GPT4_TEMPERATURE = 1
GPT4_TOP_P = 0.9

# Read the CSV file; prompts are taken from its 'prompt_full_text' column.
df = pd.read_csv('prompts.csv')
df['prompt_with_chat_template'] = ''  # left empty: no chat template is applied here
df['response'] = ''
df['temperature'] = GPT4_TEMPERATURE
# No top_k is sent in the request below, so record it as missing instead of
# the 20 that was previously written and never applied. The column is kept so
# the schema still lines up with the other response files.
df['top_k'] = None
df['top_p'] = GPT4_TOP_P
df['model'] = GPT4_MODEL

# Loop over the 'prompt_full_text' column
for index, prompt in df['prompt_full_text'].items():
    completion = client.chat.completions.create(
        model=GPT4_MODEL,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1024,
        temperature=GPT4_TEMPERATURE,
        top_p=GPT4_TOP_P,
    )
    # message.content may be None; store '' so the word count below does not
    # fail with AttributeError on .split().
    df.at[index, 'response'] = completion.choices[0].message.content or ''

# Whitespace-delimited word count of each response.
df['response_word_count'] = df['response'].apply(lambda x: len(x.split()))

# Save to a CSV file, creating the output directory on a fresh checkout.
os.makedirs('completions', exist_ok=True)
df.to_csv('completions/responses+gpt4.csv', index=False, encoding="utf-8")

KeyboardInterrupt: 

In [1]:
import pandas as pd

# create completions/all_responses.csv
# Per-model completion files, named "<model>_responses.csv". The order here is
# the order in which the files are later concatenated.
model_responses = [
    f"{model_name}_responses.csv"
    for model_name in (
        # Pythia
        "pythia-70m", "pythia-160m", "pythia-410m", "pythia-1b",
        "pythia-1.4b", "pythia-2.8b", "pythia-6.9b", "pythia-12b",
        # Qwen1.5
        "Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B",
        "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-72B",
        # Yi
        "Yi-6B", "Yi-9B", "Yi-34B",
        # Llama 2
        "Llama-2-7b-hf", "Llama-2-13b-hf", "Llama-2-70b-hf",
        # Falcon
        "falcon-7b", "falcon-40b",
        # "falcon-180B"  (currently excluded)
    )
]

def concatenate_and_save_csvs(file_names, directory, output_file):
    """Stack several CSV files from one directory into a single CSV.

    Args:
        file_names: Iterable of CSV file names located inside ``directory``;
            rows are stacked in this order.
        directory: Directory that holds the inputs and receives the output.
        output_file: File name of the combined CSV, written into ``directory``.

    Returns:
        None. The combined table is written to ``directory/output_file`` as
        UTF-8 without the index column.
    """
    def in_directory(name):
        return f'{directory}/{name}'

    # Load every input up front, then stack once with a fresh 0..n-1 index.
    frames = [pd.read_csv(in_directory(name)) for name in file_names]
    stacked = pd.concat(frames, ignore_index=True)

    stacked.to_csv(in_directory(output_file), index=False, encoding='utf-8')

# Merge every per-model file listed in model_responses into
# completions/all_responses.csv.
concatenate_and_save_csvs(
    file_names=model_responses,
    directory='completions',
    output_file='all_responses.csv',
)

In [2]:
import openpyxl

# Build the combined response table from four sources: the concatenated
# open-model completions, the GPT-4 and Claude completions, and the
# human-written treatments.
df = pd.read_csv('completions/all_responses.csv')
df_gpt4 = pd.read_csv('completions/responses+gpt4.csv')
df_claude = pd.read_csv('completions/responses+claude.csv')
df_human = pd.read_excel('human_treatments.xlsx')

# Rename the 'human_treatment' column to 'response' in df_human so it lines up
# with the model response files.
df_human = df_human.rename(columns={'human_treatment': 'response'})
# Count whitespace-delimited words. Cells that are not text (e.g. blank
# spreadsheet cells, which load as NaN) count as 0 words instead of raising
# AttributeError on .split().
df_human['response_word_count'] = df_human['response'].apply(
    lambda x: len(x.split()) if isinstance(x, str) else 0
)

# Concatenate df_gpt4 and df_claude first
combined_df = pd.concat([df_gpt4, df_claude], ignore_index=True)

# Stack everything in a fixed order: open models, API models, then humans.
# sort=False keeps columns in first-seen order; columns missing from a source
# are filled with NaN.
final_df = pd.concat([df, combined_df, df_human], ignore_index=True, sort=False)

# Assign a fresh, unique 1-based 'response_id' to every row. Plain assignment
# overwrites any 'response_id' column carried over from the inputs, so no
# separate drop is needed.
final_df['response_id'] = range(1, len(final_df) + 1)

In [3]:
# Static metadata for each open model, keyed by the name used in the 'model'
# column of the response files.
#   parameters       - model size in billions (0.07 pairs with "70m", 1 with "1b")
#   parameters_short - the same size as a short text label
#   tokens           - NOTE(review): presumably training tokens in trillions;
#                      confirm units and values (e.g. Pythia) against model cards
#   compute          - placeholder, empty for every model
model_info = {
    # Pythia
    "pythia-70m": dict(parameters=0.07, parameters_short="70m", tokens=3, compute=""),
    "pythia-160m": dict(parameters=0.16, parameters_short="160m", tokens=3, compute=""),
    "pythia-410m": dict(parameters=0.41, parameters_short="410m", tokens=3, compute=""),
    "pythia-1b": dict(parameters=1, parameters_short="1b", tokens=3, compute=""),
    "pythia-1.4b": dict(parameters=1.4, parameters_short="1.4b", tokens=3, compute=""),
    "pythia-2.8b": dict(parameters=2.8, parameters_short="2.8b", tokens=3, compute=""),
    "pythia-6.9b": dict(parameters=6.9, parameters_short="6.9b", tokens=3, compute=""),
    "pythia-12b": dict(parameters=12, parameters_short="12b", tokens=3, compute=""),
    # Qwen1.5
    "Qwen1.5-0.5B": dict(parameters=0.5, parameters_short="0.5b", tokens=3, compute=""),
    "Qwen1.5-1.8B": dict(parameters=1.8, parameters_short="1.8b", tokens=3, compute=""),
    "Qwen1.5-4B": dict(parameters=4, parameters_short="4b", tokens=3, compute=""),
    "Qwen1.5-7B": dict(parameters=7, parameters_short="7b", tokens=3, compute=""),
    "Qwen1.5-14B": dict(parameters=14, parameters_short="14b", tokens=3, compute=""),
    "Qwen1.5-72B": dict(parameters=72, parameters_short="72b", tokens=3, compute=""),
    # Yi
    "Yi-6B": dict(parameters=6, parameters_short="6b", tokens=3.1, compute=""),
    "Yi-9B": dict(parameters=9, parameters_short="9b", tokens=3.1, compute=""),
    "Yi-34B": dict(parameters=34, parameters_short="34b", tokens=3.1, compute=""),
    # Llama 2
    "Llama-2-7b-hf": dict(parameters=7, parameters_short="7b", tokens=2, compute=""),
    "Llama-2-13b-hf": dict(parameters=13, parameters_short="13b", tokens=2, compute=""),
    "Llama-2-70b-hf": dict(parameters=70, parameters_short="70b", tokens=2, compute=""),
    # Falcon
    "falcon-7b": dict(parameters=7, parameters_short="7b", tokens=1.5, compute=""),
    "falcon-40b": dict(parameters=40, parameters_short="40b", tokens=1, compute=""),
    "falcon-180B": dict(parameters=180, parameters_short="180b", tokens=3.5, compute=""),
}

# Add new columns to final_df by mapping the 'model' column to the model_info
# dictionary. Rows whose model has no entry in model_info get None.
# The 'compute' field is intentionally not mapped yet.
for info_key in ('parameters', 'parameters_short', 'tokens'):
    final_df[info_key] = final_df['model'].map(
        # Bind info_key as a default so each lambda reads its own field.
        lambda x, info_key=info_key: model_info[x][info_key] if x in model_info else None
    )

# Recompute word counts for every row. Responses loaded from CSV can be NaN
# (pandas parses empty fields as missing), and calling .split() on those
# raised AttributeError; non-text cells now count as 0 words.
final_df['response_word_count'] = final_df['response'].apply(
    lambda x: len(x.split()) if isinstance(x, str) else 0
)

# Save the updated dataframe to a CSV file; missing values are written as 'NA'.
final_df.to_csv('completions/all_responses_combined.csv', index=False, na_rep='NA', encoding="utf-8")


In [7]:
# Average response length (in words) per model, rounded to 0 decimal places.
mean_response_word_count = (
    final_df
    .groupby('model')['response_word_count']
    .mean()
    .round(0)
)
