We are going to perform pairwise comparisons of companies across the years to see how accurate our code is at detecting improved prompts.

In [1]:
import openai
# Name of the chat model; read by get_model_output_initial_sentences.
MODEL = 'gpt-4o-mini'
# API keys are imported from the api_keys module; only the first one is used.
from api_keys import OPEN_AI_API_KEYS
key = OPEN_AI_API_KEYS[0]
# Client object used for the chat-completion requests in this notebook.
client = openai.OpenAI(api_key=key)
# Also sets the key as a module-level attribute on the openai package.
# NOTE(review): possibly redundant given the explicit client above — confirm.
openai.api_key = key

import numpy as np
import pandas as pd
import csv

In [2]:
def prepare_messages(prompts):
    """Wrap each prompt in a chat message dict with the "user" role.

    Args:
        prompts: Iterable of prompt values, one per message.

    Returns:
        A list with one ``{"role": "user", "content": prompt}`` dict per
        element of ``prompts``, in the same order.
    """
    return [{"role": "user", "content": text} for text in prompts]

def get_model_output_initial_sentences(prompts, client):
    """Send ``prompts`` to the chat model and return the text of its reply.

    Each prompt becomes one "user" message (via ``prepare_messages``). The
    request uses the module-level ``MODEL`` with ``temperature=0`` and is
    issued through the ``client`` passed in by the caller.

    Args:
        prompts: Iterable of prompt strings.
        client: Object exposing ``chat.completions.create``.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=MODEL,
        messages=prepare_messages(prompts),
        temperature=0,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [3]:
def get_completion(
    messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    max_tokens=500,
    temperature=0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,
    top_logprobs=None,
) -> "openai.types.chat.ChatCompletion":
    """Request a chat completion through the module-level ``client``.

    Args:
        messages: Chat messages, each a dict with "role" and "content".
        model: Name of the model to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        stop: Optional stop sequence(s), forwarded unchanged.
        seed: Seed forwarded to the API.
        tools: Optional tool definitions; only sent when truthy.
        logprobs: Whether to return log probabilities of the output tokens.
            If true, the log probability of each output token is returned
            in the content of the message.
        top_logprobs: Forwarded unchanged; used together with ``logprobs``
            to request the most likely alternatives per token position.

    Returns:
        The completion object returned by ``client.chat.completions.create``
        (not a string): callers read ``.choices[...]`` from it.
    """
    params = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    # "tools" is only added when provided, so the key is absent otherwise.
    if tools:
        params["tools"] = tools

    return client.chat.completions.create(**params)

In [4]:
# NOTE: this cell repeats the get_completion definition from the previous
# cell; being the later definition, it is the one in effect.
def get_completion(
    messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    max_tokens=500,
    temperature=0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,
    top_logprobs=None,
) -> "openai.types.chat.ChatCompletion":
    """Request a chat completion through the module-level ``client``.

    Args:
        messages: Chat messages, each a dict with "role" and "content".
        model: Name of the model to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        stop: Optional stop sequence(s), forwarded unchanged.
        seed: Seed forwarded to the API.
        tools: Optional tool definitions; only sent when truthy.
        logprobs: Whether to return log probabilities of the output tokens.
            If true, the log probability of each output token is returned
            in the content of the message.
        top_logprobs: Forwarded unchanged; used together with ``logprobs``
            to request the most likely alternatives per token position.

    Returns:
        The completion object returned by ``client.chat.completions.create``
        (not a string): callers read ``.choices[...]`` from it.
    """
    params = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    # "tools" is only added when provided, so the key is absent otherwise.
    if tools:
        params["tools"] = tools

    return client.chat.completions.create(**params)

In [14]:
# Fragments of the judge prompt. They are concatenated with "".join(prompts)
# and then filled in via str.format, so every fragment except the last must
# end with a space (otherwise words run together, e.g. "tangibleaction") and
# every fragment must be followed by a comma (otherwise adjacent literals are
# silently merged into one list element).
# Placeholders: {answer1a}/{answer1b} are company A's answers to questions
# 1 and 2; {answer2a}/{answer2b} are company B's.
prompts = [
    "Please act as an impartial judge and evaluate the quality ",
    "of the responses provided below. We are trying to figure out which of the company's ",
    "response is better. ",
    "Your evaluation should consider factors ",
    "such as the specificity, clarity, completeness, and depth ",
    "of their responses. In addition, consider the amount of tangible ",
    "action that is being taken based on the responses. ",
    # Prompt variants that are currently disabled:
    # "The answer should have a neutral, unbiased tone without any negative connotations",
    # " or attempts to persuade. ",
    # "Choose the answer that avoids greenwashing tactics, such as vague ",
    # "or irrelevant claims and lack of proof of actions. ",
    "Avoid any position biases and ensure that the order in which the responses were presented ",
    "does not influence your decision. Do not allow the length of the responses ",
    "to influence your evaluation. Do not favor certain names of the companies. ",
    "Be as objective as possible. There are two questions, but some companies may ",
    "only respond to one or the other. Do not allow the lack of one response ",
    "to influence your decision. Do not allow repetition to influence your decision. ",
    "Begin your evaluation by comparing the two responses and ",
    # "provide a short explanation. After providing your explanation, ",
    "output your final verdict by strictly following this format: ",
    "A if company A is better and B if company B is better. ",
    "If the answers are the exact same, output C. ",
    # "Do not start with the word \"The\"."
    # "and \"C\" for a tie.",
    "Please do not provide any sort of explanation. ",
    "Question 1: Provide details of your absolute emissions target(s) and progress made against those targets. - Please explain. ",
    "Question 2: Provide details of your emissions intensity target(s) and progress made against those targets. - Please explain. ",
    "[The Start of Company A’s Question 1 Response] {answer1a} [The End of Company A’s Response] ",
    "[The Start of Company A’s Question 2 Response] {answer1b} [The End of Company A’s Response] ",
    "[The Start of Company B’s Question 1 Response] {answer2a} [The End of Company B’s Response] ",
    "[The Start of Company B’s Question 2 Response] {answer2b} [The End of Company B’s Response] ",
    "Start your response after this sentence.",
]

In [6]:
def redact_company_name(response, company_name):
    """Replace mentions of a company in ``response`` with "[REDACTED]".

    Two variations are redacted, both matched case-insensitively and as plain
    substrings (no word-boundary check): the full ``company_name`` and its
    first whitespace-separated word (e.g. "Apple" for "Apple Inc").

    Args:
        response: Text to redact.
        company_name: Name of the company to hide.

    Returns:
        ``response`` with every match replaced by "[REDACTED]". If
        ``company_name`` is empty or whitespace-only there is nothing to
        match and ``response`` is returned unchanged.
    """
    import re  # local import so the function does not rely on the import cell

    name_words = company_name.split()
    if not name_words:
        return response

    # Full name first: regex alternation tries options left to right, so the
    # longer variation wins where both could match. dict.fromkeys drops the
    # duplicate when the name is a single word.
    variations = list(dict.fromkeys([company_name, name_words[0]]))

    # A single pass over the text, so an inserted "[REDACTED]" is never
    # itself re-scanned; re.escape keeps characters such as "+" or "(" literal.
    pattern = re.compile(
        "|".join(re.escape(variation) for variation in variations),
        re.IGNORECASE,
    )
    return pattern.sub("[REDACTED]", response)

In [7]:
def get_log_prob(answer1a, answer1b, answer2a, answer2b, company):
    """Ask the judge model to compare two sets of answers and return its top tokens.

    The company name is redacted from every textual answer, the answers are
    substituted into the module-level ``prompts`` template, and the model is
    queried with ``logprobs=True`` and ``top_logprobs=2``. Only the first
    output token is inspected.

    Args:
        answer1a: Company A's answer to question 1.
        answer1b: Company A's answer to question 2.
        answer2a: Company B's answer to question 1.
        answer2b: Company B's answer to question 2.
        company: Company name to redact from the answers.

    Returns:
        A list of ``(token, probability)`` tuples for the candidates reported
        for the first output token, where ``probability`` is
        ``exp(logprob) * 100`` rounded to two decimals (a percentage, not a
        log probability). An empty list is returned if anything fails; the
        error is printed.
    """
    # Only real text can be redacted. Non-string values (e.g. the NaN that
    # pandas yields for a missing answer) are passed through untouched and
    # end up in the prompt via their str() form.
    answer1a, answer1b, answer2a, answer2b = (
        redact_company_name(answer, company) if isinstance(answer, str) else answer
        for answer in (answer1a, answer1b, answer2a, answer2b)
    )

    try:
        formatted_prompt = "".join(prompts).format(
            answer1a=answer1a,
            answer1b=answer1b,
            answer2a=answer2a,
            answer2b=answer2b,
        )
        # The prompt is formatted exactly once: formatting the result again
        # would treat any "{" or "}" inside an answer as a placeholder and
        # raise, dropping the comparison.
        API_RESPONSE = get_completion(
            [{"role": "user", "content": formatted_prompt}],
            model="gpt-4o-mini",
            logprobs=True,
            top_logprobs=2,
        )

        # Candidates reported for the first generated token only.
        top_two_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs

        return [
            (logprob.token, np.round(np.exp(logprob.logprob) * 100, 2))
            for logprob in top_two_logprobs
        ]
    except KeyError as e:
        print(f"KeyError: {e}")
        print("Variables: answer1a:", answer1a, "answer1b:", answer1b, "answer2a:", answer2a, "answer2b:", answer2b)
        return []
    except Exception as e:
        # Deliberately broad: one failed comparison must not abort the batch.
        print(f"An error occurred: {e}")
        return []

In [8]:
# Column headers of the two target questions for each year:
#   a_<year> - absolute emissions target(s)
#   b_<year> - emissions intensity target(s)
# The comparison cell uses these strings as column labels when selecting
# responses from the merged datasets, so they must match the headers exactly.
# The code prefix (e.g. "C4.1a_C12_") and the trailing wording differ from
# year to year.
a_2019 = "C4.1a_C12_Provide details of your absolute emissions target(s) and progress made against those targets. - Please explain"
b_2019 = "C4.1b_C13_Provide details of your emissions intensity target(s) and progress made against those target(s). - Please explain"
a_2020 = "C4.1a_C15_Provide details of your absolute emissions target(s) and progress made against those targets. - Please explain (including target coverage)"
b_2020 = "C4.1b_C18_Provide details of your emissions intensity target(s) and progress made against those target(s). - Please explain (including target coverage)"
a_2021 = "C4.1a_C16_Provide details of your absolute emissions target(s) and progress made against those targets. - Please explain (including target coverage)"
b_2021 = "C4.1b_C19_Provide details of your emissions intensity target(s) and progress made against those target(s). - Please explain (including target coverage)"
a_2022 = "C4.1a_C27_Provide details of your absolute emissions target(s) and progress made against those targets. - Please explain target coverage and identify any exclusions"
b_2022 = "C4.1b_C30_Provide details of your emissions intensity target(s) and progress made against those target(s). - Please explain target coverage and identify any exclusions"

Using the all_years.csv file, I want to find the companies that were not on the A list in 2021 but were on the A list in 2022, then look up each of those companies in the merged dataset file for each of the two years.

In [15]:

# Compare, for every company that was NOT on the A list in year_1 but WAS on
# it in year_2, its year_1 answers (company A in the prompt) against its
# year_2 answers (company B), and write the judge's top two first-token
# candidates to a CSV file.

# Read the CSV files
year_1 = "2021"
year_2 = "2022"
a_list_through_years = 'a-list_data_files/all_years.csv'
input_file_1 = "merged_files/" + year_1 + "_merged_dataset.csv"
input_file_2 = "merged_files/" + year_2 + "_merged_dataset.csv"

a_df = pd.read_csv(a_list_through_years)
df_1 = pd.read_csv(input_file_1)
df_2 = pd.read_csv(input_file_2)

# A-list membership per year: a missing value means "not on the list".
on_list_year_1 = a_df.loc[:, year_1]
on_list_year_2 = a_df.loc[:, year_2]
company_name_a_throughout_years = a_df.loc[:, "Company"]

# Question columns for year 1 and year 2.
# NOTE: keep these in step with year_1 / year_2 when changing the years.
a_question_1 = a_2021
b_question_1 = b_2021

a_question_2 = a_2022
b_question_2 = b_2022

company_name_list_1 = df_1.loc[:, "Organization"]
a_response_list_1 = df_1.loc[:, a_question_1]
b_response_list_1 = df_1.loc[:, b_question_1]

company_name_list_2 = df_2.loc[:, "Organization"]
a_response_list_2 = df_2.loc[:, a_question_2]
b_response_list_2 = df_2.loc[:, b_question_2]


def _first_position_by_name(names):
    """Map each non-missing name to the position of its first occurrence."""
    positions = {}
    for position, name in enumerate(names):
        if not pd.isna(name):
            positions.setdefault(name, position)
    return positions


# Built once so each company lookup is a dict access instead of a full scan.
position_by_name_1 = _first_position_by_name(company_name_list_1)
position_by_name_2 = _first_position_by_name(company_name_list_2)

output_file = f"pairwise_files/across_the_years/{year_1}_not_to_{year_2}_on_2.csv"
fields = ["Company", "Year " + year_1 + " Response 1", "Year " + year_1 + " Response 2", "Year " + year_2 + " Response 1", "Year " + year_2 + " Response 2", "Token 1", "LogProb 1", "Token 2", "LogProb 2"]

# The with-statement closes the file; utf-8 is set explicitly so non-ASCII
# characters in the responses do not depend on the platform default encoding.
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fields, quoting=csv.QUOTE_ALL)
    writer.writeheader()

    for index, company in enumerate(company_name_a_throughout_years):
        # Keep only companies absent from the A list in year 1 and present in year 2.
        newly_on_list = pd.isna(on_list_year_1.iloc[index]) and not pd.isna(on_list_year_2.iloc[index])
        if not newly_on_list:
            continue

        # The company must appear in the merged dataset of both years.
        index_1 = position_by_name_1.get(company)
        index_2 = position_by_name_2.get(company)
        if index_1 is None or index_2 is None:
            continue

        list_log_probs = get_log_prob(
            a_response_list_1.iloc[index_1],
            b_response_list_1.iloc[index_1],
            a_response_list_2.iloc[index_2],
            b_response_list_2.iloc[index_2],
            company,
        )

        # get_log_prob returns [] on failure. Skip instead of writing a row,
        # so a stale row from a previous company is never re-written and an
        # undefined row is never referenced.
        if len(list_log_probs) < 2:
            print(f"Skipping {company}: no token probabilities returned")
            continue

        writer.writerow({
            "Company": company,
            "Year " + year_1 + " Response 1": a_response_list_1.iloc[index_1],
            "Year " + year_1 + " Response 2": b_response_list_1.iloc[index_1],
            "Year " + year_2 + " Response 1": a_response_list_2.iloc[index_2],
            "Year " + year_2 + " Response 2": b_response_list_2.iloc[index_2],
            "Token 1": list_log_probs[0][0],
            "LogProb 1": list_log_probs[0][1],
            "Token 2": list_log_probs[1][0],
            "LogProb 2": list_log_probs[1][1],
        })

print(f"Log Probs across the year have been written to {output_file}")

LogProbs across the year have been written to pairwise_files/across_the_years/2021_not_to_2022_on_2.csv
