In [7]:
import os
import pandas as pd

In [8]:
import os
import openai
import pandas as pd
import time
import datetime

from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider
import re
import os


# OAuth scope requested when acquiring tokens for the TRAPI endpoint.
scope = "api://trapi/.default"

# NOTE: despite its name, `credential` is the bearer-token *provider* returned by
# get_bearer_token_provider(); it is handed to AzureOpenAI below through
# `azure_ad_token_provider`.
# The chain tries the Azure CLI login first and then falls back to
# DefaultAzureCredential with the credential types listed below switched off.
credential = get_bearer_token_provider(ChainedTokenCredential(
    AzureCliCredential(),
    DefaultAzureCredential(
        exclude_cli_credential=True,
        # Exclude other credentials we are not interested in.
        exclude_environment_credential=True,
        exclude_shared_token_cache_credential=True,
        exclude_developer_cli_credential=True,
        exclude_powershell_credential=True,
        exclude_interactive_browser_credential=True,
        # The keyword is singular ("..._credential") like its siblings above; the
        # previous plural spelling was not a recognised option.
        exclude_visual_studio_code_credential=True,
        # DEFAULT_IDENTITY_CLIENT_ID is a variable exposed in
        # Azure ML Compute jobs that has the client id of the
        # user-assigned managed identity in it.
        # See https://learn.microsoft.com/en-us/azure/machine-learning/how-to-identity-based-service-authentication#compute-cluster
        # In case it is not set the ManagedIdentityCredential will
        # default to using the system-assigned managed identity, if any.
        managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"),
    )
), scope)


# Default deployment settings; later cells overwrite these names per model.
api_version = '2025-04-01-preview'  # Ensure this is a valid API version see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
model_name = 'gpt-4o'  # Ensure this is a valid model name
model_version = '2024-11-20'  # Ensure this is a valid model version
deployment_name = 'gpt-4o_2024-11-20'  # If your Endpoint doesn't have harmonized deployment names, you can use the deployment name directly: see: https://aka.ms/trapi/models
instance = 'gcr/shared'  # See https://aka.ms/trapi/models for the instance name, remove /openai (library adds it implicitly)
endpoint = f'https://trapi.research.microsoft.com/{instance}'

# Client shared by the following cells; tokens are fetched on demand through
# the provider above.
client = AzureOpenAI(
    azure_endpoint=endpoint,
    azure_ad_token_provider=credential,
    api_version=api_version,
)

In [9]:
# Single test request: send one short user message to the configured
# deployment and print the text of the first returned choice.
smoke_test_messages = [{"role": "user", "content": "I like bagels"}]
smoke_test_sampling = {
    "temperature": 1,
    "max_tokens": 256,
    "top_p": 1,
    "frequency_penalty": 0,
    "presence_penalty": 0,
}

response = client.chat.completions.create(
    model=deployment_name,
    messages=smoke_test_messages,
    **smoke_test_sampling,
)

first_choice = response.choices[0]
print(first_choice.message.content)

Bagels are delicious! 🥯 Do you have a favorite bagel flavor or topping? Cream cheese, lox, butter, or maybe something unique?


In [4]:
# "Score + decision" answer-format condition.
# For every model in `models`, read <base_folder>/<model>.csv, wrap each row's
# `user_prompt` in the format instructions below, query the model's deployment
# at the row's `temperature`, store the reply in `model_response`, and write
# the frame to 'score_decision_task_format_' + <base_folder>/<model>.csv.

# Define the main folder and the models to run.
base_folder = "experiment_name"
# Other configured models: "gpt-3.5-turbo", "gpt-4o-mini" (see models_to_config).
models = ["gpt-4o"]


models_to_config = {"gpt-4o": {'model_name': 'gpt-4o', 'model_version': '2024-11-20', 'deployment_name': 'gpt-4o_2024-11-20'},
                    "gpt-3.5-turbo": {'model_name': 'gpt-35-turbo', 'model_version': '1106', 'deployment_name': 'gpt-35-turbo_1106'},
                    "gpt-4o-mini": {'model_name': 'gpt-4o-mini', 'model_version': '2024-07-18', 'deployment_name': 'gpt-4o-mini_2024-07-18'}}


# Container to store all prompts (not used in this cell).
model_to_prompt_list = {}

# Text placed before / after each user prompt. Kept byte-identical to the
# original experiment, including the absence of a separator before the footer.
prompt_header = "In the following task, when you make a decision format your answer as: CV 1: [score], CV 2: [score]. Decision: [Candidate Name]. "
prompt_footer = "Please only answer with the following format.\nCV 1: [score], CV 2: [score]. Decision: [Candidate Name]."
output_prefix = 'score_decision_task_format_'

# The endpoint and API version do not depend on the model, so the client is
# built once instead of once per loop iteration.
api_version = '2025-04-01-preview'  # Ensure this is a valid API version see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
instance = 'gcr/shared'  # See https://aka.ms/trapi/models for the instance name, remove /openai (library adds it implicitly)
endpoint = f'https://trapi.research.microsoft.com/{instance}'

client = AzureOpenAI(
    azure_endpoint=endpoint,
    azure_ad_token_provider=credential,
    api_version=api_version,
)

# Loop through each model
for model in models:

    config = models_to_config[model]
    model_name = config['model_name']  # Ensure this is a valid model name
    model_version = config['model_version']  # Ensure this is a valid model version
    deployment_name = config['deployment_name']  # If your Endpoint doesn't have harmonized deployment names, you can use the deployment name directly: see: https://aka.ms/trapi/models

    # Build the paths without overwriting the loop variable `model`.
    file_path = os.path.join(base_folder, model + '.csv')
    output_path = output_prefix + file_path

    # Read the CSV holding the 'user_prompt' and 'temperature' columns.
    df = pd.read_csv(file_path)

    # Responses are strings. If the CSV already carries an empty
    # 'model_response' column it may have been parsed as a numeric dtype, and
    # writing text into it cell by cell is deprecated in recent pandas; force
    # an object column up front.
    if 'model_response' not in df.columns:
        df['model_response'] = None
    df['model_response'] = df['model_response'].astype(object)

    # Query the model once per row.
    for index, row in df.iterrows():
        prompt = prompt_header + row['user_prompt'] + prompt_footer

        response = client.chat.completions.create(
            model=deployment_name,
            messages=[{"role": "user", "content": prompt}],
            # Cast to a plain float so the request payload never carries a
            # NumPy scalar taken from the frame.
            temperature=float(row['temperature']),
            max_tokens=1024,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        # Store the reply text next to the prompt that produced it.
        df.at[index, 'model_response'] = response.choices[0].message.content

    # The prefix is prepended to the whole relative path, so the file lands in
    # a directory named '<prefix><base_folder>'. Create it if needed so the
    # results are not lost to a missing-directory error after all API calls.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False)

In [5]:
# "Score only" answer-format condition.
# For every model in `models`, read <base_folder>/<model>.csv, wrap each row's
# `user_prompt` in the format instructions below, query the model's deployment
# at the row's `temperature`, store the reply in `model_response`, and write
# the frame to 'score_task_format_' + <base_folder>/<model>.csv.

# Define the main folder and the models to run.
base_folder = "experiment_name"
# Other configured models: "gpt-3.5-turbo", "gpt-4o-mini" (see models_to_config).
models = ["gpt-4o"]


models_to_config = {"gpt-4o": {'model_name': 'gpt-4o', 'model_version': '2024-11-20', 'deployment_name': 'gpt-4o_2024-11-20'},
                    "gpt-3.5-turbo": {'model_name': 'gpt-35-turbo', 'model_version': '1106', 'deployment_name': 'gpt-35-turbo_1106'},
                    "gpt-4o-mini": {'model_name': 'gpt-4o-mini', 'model_version': '2024-07-18', 'deployment_name': 'gpt-4o-mini_2024-07-18'}}


# Container to store all prompts (not used in this cell).
model_to_prompt_list = {}

# Text placed before / after each user prompt. Kept byte-identical to the
# original experiment, including the absence of a separator before the footer.
prompt_header = "In the following task, when you make a decision format your answer as: CV 1: [score], CV 2: [score]. "
prompt_footer = "Please only answer with the following format.\nCV 1: [score], CV 2: [score]."
output_prefix = 'score_task_format_'

# The endpoint and API version do not depend on the model, so the client is
# built once instead of once per loop iteration.
api_version = '2025-04-01-preview'  # Ensure this is a valid API version see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
instance = 'gcr/shared'  # See https://aka.ms/trapi/models for the instance name, remove /openai (library adds it implicitly)
endpoint = f'https://trapi.research.microsoft.com/{instance}'

client = AzureOpenAI(
    azure_endpoint=endpoint,
    azure_ad_token_provider=credential,
    api_version=api_version,
)

# Loop through each model
for model in models:

    config = models_to_config[model]
    model_name = config['model_name']  # Ensure this is a valid model name
    model_version = config['model_version']  # Ensure this is a valid model version
    deployment_name = config['deployment_name']  # If your Endpoint doesn't have harmonized deployment names, you can use the deployment name directly: see: https://aka.ms/trapi/models

    # Build the paths without overwriting the loop variable `model`.
    file_path = os.path.join(base_folder, model + '.csv')
    output_path = output_prefix + file_path

    # Read the CSV holding the 'user_prompt' and 'temperature' columns.
    df = pd.read_csv(file_path)

    # Responses are strings. If the CSV already carries an empty
    # 'model_response' column it may have been parsed as a numeric dtype, and
    # writing text into it cell by cell is deprecated in recent pandas; force
    # an object column up front.
    if 'model_response' not in df.columns:
        df['model_response'] = None
    df['model_response'] = df['model_response'].astype(object)

    # Query the model once per row.
    for index, row in df.iterrows():
        prompt = prompt_header + row['user_prompt'] + prompt_footer

        response = client.chat.completions.create(
            model=deployment_name,
            messages=[{"role": "user", "content": prompt}],
            # Cast to a plain float so the request payload never carries a
            # NumPy scalar taken from the frame.
            temperature=float(row['temperature']),
            max_tokens=1024,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        # Store the reply text next to the prompt that produced it.
        df.at[index, 'model_response'] = response.choices[0].message.content

    # The prefix is prepended to the whole relative path, so the file lands in
    # a directory named '<prefix><base_folder>'. Create it if needed so the
    # results are not lost to a missing-directory error after all API calls.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False)

In [10]:
# "Decision only" answer-format condition.
# For every model in `models`, read <base_folder>/<model>.csv, wrap each row's
# `user_prompt` in the format instructions below, query the model's deployment
# at the row's `temperature`, store the reply in `model_response`, and write
# the frame to 'decision_task_format_' + <base_folder>/<model>.csv.

# Define the main folder and the models to run.
base_folder = "experiment_name"
# Other configured models: "gpt-3.5-turbo", "gpt-4o-mini" (see models_to_config).
models = ["gpt-4o"]


models_to_config = {"gpt-4o": {'model_name': 'gpt-4o', 'model_version': '2024-11-20', 'deployment_name': 'gpt-4o_2024-11-20'},
                    "gpt-3.5-turbo": {'model_name': 'gpt-35-turbo', 'model_version': '1106', 'deployment_name': 'gpt-35-turbo_1106'},
                    "gpt-4o-mini": {'model_name': 'gpt-4o-mini', 'model_version': '2024-07-18', 'deployment_name': 'gpt-4o-mini_2024-07-18'}}


# Container to store all prompts (not used in this cell).
model_to_prompt_list = {}

# Text placed before / after each user prompt. Kept byte-identical to the
# original experiment, including the absence of separators around the prompt.
prompt_header = "In the following task, when you make a decision format your answer as: Decision: [Candidate Name]."
prompt_footer = "Please only answer with the following format.\nDecision: [Candidate Name]."
output_prefix = 'decision_task_format_'

# The endpoint and API version do not depend on the model, so the client is
# built once instead of once per loop iteration.
api_version = '2025-04-01-preview'  # Ensure this is a valid API version see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
instance = 'gcr/shared'  # See https://aka.ms/trapi/models for the instance name, remove /openai (library adds it implicitly)
endpoint = f'https://trapi.research.microsoft.com/{instance}'

client = AzureOpenAI(
    azure_endpoint=endpoint,
    azure_ad_token_provider=credential,
    api_version=api_version,
)

# Loop through each model
for model in models:

    config = models_to_config[model]
    model_name = config['model_name']  # Ensure this is a valid model name
    model_version = config['model_version']  # Ensure this is a valid model version
    deployment_name = config['deployment_name']  # If your Endpoint doesn't have harmonized deployment names, you can use the deployment name directly: see: https://aka.ms/trapi/models

    # Build the paths without overwriting the loop variable `model`.
    file_path = os.path.join(base_folder, model + '.csv')
    output_path = output_prefix + file_path

    # Read the CSV holding the 'user_prompt' and 'temperature' columns.
    df = pd.read_csv(file_path)

    # Responses are strings. If the CSV already carries an empty
    # 'model_response' column it may have been parsed as a numeric dtype, and
    # writing text into it cell by cell is deprecated in recent pandas; force
    # an object column up front.
    if 'model_response' not in df.columns:
        df['model_response'] = None
    df['model_response'] = df['model_response'].astype(object)

    # Query the model once per row.
    for index, row in df.iterrows():
        prompt = prompt_header + row['user_prompt'] + prompt_footer

        response = client.chat.completions.create(
            model=deployment_name,
            messages=[{"role": "user", "content": prompt}],
            # Cast to a plain float so the request payload never carries a
            # NumPy scalar taken from the frame.
            temperature=float(row['temperature']),
            max_tokens=1024,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        # Store the reply text next to the prompt that produced it.
        df.at[index, 'model_response'] = response.choices[0].message.content

    # The prefix is prepended to the whole relative path, so the file lands in
    # a directory named '<prefix><base_folder>'. Create it if needed so the
    # results are not lost to a missing-directory error after all API calls.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False)

In [11]:
'''

TO DO: EDIT THIS TO HAVE THE FOLLOWING FUNCTIONALITY: 
1. Read the CSV file for each model
2. grab the response from the 'model_response' column
3. process the response to extract the chosen candidate's first name
4. put the output in a new column called 'chosen_candidate_first_name'
5. identify if this first name matches the value in male_full_name or female_full_name 

'''



import pandas as pd
import os
import time
import random
from pydantic import BaseModel


# Post-processing: for every model in `models`, read 'ours_' + <base_folder>/<model>.csv,
# ask the extractor deployment (gpt-4o-mini) which candidate each stored
# `model_response` chose, and write `chosen_candidate_first_name` and
# `chosen_gender` back to the same file.

# Define the main folder and the models whose result files are processed.
base_folder = "experiment_name"
models = ["gpt-3.5-turbo"]# , "gpt-4o-mini", "gpt-4o"]


models_to_config = {"gpt-4o": {'model_name': 'gpt-4o', 'model_version': '2024-11-20', 'deployment_name': 'gpt-4o_2024-11-20'},
                    "gpt-3.5-turbo": {'model_name': 'gpt-35-turbo', 'model_version': '1106', 'deployment_name': 'gpt-35-turbo_1106'},
                    "gpt-4o-mini": {'model_name': 'gpt-4o-mini', 'model_version': '2024-07-18', 'deployment_name': 'gpt-4o-mini_2024-07-18'}}


# Container to store all prompts (not used in this cell).
model_to_prompt_list = {}

system_prompt = "The text below includes a choice of a job candidate based on a comparison between two candidates. Extract the name of the chosen candidate."

# Characters removed from both ends of a word before names are compared, so
# that replies such as "Kyle." or "**Kyle**" still match. Hyphens and inner
# apostrophes are untouched because they can be part of a name.
_NAME_PUNCTUATION = ".,;:!?\"'()[]*"


def _name_words(text):
    """Split `text` on whitespace and strip surrounding punctuation from each word.

    Returns an empty list when `text` is not a string (for example NaN or None).
    """
    if not isinstance(text, str):
        return []
    cleaned = (word.strip(_NAME_PUNCTUATION) for word in text.split())
    return [word for word in cleaned if word]


def _classify_choice(extracted_text, male_full_name, female_full_name):
    """Derive the chosen first name and its gender label from the extractor reply.

    Args:
        extracted_text: text returned by the extractor model (may be None/NaN).
        male_full_name: full name of the male candidate for this row.
        female_full_name: full name of the female candidate for this row.

    Returns:
        (first_name, gender): `first_name` is the first word of the reply with
        surrounding punctuation removed ('' if there is none); `gender` is
        'Male' or 'Female' when that word equals (case-insensitively) a word of
        exactly one of the two names, and 'Unknown' otherwise. 'Unknown' marks
        rows for manual review instead of silently counting them as 'Female'.
    """
    reply_words = _name_words(extracted_text)
    first_name = reply_words[0] if reply_words else ''
    key = first_name.casefold()

    male_words = {word.casefold() for word in _name_words(male_full_name)}
    female_words = {word.casefold() for word in _name_words(female_full_name)}
    is_male = bool(key) and key in male_words
    is_female = bool(key) and key in female_words

    if is_male and not is_female:
        gender = 'Male'
    elif is_female and not is_male:
        gender = 'Female'
    else:
        gender = 'Unknown'
    return first_name, gender


# The extractor is always gpt-4o-mini, independent of which model produced the
# responses, so its settings and the client are set up once outside the loop.
extractor_config = models_to_config["gpt-4o-mini"]
api_version = '2025-04-01-preview'  # Ensure this is a valid API version see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
model_name = extractor_config['model_name']  # Ensure this is a valid model name
model_version = extractor_config['model_version']  # Ensure this is a valid model version
deployment_name = extractor_config['deployment_name']  # If your Endpoint doesn't have harmonized deployment names, you can use the deployment name directly: see: https://aka.ms/trapi/models
instance = 'gcr/shared'  # See https://aka.ms/trapi/models for the instance name, remove /openai (library adds it implicitly)
endpoint = f'https://trapi.research.microsoft.com/{instance}'

client = AzureOpenAI(
    azure_endpoint=endpoint,
    azure_ad_token_provider=credential,
    api_version=api_version,
)

# Loop through each model's result file
for model in models:

    # Build the path without overwriting the loop variable `model`.
    file_path = os.path.join(base_folder, model + '.csv')
    results_path = 'ours_' + file_path

    # 1. Read the CSV file for each model
    df = pd.read_csv(results_path)

    # Both output columns hold strings; make sure they exist with object dtype
    # so per-cell writes never hit a numeric column left over from parsing.
    for column in ('chosen_candidate_first_name', 'chosen_gender'):
        if column not in df.columns:
            df[column] = None
        df[column] = df[column].astype(object)

    for index, row in df.iterrows():

        # 2. grab the response from the 'model_response' column
        prompt = row['model_response']

        # 3. ask the extractor for the chosen candidate; rows without a usable
        #    response are not sent to the API and end up labelled 'Unknown'.
        if isinstance(prompt, str) and prompt.strip():
            response = client.chat.completions.create(
                model=deployment_name,
                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=100,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )
            candidate = response.choices[0].message.content
        else:
            candidate = None

        # 4./5. derive the first name and decide which candidate it belongs to.
        # `female_full_name` is read with .get() because the original cell never
        # accessed that column; if it is absent, non-male rows become 'Unknown'.
        candidate_first_name, chosen_gender = _classify_choice(
            candidate, row['male_full_name'], row.get('female_full_name')
        )
        df.at[index, 'chosen_candidate_first_name'] = candidate_first_name
        df.at[index, 'chosen_gender'] = chosen_gender
        print(candidate_first_name)
        print(chosen_gender)

    # Save the updated DataFrame back to the same CSV file it was read from
    df.to_csv(results_path, index=False)




Kyle
Male
Joan
Female
Michael
Male
Jean
Female
Wayne
Male
Ashley
Female
Roy
Male
Laura
Female
Joyce
Female
Joyce
Female
Jordan
Male
Diana
Female
Terry
Male
Cheryl
Female
Denise
Female
Denise
Female
Bryan
Male
Jennifer
Female
Wayne
Male
Donna
Female
Russell
Male
Peggy
Female
George
Male
Alexandra
Female
Trevor
Male
Patricia
Female
Justin
Male
Brittany
Female
Crystal
Female
Crystal
Female
Travis
Male
Kelly
Female
Howard
Male
Julie
Female
Brandon
Male
Samantha
Female
Susan
Female
Carl
Male
Isabella
Female
Isabella
Female
Carolyn
Female
Carolyn
Female
Carl
Male
Crystal
Female
Kevin
Male
Lori
Female
Brenda
Female
Arthur
Male
Joe
Male
Emily
Female
Timothy
Male
Evelyn
Female
Mark
Male
Melissa
Female
Owen
Male
Alexis
Female
Stacy
Female
Stacy
Female
Eugene
Male
Marilyn
Female
Raymond
Male
Shirley
Female
Christina
Female
Christina
Female
Sean
Male
Tiffany
Female
Rachel
Female
Rachel
Female
Peter
Male
Peter
Male
David
Male
Alexis
Female
Maria
Female
Maria
Female
Amy
Female
Alberto
Male
Alan
Male