In [None]:
#!pip install openai==0.28.1

In [2]:
import openai
import csv
import json
import os
from dotenv import load_dotenv

In [3]:
def gpt4o(prompt, url, max_tokens=300, model="gpt-4-vision-preview"):
    """Send one text prompt plus one image URL to an OpenAI chat model.

    Uses the legacy ``openai.ChatCompletion`` interface (the notebook pins
    openai==0.28.1 in its first cell).

    Args:
        prompt: Text part of the user message.
        url: Value placed in the ``image_url.url`` field of the image part.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        model: Chat model identifier. Defaults to ``"gpt-4-vision-preview"``,
            the model this function has always called despite its name;
            pass a different identifier to target another model.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns for the request.
    """
    # A single user turn whose content mixes a text part and an image part.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": url}},
        ],
    }
    return openai.ChatCompletion.create(
        model=model,
        messages=[user_message],
        max_tokens=max_tokens,
    )

In [4]:
# load_dotenv() (python-dotenv) runs first so that a key kept in a local .env
# file is visible in the process environment before it is read below.
load_dotenv()
# The client key is whatever API_KEY holds; it is None when the variable is unset.
openai.api_key = os.environ.get('API_KEY')

In [5]:
def correct_art(adjetivo):
    """Pick the English indefinite article for the word that follows it.

    The choice is made purely on spelling: a leading a/e/i/o/u (any case)
    gives ``'an'``, anything else gives ``'a'``.

    Args:
        adjetivo: Word or phrase that will follow the article.

    Returns:
        ``'an'`` or ``'a'``. Empty input yields ``'a'`` instead of raising
        ``IndexError``.
    """
    # Guard first: indexing '' raises, and '' is "in" every string, so a
    # slice-based lookup would wrongly answer 'an'.
    if not adjetivo:
        return 'a'
    return 'an' if adjetivo[0].lower() in 'aeiou' else 'a'

In [None]:
def toCSV(input_csv, output_csv='professions_results.csv', iterations=10):
    """Query the vision model for every input row and append the answers to a CSV.

    For each row of ``input_csv`` the prompt is sent ``iterations`` times with
    the woman image and with the man image (via ``gpt4o``); every pair of
    returned choices becomes one ';'-delimited row in ``output_csv``.

    Args:
        input_csv: Path to a ','-delimited CSV with the columns
            ``Profession``, ``Prompt``, ``URLwoman`` and ``URLman``.
        output_csv: Path of the results file. It is opened in append mode, so
            earlier results are kept.
        iterations: Number of repeated query pairs per input row.

    Returns:
        None. Results are written to ``output_csv`` and echoed with ``print``.
    """
    header = ['Profession', 'Prompt', 'URL_Woman', 'URL_Man', 'mssgWoman', 'mssgMan']
    # The input is opened first so that a bad input path fails before the
    # output file is created or touched.
    with open(input_csv, 'r', newline='', encoding='utf-8') as input_file, \
            open(output_csv, 'a', newline='', encoding='utf-8') as output_file:
        csvwriter = csv.writer(output_file, delimiter=';')
        # Append mode starts at end-of-file, so position 0 means the file is
        # new or empty. Writing the header only then keeps re-runs from
        # injecting extra header lines into the middle of the data.
        if output_file.tell() == 0:
            csvwriter.writerow(header)
        for row in csv.DictReader(input_file, delimiter=','):
            profession = row['Profession']
            prompt = row['Prompt']
            urlW = row['URLwoman']
            urlM = row['URLman']
            for _ in range(iterations):
                responseW = gpt4o(prompt, urlW)
                responseM = gpt4o(prompt, urlM)
                # Choices are paired positionally; zip stops at the shorter list.
                for choiceW, choiceM in zip(responseW.choices, responseM.choices):
                    message_content_W = choiceW['message']['content']
                    message_content_M = choiceM['message']['content']
                    csvwriter.writerow(
                        [profession, prompt, urlW, urlM, message_content_W, message_content_M]
                    )
                    print(f"Woman: {message_content_W}\n")
                    print(f"Man: {message_content_M}\n")
                # Push finished rows to disk so a later failure in this long
                # run does not take buffered results with it.
                output_file.flush()


In [21]:
def toCSVTest(input_csv, output_csv='professions_results_test.csv', iterations=2):
    """Smoke-test the query pipeline using only the first row of ``input_csv``.

    The first data row's prompt is sent ``iterations`` times with the woman
    image and with the man image (via ``gpt4o``); every pair of returned
    choices becomes one ';'-delimited row in ``output_csv``.

    Args:
        input_csv: Path to a ','-delimited CSV with the columns
            ``Profession``, ``Prompt``, ``URLwoman`` and ``URLman``.
        output_csv: Path of the results file. It is opened in append mode, so
            earlier results are kept.
        iterations: Number of repeated query pairs.

    Returns:
        None. Results are written to ``output_csv`` and echoed with ``print``.

    Raises:
        ValueError: If ``input_csv`` contains no data rows.
    """
    # Read the single row that is needed before touching the output, so an
    # unusable input leaves no half-written results file behind.
    with open(input_csv, 'r', newline='', encoding='utf-8') as input_file:
        first_row = next(csv.DictReader(input_file, delimiter=','), None)
    if first_row is None:
        raise ValueError(f"{input_csv!r} contains no data rows")

    profession = first_row['Profession']
    prompt = first_row['Prompt']
    urlW = first_row['URLwoman']
    urlM = first_row['URLman']

    with open(output_csv, 'a', newline='', encoding='utf-8') as output_file:
        csvwriter = csv.writer(output_file, delimiter=';')
        # Append mode starts at end-of-file, so position 0 means the file is
        # new or empty. Writing the header only then keeps re-runs from
        # injecting extra header lines into the middle of the data.
        if output_file.tell() == 0:
            csvwriter.writerow(['Profession', 'Prompt', 'URL_Woman', 'URL_Man', 'mssgWoman', 'mssgMan'])
        for _ in range(iterations):
            responseW = gpt4o(prompt, urlW)
            responseM = gpt4o(prompt, urlM)
            # Choices are paired positionally; zip stops at the shorter list.
            for choiceW, choiceM in zip(responseW.choices, responseM.choices):
                message_content_W = choiceW['message']['content']
                message_content_M = choiceM['message']['content']
                csvwriter.writerow(
                    [profession, prompt, urlW, urlM, message_content_W, message_content_M]
                )
                print(f"Woman: {message_content_W}\n")
                print(f"Man: {message_content_M}\n")



In [None]:
# Smoke run: toCSVTest queries only the first row of the prompts file,
# two times per image, and appends to professions_results_test.csv.
toCSVTest('Prompts for Visual Bias in Professions.csv')

In [None]:
# Full run: toCSV makes 10 paired (woman/man) queries for every row of the
# prompts file and appends the answers to professions_results.csv.
toCSV('Prompts for Visual Bias in Professions.csv')

In [None]:
import pandas as pd
import re
import numpy as np

In [None]:
def extract_profession(message):
    """Return the run of words that follows "profession" in ``message``.

    The pattern looks for the word "profession" (or one of the listed longer
    phrasings), whitespace, an optional connecting word or phrase, and then
    captures one or more whitespace-separated alphabetic words.

    The search is case-insensitive, so the ``[A-Z]`` in the capture group
    also accepts lowercase letters; the capture therefore runs on until the
    first character that is neither a letter nor whitespace.

    Args:
        message: Text to search.

    Returns:
        The captured words, or ``''`` when the pattern does not match.
    """
    pattern = r'\b(?:profession|professions|profession\s+associated|profession\s+that\s+involves|profession\s+aligns\s+with|profession\s+depicts)\s+(?:with\s+this\s+image\s+is\s+|is\s+|that\s+|the\s+|as\s+a\s+|(?:with|in|to|a|an|for|of|or|on|and|as|about|by|over|into|from|under|down|up|around|among|before|after|between|during|without|within|along|following|across|behind|beyond|plus|except|but|through|despite|towards|upon|regarding|concerning|regardless)\s+)?(?P<profession>[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\b'
    found = re.compile(pattern, re.IGNORECASE).search(message)
    return found.group('profession') if found else ''


In [None]:
def extract_first_profession(message):
    """Map a free-text model answer to a canonical profession label.

    The message is lowercased and checked against a table of known phrasings.
    The label of the first table entry (in table order, not in order of
    appearance in the text) with a phrasing contained in the message is
    returned. If nothing matches, the lowercased message is handed to
    ``extract_profession`` as a fallback.

    Args:
        message: Model answer. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell, or None) are accepted.

    Returns:
        A label from the table, the fallback's result, or ``''`` when
        ``message`` is not a string.
    """
    # Missing answers arrive as NaN/None and have no .lower(); report
    # "nothing found" instead of aborting the whole DataFrame.apply.
    if not isinstance(message, str):
        return ''
    message = message.lower()
    # NOTE(review): no phrasing under 'nurse' contains the word "nurse"
    # itself -- confirm whether that is intentional.
    # NOTE(review): every 'secretary' phrasing contains an 'office worker'
    # phrasing, which is checked earlier, so 'secretary' is never returned.
    professions = {
        'ballet dancer': ['ballet dancer'],
        'office worker': ['office worker', 'customer service representative', 'customer support', 'office professional'],
        'teacher': ['teacher', 'educator', 'professor', 'instructor'],
        'nurse': ['healthcare professional', 'medical professional', 'healthcare professional or medical staff'],
        'flight attendant': ['flight attendant'],
        'doctor': ['doctor', 'medical doctor', 'physician'],
        'electrician': ['electrician', 'electrical technician', 'electrical engineer'],
        'mechanic': ['mechanic', 'automotive technician', 'auto mechanic'],
        'it professional': ['it professional', 'network technician', 'data center technician'],
        'pilot': ['pilot', 'airline pilot', 'commercial pilot'],
        'computer programmer': ['programmer', 'software developer', 'web developer'],
        'engineer': ['engineer', 'civil engineer', 'mechanical engineer'],
        'chef': ['chef', 'culinary artist', 'cook'],
        'firefighter': ['firefighter', 'rescue worker', 'fireman'],
        'fashion designer': ['fashion designer', 'apparel designer', 'clothing designer'],
        'scientist': ['scientist', 'researcher', 'lab technician'],
        'reporter': ['reporter', 'journalist', 'news anchor'],
        'dance': ['ballet dancer', 'dancer', 'dancer (ballet)', 'ballerino (male ballet dancer)'],
        'secretary': ['office worker or customer service representative', 'customer service representative', 'office worker or customer support', 'office professional or customer service representative']
    }
    # Plain substring test; dict order decides which label wins.
    for prof, variants in professions.items():
        if any(variant in message for variant in variants):
            return prof

    return extract_profession(message)


In [None]:
# Load the raw answers written by toCSV (';'-delimited).
# NOTE(review): if the results file was appended to over several runs it may
# contain repeated header lines as data rows -- verify before trusting counts.
df = pd.read_csv('professions_results.csv', sep=';')

# Sequential row id as the first column, then one normalised label per answer.
df.insert(0, 'id', range(len(df)))
df['output_woman'] = df['mssgWoman'].apply(extract_first_profession)
df['output_man'] = df['mssgMan'].apply(extract_first_profession)

# Keep only what the comparison needs: the prompted profession and both labels.
result_df = df.loc[:, ['id', 'Profession', 'output_man', 'output_woman']]

print(result_df)
result_df.to_csv('output_4V_results.csv', sep=';', index=False)