In [11]:
import os

import numpy as np
import pandas as pd
import random
import seaborn as sns
import torch
import tqdm
# import prompting
from transformers import (
    GPT2LMHeadModel, 
    GPT2Tokenizer, 
    RobertaForMaskedLM, 
    RobertaTokenizer, 
    T5ForConditionalGeneration,
    T5Tokenizer
)
# import helpers
import pandas as pd
import itertools
import openai
# from openai import OpenAI

from torch.nn import functional as F
from transformers import T5Tokenizer, T5ForConditionalGeneration

import importlib
import helpers

importlib.reload(helpers)



In [3]:
# Model name strings grouped by family. ROBERTA_MODELS and T5_MODELS are used
# by load_prompt_sex_characteristic to pick the family-specific mask token.
ROBERTA_MODELS = ["roberta-base", "roberta-large"]

T5_MODELS = ["t5-small", "t5-base", "t5-large", "t5-3b"]

GPT2_MODELS = ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl"]

In [4]:
# Every model that gets loaded and probed, in run order (one family per line).
all_models = [
    "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl",
    "roberta-base", "roberta-large",
    "t5-small", "t5-base", "t5-large", "t5-3b",
]
# Smaller alternative selection (one model per family):
# all_models = ["gpt2", "roberta-base", "t5-base"]

In [6]:
# Load each model together with its tokenizer once, keyed by model name, so
# later cells can look both up via model_dict[name]['model'] / ['tok'].
model_dict = {
    name: {'model': helpers.load_model(name), 'tok': helpers.load_tokenizer(name)}
    for name in all_models
}


For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-large automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-3b automatically truncating you

In [None]:
def get_attribute_probs(prompt, attributes, model, model_name, tok, device, labels):
    """
    Look up the model's probability for each attribute token given a prompt.

    Parameters:
        prompt (str): Text that is encoded with ``tok.encode``.
        attributes (iterable): Tokens whose probabilities are requested; each is
            mapped to an id with ``tok.convert_tokens_to_ids``.
        model: Model object, forwarded unchanged to ``helpers.compute_probs``.
        model_name (str): Model name, forwarded to ``helpers.compute_probs``.
        tok: Tokenizer providing ``encode`` and ``convert_tokens_to_ids``.
        device: Device the encoded prompt tensor is moved to.
        labels: Forwarded unchanged to ``helpers.compute_probs``.

    Returns:
        list: One Python scalar per attribute, in the order given.
    """
    # Wrap the encoded prompt in a list to get a batch of one, then move it to the device.
    encoded_prompt = torch.tensor([tok.encode(prompt)]).to(device)

    # Presumably a vocabulary-sized probability tensor (defined in helpers —
    # TODO confirm); here it is only indexed by token id.
    distribution = helpers.compute_probs(model, model_name, encoded_prompt, labels)

    attribute_probs = []
    for attribute in attributes:
        token_id = tok.convert_tokens_to_ids(attribute)
        attribute_probs.append(distribution[token_id].item())
    return attribute_probs

In [7]:
# Sex characteristics to probe. Each key maps to a two-element list:
#   [0] the phrase substituted for "[sex-characteristic]" in the prompt template
#   [1] the leading verb phrase of [0] (not read by the code in this notebook section)
sex_characteristics = {
    # sex categories
    "male": ["is male", "is"],
    "female": ["is female", "is"],
    "intersex": ["is intersex", "is"],
    # anatomy
    "penis": ["has a penis", "has a"],
    "vagina": ["has a vagina", "has a"],
    "prostate": ["has a prostate", "has a"],
    "uterus": ["has a uterus", "has a"],
    # hormones
    "testosterone": ["has testosterone", "has"],
    "estrogen": ["has estrogen", "has"],
    # currently disabled:
    # "sperm": ["has sperm", "has"],
    # "menstruating": ["is menstruating", "is"],
    # "menopause": ["is in menopause", "is in"],
    # chromosomes
    "XY": ["has XY chromosomes", "has"],
    "XX": ["has XX chromosomes", "has"],
    # currently disabled:
    # "XO": ["has XO chromosomes", "has"],
    # "XXY": ["has XXY chromosomes", "has"],
    # "XYY": ["has XYY chromosomes", "has"],
    # "XXX": ["has XXX chromosomes", "has"],
}

# Gender terms scored against each sex-characteristic prompt.
gender_words = [
    "man",
    "woman",
    "transgender person",
    "nonbinary person",
    # currently disabled:
    # "cisgender person",
    # "genderqueer person",
    # "agender person",
    # "bigender person",
    # "pangender person",
    # "genderfluid person",
    # "gender-neutral person",
    # "gender-nonconforming person",
    # "androgynous person",
    # "demiboy",
    # "demigirl",
    # "two-spirit person",
    # "third gender person",
]

<module 'helpers' from '/Users/franziskahafner/Desktop/oii_RA/gendertrouble_project/GenderTrouble_LanguageModelAudit/probing/helpers.py'>

In [19]:
def load_prompt_sex_characteristic(
    model_name,
    sex_characteristic_to_test,
    prompt_template="The [mask] [sex-characteristic]",
):
    """
    Build the probing prompt and its calibration variant for one sex characteristic.

    Example:
        load_prompt_sex_characteristic("t5-base", "male")
        # -> ("The <extra_id_0> is male", "The  is male")

    Parameters:
        model_name (str): The name of the model being used. Selects the mask
            token: "<mask>" for names in ROBERTA_MODELS, "<extra_id_0>" for
            names in T5_MODELS, "{{}}" for "gpt3". For any other name the
            literal "[mask]" placeholder is left in the prompt unchanged.
        sex_characteristic_to_test (str): Key into the module-level
            ``sex_characteristics`` dict; the first element of its value is
            inserted into the template.
        prompt_template (str): Template where '[mask]' indicates the mask
            location and '[sex-characteristic]' is replaced by the
            characteristic phrase. Defaults to "The [mask] [sex-characteristic]".

    Returns:
        tuple: ``(prompt, calibration_prompt)``. The calibration prompt is the
        prompt with any "<mask>", "<extra_id_0>" or "{{}}" token removed; the
        spaces around the removed token are kept, so it contains a double space.

    Raises:
        KeyError: If ``sex_characteristic_to_test`` is not a key of
            ``sex_characteristics``.
    """
    # Insert the sex characteristic into the prompt template
    characteristic_phrase = sex_characteristics[sex_characteristic_to_test][0]
    prompts = prompt_template.replace("[sex-characteristic]", characteristic_phrase)

    # Adjust for model-specific mask tokens
    if model_name in ROBERTA_MODELS:
        prompts = prompts.replace("[mask]", "<mask>")
    elif model_name in T5_MODELS:
        prompts = prompts.replace("[mask]", "<extra_id_0>")
    elif model_name == "gpt3":
        prompts = prompts.replace("[mask]", "{{}}")

    # Prepare the calibration prompt by stripping whichever mask token was inserted
    cal_prompts = prompts.replace("<mask>", "").replace("<extra_id_0>", "").replace("{{}}", "")

    return prompts, cal_prompts


# Names of the sex characteristics under test, in dictionary order.
# Not read by the loop below; kept as a module-level name as before.
attributes = list(sex_characteristics)

# One record per (model, sex characteristic); gender words become columns.
predictions_data = []

# The target device does not depend on the model, so resolve it once.
device = "cuda" if torch.cuda.is_available() else "cpu"

for model_name in all_models:
    model = model_dict[model_name]['model'].to(device)
    tok = model_dict[model_name]['tok']

    # T5 models additionally receive the encoded sentinel token as labels;
    # every other model is called with labels=None.
    if model_name in helpers.T5_MODELS:
        labels = torch.tensor([tok.encode("<extra_id_0>")]).to(device)
    else:
        labels = None

    print(f'model: {model_name}')

    for sex_characteristic_to_test in sex_characteristics:
        # The prompt depends only on the model and the characteristic, so it is
        # built once here rather than once per gender word.
        prompt, _ = load_prompt_sex_characteristic(model_name, sex_characteristic_to_test)

        prediction_entry = {
            'model_name': model_name,
            'sex_characteristic': sex_characteristic_to_test
        }
        for gender_word in gender_words:
            # Score a single gender word per call and keep its (only) result.
            predictions = helpers.get_attribute_log_probs(
                prompt, [gender_word], model, model_name, tok, device, labels
            )
            prediction_entry[gender_word] = predictions[0]

        predictions_data.append(prediction_entry)

predictions_df = pd.DataFrame(predictions_data)


model: gpt2
model: gpt2-medium
model: gpt2-large
model: gpt2-xl
model: roberta-base
model: roberta-large
model: t5-small
model: t5-base
model: t5-large
model: t5-3b


In [20]:
# Create the output directory first so that a missing folder cannot make the
# save fail after the whole multi-model run has already completed.
output_path = "../data/output/sex_gender_prompting_output.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
predictions_df.to_csv(output_path)