# Propagation of Disease-Demographic Co-occurrences to Model Logits


## Set up

**Just run this part**


### Paths and Dictionaries


In [1]:
import os
import pandas as pd
import numpy as np
import json
import sys

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [2]:
project_root_relative_path = ".."  # Adjust this path as necessary

# Use os.getcwd() to get the current working directory of the notebook
current_dir = os.getcwd()

# Construct the path to the root of the Cross-Care project
cross_care_root = os.path.normpath(
    os.path.join(current_dir, project_root_relative_path)
)

# Add the Cross-Care root to sys.path to allow imports
if cross_care_root not in sys.path:
    sys.path.append(cross_care_root)

print("Project root added to sys.path:", cross_care_root)

from co_occurrence_generate.dicts.dict_medical import medical_keywords_dict

Project root added to sys.path: /clinical_nlp/Cross-Care


In [3]:
race_categories = [
    "pacific islander",
    "hispanic",
    "asian",
    "indigenous",
    "white",
    "black",
]
gender_categories = [
    "male",
    "female",
    "nonbinary",
]

In [4]:
model_size_mapping = {
    "EleutherAI/pythia-70m-deduped": 70,
    "EleutherAI/pythia-160m-deduped": 160,
    "EleutherAI/pythia-410m-deduped": 410,
    "EleutherAI/pythia-1b-deduped": 1000,  # 1 billion parameters = 1000 million
    "EleutherAI/pythia-2.8b-deduped": 2800,  # 2.8 billion parameters = 2800 million
    "EleutherAI/pythia-6.9b-deduped": 6900,  # 6.9 billion parameters = 6900 million
    "EleutherAI/pythia-12b-deduped": 12000,  # 12 billion parameters = 12000 million
    "state-spaces/mamba-130m": 130,
    "state-spaces/mamba-370m": 370,
    "state-spaces/mamba-790m": 790,
    "state-spaces/mamba-1.4b": 1400,
    "state-spaces/mamba-2.8b-slimpj": 2800,
    "state-spaces/mamba-2.8b": 2800,
    "EleutherAI/pile-t5-base": 220,
    "EleutherAI/pile-t5-large": 770,
    "EleutherAI/pile-t5-xl": 2850,
    "EleutherAI/pile-t5-xxl": 11000,
    "Qwen/Qwen1.5-7B": 7000,
    "Qwen/Qwen1.5-7B-Chat": 7000,
    "meta-llama/Llama-2-7b": 7000,
    "epfl-llm/meditron-7b": 7000,
    "allenai/OLMo-7B": 7000,
    "allenai/OLMo-7B-SFT": 7000,
    "allenai/tulu-2-7b": 7000,
    "allenai/tulu-2-dpo-7b": 7000,
    "BioMistral/BioMistral-7B": 7000,
    "HuggingFaceH4/zephyr-7b-beta": 7000,
    "HuggingFaceH4/mistral-7b-sft-beta": 7000,
    "mistralai/Mistral-7B-v0.1": 7000,
    "mistralai/Mistral-7B-Instruct-v0.1": 7000,
    "gpt-35-turbo-0613": 175000,
    "Qwen/Qwen1.5-72B": 72000,
    "Qwen/Qwen1.5-72B-Chat": 72000,
    "meta-llama/Llama-2-7b-hf": 7000,
    "meta-llama/Llama-2-70b-hf": 70000,
    "meta-llama/Llama-2-7b-chat-hf": 7000,
    "meta-llama/Llama-2-70b-chat-hf": 70000,
    "epfl-llm/meditron-7b": 7000,
    "epfl-llm/meditron-70b": 70000,
    "allenai/tulu-2-70b": 70000,
    "allenai/tulu-2-dpo-70b": 70000,
    "meta-llama/Meta-Llama-3-8B": 8000,
    "meta-llama/Meta-Llama-3-8B-Instruct": 8000,
    "meta-llama/Meta-Llama-3-70B": 70000,
    "meta-llama/Meta-Llama-3-70B-Instruct": 70000
}

In [5]:
# load the translation csv
translation_df = pd.read_csv(
    os.path.join(cross_care_root, "logits_generate/disease_translations.csv")
)

chinese_to_english = dict(zip(translation_df["Chinese"], translation_df["English"]))
french_to_english = dict(zip(translation_df["French"], translation_df["English"]))
spanish_to_english = dict(zip(translation_df["Spanish"], translation_df["English"]))

# Combine all mappings into a single dictionary
language_mappings = {**chinese_to_english, **french_to_english, **spanish_to_english}

# Logits


## Load HF models Logits


In [6]:
hf_models = [
    "EleutherAI/pythia-70m-deduped",
    "EleutherAI/pythia-160m-deduped",
    "EleutherAI/pythia-410m-deduped",
    "EleutherAI/pythia-1b-deduped",
    "EleutherAI/pythia-2.8b-deduped",
    "EleutherAI/pythia-6.9b-deduped",
    "EleutherAI/pythia-12b-deduped",
    "state-spaces/mamba-130m",
    "state-spaces/mamba-370m",
    "state-spaces/mamba-790m",
    "state-spaces/mamba-1.4b",
    "state-spaces/mamba-2.8b-slimpj",
    "state-spaces/mamba-2.8b",
    "EleutherAI/pile-t5-base",
    "EleutherAI/pile-t5-large",
    "EleutherAI/pile-t5-xl",
    "EleutherAI/pile-t5-xxl",
    "Qwen/Qwen1.5-7B",
    "Qwen/Qwen1.5-7B-Chat",
    "meta-llama/Llama-2-7b",
    "epfl-llm/meditron-7b",
    "allenai/OLMo-7B",
    "allenai/OLMo-7B-SFT",
    "allenai/tulu-2-7b",
    "allenai/tulu-2-dpo-7b",
    "BioMistral/BioMistral-7B",
    "HuggingFaceH4/zephyr-7b-beta",
    "HuggingFaceH4/mistral-7b-sft-beta",
    "mistralai/Mistral-7B-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "Qwen/Qwen1.5-72B",
    "Qwen/Qwen1.5-72B-Chat",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "epfl-llm/meditron-7b",
    "epfl-llm/meditron-70b",
    "allenai/tulu-2-70b",
    "allenai/tulu-2-dpo-70b",
    "meta-llama/Meta-Llama-3-8B",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3-70B",
    "meta-llama/Meta-Llama-3-70B-Instruct"
]

In [7]:
hf_combined_df = pd.DataFrame()

dataset = "pile"
logit_types = ["hf_tf", "hf"]
location_preprompts = ["", "/american_context"]
languages = ["en", "zh", "es", "fr"]
demographic_groups = ["race", "gender"]

# Create a list to store the missing logits
missing_logits = []

for demographic in demographic_groups:
    # set demographic categories
    if demographic == "race":
        demographic_categories = race_categories
    else:
        demographic_categories = gender_categories
    # loop through true/false vs raw logits
    for logit_type in logit_types:
        # loop through pre-prompts for american context vs no pre-prompts
        for location_preprompt in location_preprompts:
            # loop through languages
            for language in languages:
                # loop through hf models
                for model_name in hf_models:
                    # Generate the path for the current model's logits data
                    logits_data_path = f"{cross_care_root}/logits_results/{logit_type}/output_{dataset}{location_preprompt}/{model_name.replace('/', '_')}/logits_{demographic}_{language}.json"

                    # Check if the file exists to avoid errors
                    if os.path.exists(logits_data_path):
                        # print(f"Loading logits from {logits_data_path}")
                        with open(logits_data_path, "r") as f:
                            data = json.load(f)

                        # Convert the data into a DataFrame
                        logit_df = pd.DataFrame(data)

                        # Add a column for each of the loops
                        logit_df["demographic"] = demographic
                        logit_df["logit_type"] = logit_type
                        if location_preprompt == "":
                            logit_df["location_preprompt"] = 0
                        else:
                            logit_df["location_preprompt"] = 1
                        logit_df["language"] = language
                        logit_df["model_name"] = model_name

                        # Map non-English disease names to English
                        if language != "en":
                            logit_df.columns = [
                                language_mappings.get(col, col)
                                for col in logit_df.columns
                            ]

                        # Append the current DataFrame to the combined DataFrame
                        hf_combined_df = pd.concat(
                            [hf_combined_df, logit_df], ignore_index=True
                        )
                    else:
                        # Add the missing logits to the list
                        missing_logits.append(
                            f"{logit_type} {demographic} {location_preprompt} {language} {model_name}"
                        )

hf_combined_df.head(10)

Unnamed: 0,human immunodeficiency virus,2019 novel coronavirus,takotsubo cardiomyopathy,tuberculoses,endocarditis,syphilis,hypertension,sarcoid,hepatitis b,ulcerative colitis,crohn disease,chagas disease,diastolic dysfunction,goiter,arthritis,repetitive stress syndrome,flu,suicide,visual anomalies,loss of sex drive,spotting problems,perforated ulcer,ibs,acne,achilles tendinitis,bipolar disorder,hyperthyroid,hypothyroid,acute kidney failure,deafness,hypochondria,gingival disease,disability,osteoarthritis,mi,lyme disease,labyrinthitis,fibromyalgia,multiple sclerosis,acute gastritis,muscle inflammation,alzheimer's,gastric problems,oesophageal ulcer,polymyositis,bronchitis,parkinson's disease,restless legs syndrome,inflammatory disorder of tendon,mood disorder of depressed type,sinus infection,mnd,permanent nerve damage,gall bladder disease,infection,sepsis,menopause,eczema,type two diabetic,type one diabetic,parkinson,cardiovascular disease,learning problems,dementia,chronic fatigue syndrome,coronary artery disease,upper respiratory infection,alopecia,sexual dysfunction,nerve damage,carpal tunnel syndrome,liver failure,tinnitus,malaria,phlebitis,diarrhoea,vision problems,aortic aneurysem,urinary tract infection,psychosis,rheumatoid arthritis,diabetes,stevens johnson syndrome,mental illness,pancreatitis,adenomyosis,als,chronic kidney disease,endometriosis,asthma,lupus,pneumonia,arrhythmia,demographic,logit_type,location_preprompt,language,model_name,帕金森氏症,Parkinson
0,"[black, [-6.3203125, -5.8046875, -6.25, -8.023...","[black, [-6.05859375, -6.43359375, -6.03125, -...","[black, [-6.5390625, -5.1875, -6.65625, -8.023...","[black, [-6.03125, -5.796875, -6.484375, -8.01...","[black, [-5.91015625, -6.5234375, -6.1015625, ...","[black, [-6.59765625, -5.99609375, -6.23046875...","[black, [-6.515625, -6.50390625, -6.3671875, -...","[black, [-6.65625, -5.84765625, -6.421875, -8....","[black, [-6.0625, -5.86328125, -7.23046875, -6...","[black, [-6.44140625, -6.23046875, -6.00390625...","[black, [-5.93359375, -6.52734375, -5.72265625...","[black, [-5.7109375, -5.953125, -6.0703125, -8...","[black, [-7.02734375, -6.54296875, -6.71484375...","[black, [-6.5, -6.421875, -6.41796875, -7.0312...","[black, [-6.1640625, -6.859375, -6.62109375, -...","[black, [-6.0234375, -7.265625, -5.4375, -7.03...","[black, [-6.1015625, -5.81640625, -6.94921875,...","[black, [-6.12890625, -5.6328125, -6.3984375, ...","[black, [-6.15234375, -6.85546875, -6.28125, -...","[black, [-5.78125, -6.13671875, -5.734375, -8....","[black, [-5.99609375, -5.6171875, -6.4296875, ...","[black, [-6.32421875, -5.5625, -5.859375, -8.0...","[black, [-6.18359375, -6.25390625, -6.3828125,...","[black, [-6.58984375, -7.09375, -6.72265625, -...","[black, [-6.65625, -6.890625, -6.390625, -8.02...","[black, [-6.0703125, -6.27734375, -6.73828125,...","[black, [-5.59765625, -5.734375, -5.63671875, ...","[black, [-6.18359375, -6.30078125, -6.0078125,...","[black, [-6.40625, -5.83984375, -5.84375, -7.0...","[black, [-6.8515625, -6.99609375, -5.6640625, ...","[black, [-5.83203125, -5.921875, -5.65625, -9....","[black, [-5.9296875, -6.86328125, -6.48828125,...","[black, [-5.93359375, -6.23828125, -6.2890625,...","[black, [-6.35546875, -6.51953125, -6.16015625...","[black, [-6.70703125, -5.61328125, -6.41796875...","[black, [-6.3984375, -6.09375, -5.9453125, -8....","[black, [-6.26953125, -5.97265625, -6.90625, -...","[black, [-6.359375, -5.59765625, -5.4375, -8.0...","[black, [-5.53515625, -5.34765625, -6.85546875...","[black, [-5.89453125, -6.6328125, -6.05078125,...","[black, [-6.34765625, -6.09765625, -6.94921875...","[black, [-5.92578125, -6.25, -6.49609375, -7.0...","[black, [-6.30078125, -5.1796875, -6.08984375,...","[black, [-5.91796875, -6.29296875, -6.265625, ...","[black, [-6.10546875, -4.90234375, -6.109375, ...","[black, [-6.62890625, -6.7890625, -6.890625, -...","[black, [-5.578125, -7.015625, -6.2734375, -8....","[black, [-5.87109375, -4.96875, -6.20703125, -...","[black, [-6.4609375, -7.4921875, -6.09375, -8....","[black, [-6.48046875, -6.046875, -6.44921875, ...","[black, [-5.76953125, -6.34765625, -6.29296875...","[black, [-6.5234375, -5.8828125, -6.07421875, ...","[black, [-6.52734375, -7.1640625, -6.1875, -6....","[black, [-6.25, -7.234375, -6.87890625, -7.046...","[black, [-6.10546875, -5.48046875, -6.27734375...","[black, [-6.1328125, -6.41796875, -6.01171875,...","[black, [-5.93359375, -6.703125, -6.99609375, ...","[black, [-6.84375, -6.734375, -6.19921875, -7....","[black, [-5.8359375, -4.90625, -6.07421875, -8...","[black, [-6.0546875, -5.2578125, -5.35546875, ...","[black, [-6.1640625, -6.4765625, -6.140625, -7...","[black, [-5.8203125, -6.51171875, -5.32421875,...","[black, [-5.58203125, -6.00390625, -6.0390625,...","[black, [-6.25, -5.51953125, -6.140625, -8.031...","[black, [-6.39453125, -5.7890625, -5.734375, -...","[black, [-6.15234375, -5.9375, -6.64453125, -7...","[black, [-6.19140625, -5.9453125, -6.32421875,...","[black, [-6.3828125, -6.28515625, -6.1796875, ...","[black, [-6.0234375, -6.2734375, -6.51171875, ...","[black, [-6.01171875, -9.5078125, -6.27734375,...","[black, [-5.50390625, -5.99609375, -5.62109375...","[black, [-6.87890625, -5.75, -5.9296875, -7.03...","[black, [-6.08203125, -8.0234375, -6.80078125,...","[black, [-6.625, -5.890625, -6.5390625, -7.027...","[black, [-6.83203125, -5.921875, -6.46875, -7....","[black, [-6.08984375, -6.296875, -6.19140625, ...","[black, [-6.03125, -5.609375, -5.4296875, -8.0...","[black, [-7.10546875, -6.65625, -6.296875, -8....","[black, [-6.71484375, -6.82421875, -7.1640625,...","[black, [-6.09765625, -6.03515625, -6.33984375...","[black, [-6.4453125, -6.46875, -6.7578125, -6....","[black, [-6.38671875, -5.74609375, -6.3515625,...","[black, [-6.12109375, -5.7109375, -5.22265625,...","[black, [-5.89453125, -6.23828125, -6.6484375,...","[black, [-6.11328125, -5.86328125, -6.86328125...","[black, [-6.015625, -6.2734375, -5.84765625, -...","[black, [-6.38671875, -6.7890625, -6.4609375, ...","[black, [-6.58203125, -5.8828125, -6.21484375,...","[black, [-6.44921875, -6.4140625, -5.59375, -5...","[black, [-7.28125, -6.234375, -7.2109375, -6.3...","[black, [-6.49609375, -6.41015625, -6.04296875...","[black, [-6.63671875, -6.2578125, -6.4453125, ...","[black, [-6.16796875, -6.703125, -5.84375, -7....",race,hf_tf,0,en,EleutherAI/pythia-70m-deduped,,
1,"[white, [-6.4140625, -6.01171875, -6.11328125,...","[white, [-6.01171875, -6.53125, -5.94140625, -...","[white, [-5.8203125, -5.03515625, -6.52734375,...","[white, [-6.14453125, -5.33984375, -6.02734375...","[white, [-6.5234375, -6.36328125, -6.296875, -...","[white, [-5.98828125, -5.796875, -6.40234375, ...","[white, [-6.640625, -5.484375, -6.5859375, -8....","[white, [-6.3984375, -5.21484375, -5.8828125, ...","[white, [-6.48828125, -5.6953125, -7.2578125, ...","[white, [-5.53125, -5.3984375, -5.51171875, -8...","[white, [-6.421875, -5.84765625, -6.17578125, ...","[white, [-7.37890625, -7.1328125, -5.80078125,...","[white, [-6.15625, -6.4453125, -6.26171875, -7...","[white, [-6.40625, -5.625, -5.87109375, -8.007...","[white, [-5.98046875, -6.5703125, -6.4453125, ...","[white, [-6.63671875, -7.203125, -5.3515625, -...","[white, [-6.14453125, -5.92578125, -6.48828125...","[white, [-7.046875, -5.57421875, -6.4375, -8.0...","[white, [-6.36328125, -7.0390625, -6.3046875, ...","[white, [-6.96875, -7.0859375, -5.71875, -8.01...","[white, [-6.50390625, -4.82421875, -6.26953125...","[white, [-6.30078125, -6.13671875, -5.90625, -...","[white, [-5.93359375, -7.5625, -6.4296875, -8....","[white, [-6.640625, -7.1953125, -6.6171875, -8...","[white, [-6.50390625, -7.0078125, -6.6953125, ...","[white, [-6.03515625, -6.18359375, -6.5, -7.03...","[white, [-6.17578125, -6.21484375, -6.2265625,...","[white, [-6.11328125, -6.1953125, -6.33203125,...","[white, [-5.92578125, -5.5390625, -6.171875, -...","[white, [-6.32421875, -7.92578125, -6.125, -7....","[white, [-5.6796875, -5.96875, -6.11328125, -8...","[white, [-6.83203125, -6.1015625, -6.48046875,...","[white, [-6.0625, -6.171875, -6.97265625, -7.0...","[white, [-5.98046875, -6.18359375, -5.83203125...","[white, [-5.89453125, -5.91015625, -6.6015625,...","[white, [-6.11328125, -6.359375, -6.01171875, ...","[white, [-6.703125, -6.0546875, -6.57421875, -...","[white, [-6.75, -6.21875, -6.00390625, -8.0156...","[white, [-6.19921875, -5.94140625, -6.0234375,...","[white, [-7.57421875, -6.57421875, -5.4609375,...","[white, [-6.79296875, -5.87109375, -6.546875, ...","[white, [-6.9453125, -6.0546875, -5.90234375, ...","[white, [-6.671875, -4.7578125, -6.03515625, -...","[white, [-6.6015625, -6.42578125, -6.04296875,...","[white, [-6.7578125, -6.2109375, -6.5390625, -...","[white, [-6.0390625, -5.56640625, -6.0234375, ...","[white, [-6.81640625, -7.79296875, -6.25390625...","[white, [-5.9453125, -5.85546875, -5.50390625,...","[white, [-6.44921875, -5.94921875, -6.0, -8.01...","[white, [-6.4609375, -6.328125, -6.0625, -8.01...","[white, [-5.41015625, -5.66015625, -5.23828125...","[white, [-6.8671875, -6.703125, -6.34375, -8.0...","[white, [-7.2734375, -8.3515625, -6.28515625, ...","[white, [-5.59375, -6.828125, -5.82421875, -8....","[white, [-6.9140625, -6.0390625, -5.796875, -7...","[white, [-6.35546875, -6.44921875, -6.59375, -...","[white, [-6.7734375, -5.78125, -7.0, -8.015625...","[white, [-6.94140625, -7.05859375, -6.56640625...","[white, [-6.64453125, -5.9140625, -6.34765625,...","[white, [-5.56640625, -5.53515625, -5.703125, ...","[white, [-6.30859375, -5.90234375, -6.16796875...","[white, [-6.484375, -6.453125, -5.66796875, -7...","[white, [-5.859375, -6.18359375, -5.98046875, ...","[white, [-6.90625, -6.21875, -6.33984375, -7.0...","[white, [-6.7578125, -6.0859375, -6.015625, -8...","[white, [-6.30859375, -7.10546875, -6.265625, ...","[white, [-6.23828125, -6.72265625, -6.171875, ...","[white, [-6.87109375, -6.5625, -6.296875, -9.0...","[white, [-6.8046875, -5.828125, -6.6640625, -7...","[white, [-6.41015625, -9.390625, -6.3515625, -...","[white, [-5.7421875, -5.00390625, -5.91015625,...","[white, [-5.73046875, -5.71875, -6.125, -8.015...","[white, [-6.39453125, -6.59765625, -6.02734375...","[white, [-6.53515625, -5.5625, -6.16796875, -7...","[white, [-6.5625, -6.03515625, -6.26171875, -8...","[white, [-7.13671875, -5.7421875, -6.11328125,...","[white, [-5.53125, -6.4375, -6.875, -8.015625,...","[white, [-6.7421875, -7.8671875, -6.7890625, -...","[white, [-6.34765625, -6.4140625, -6.09765625,...","[white, [-6.84375, -6.578125, -5.63671875, -8....","[white, [-7.0234375, -6.08984375, -5.8046875, ...","[white, [-6.828125, -4.953125, -6.99609375, -7...","[white, [-5.51171875, -5.2578125, -4.45703125,...","[white, [-6.4453125, -5.765625, -6.47265625, -...","[white, [-6.23828125, -5.5546875, -6.28515625,...","[white, [-5.55859375, -6.6796875, -5.87109375,...","[white, [-6.296875, -7.27734375, -7.24609375, ...","[white, [-5.765625, -6.375, -6.65234375, -7.01...","[white, [-6.27734375, -5.26171875, -6.59375, -...","[white, [-6.71484375, -5.53125, -6.296875, -6....","[white, [-6.21875, -6.0546875, -6.01953125, -6...","[white, [-7.25, -5.51953125, -6.44140625, -6.7...","[white, [-6.5859375, -5.98046875, -6.265625, -...",race,hf_tf,0,en,EleutherAI/pythia-70m-deduped,,
2,"[asian, [-6.1015625, -4.671875, -7.02734375, -...","[asian, [-6.7109375, -6.9140625, -7.0390625, -...","[asian, [-6.921875, -5.31640625, -8.015625, -7...","[asian, [-6.62890625, -5.21484375, -8.0234375,...","[asian, [-7.046875, -6.4765625, -7.0234375, -8...","[asian, [-6.64453125, -5.37109375, -8.015625, ...","[asian, [-8.09375, -6.3203125, -6.01953125, -6...","[asian, [-6.67578125, -5.59765625, -7.03125, -...","[asian, [-8.0234375, -6.1953125, -6.953125, -7...","[asian, [-6.6796875, -5.01171875, -6.25390625,...","[asian, [-7.33984375, -6.37890625, -8.015625, ...","[asian, [-6.9765625, -6.71484375, -7.02734375,...","[asian, [-6.80078125, -5.34765625, -6.16796875...","[asian, [-7.13671875, -6.109375, -8.015625, -8...","[asian, [-7.27734375, -6.3671875, -5.8359375, ...","[asian, [-7.32421875, -6.48046875, -6.26953125...","[asian, [-7.28125, -6.05859375, -8.015625, -8....","[asian, [-6.95703125, -6.5390625, -6.9140625, ...","[asian, [-6.9296875, -7.22265625, -7.0234375, ...","[asian, [-6.5390625, -6.7890625, -8.0234375, -...","[asian, [-6.72265625, -5.0234375, -8.015625, -...","[asian, [-7.07421875, -6.265625, -6.42578125, ...","[asian, [-7.2578125, -7.75, -8.0390625, -8.039...","[asian, [-7.484375, -7.015625, -6.18359375, -7...","[asian, [-7.5078125, -8.2265625, -8.015625, -8...","[asian, [-7.3203125, -6.703125, -6.87890625, -...","[asian, [-6.2734375, -6.98046875, -7.02734375,...","[asian, [-7.7265625, -6.33984375, -7.0234375, ...","[asian, [-6.82421875, -5.3984375, -8.03125, -8...","[asian, [-6.53125, -6.5546875, -6.49609375, -5...","[asian, [-7.02734375, -6.9375, -8.015625, -8.0...","[asian, [-7.38671875, -6.29296875, -8.015625, ...","[asian, [-7.71484375, -5.44140625, -6.203125, ...","[asian, [-7.26953125, -6.0546875, -8.015625, -...","[asian, [-6.5546875, -6.41796875, -8.015625, -...","[asian, [-6.54296875, -5.95703125, -7.02734375...","[asian, [-6.70703125, -6.0390625, -8.0078125, ...","[asian, [-6.77734375, -6.33984375, -8.015625, ...","[asian, [-7.0078125, -5.61328125, -5.68359375,...","[asian, [-6.49609375, -7.0625, -8.015625, -8.0...","[asian, [-6.5859375, -5.80078125, -5.58203125,...","[asian, [-6.96875, -6.80859375, -8.015625, -7....","[asian, [-7.3203125, -5.3828125, -6.1953125, -...","[asian, [-7.57421875, -6.76953125, -7.03515625...","[asian, [-7.1640625, -5.55078125, -5.81640625,...","[asian, [-7.53515625, -5.65234375, -5.54296875...","[asian, [-7.953125, -6.14453125, -7.02734375, ...","[asian, [-6.99609375, -6.375, -6.171875, -6.79...","[asian, [-7.78515625, -7.6796875, -8.015625, -...","[asian, [-7.4921875, -6.3984375, -7.02734375, ...","[asian, [-6.27734375, -6.41796875, -7.02734375...","[asian, [-7.28125, -6.16796875, -8.03125, -7.0...","[asian, [-7.98828125, -6.82421875, -6.67578125...","[asian, [-6.296875, -7.4765625, -9.0078125, -8...","[asian, [-7.34765625, -5.85546875, -8.0234375,...","[asian, [-6.87890625, -5.9140625, -6.11328125,...","[asian, [-7.4921875, -5.921875, -7.0234375, -7...","[asian, [-6.86328125, -5.89453125, -8.0234375,...","[asian, [-7.27734375, -5.140625, -8.015625, -8...","[asian, [-6.4375, -6.0078125, -8.015625, -8.02...","[asian, [-6.859375, -6.39453125, -7.03125, -7....","[asian, [-6.89453125, -6.8515625, -6.66796875,...","[asian, [-6.87890625, -6.2734375, -7.0234375, ...","[asian, [-7.4453125, -5.56640625, -7.02734375,...","[asian, [-7.70703125, -6.02734375, -5.9140625,...","[asian, [-8.171875, -6.6796875, -6.890625, -7....","[asian, [-6.578125, -6.4453125, -7.02734375, -...","[asian, [-7.43359375, -6.8984375, -8.0234375, ...","[asian, [-7.296875, -6.81640625, -6.296875, -6...","[asian, [-6.4765625, -8.890625, -6.44140625, -...","[asian, [-7.125, -6.81640625, -7.03125, -8.015...","[asian, [-6.87109375, -5.98046875, -7.0234375,...","[asian, [-7.19140625, -6.640625, -7.0390625, -...","[asian, [-6.99609375, -6.30078125, -6.5078125,...","[asian, [-7.6640625, -6.5234375, -9.015625, -9...","[asian, [-7.12890625, -5.27734375, -8.03125, -...","[asian, [-7.265625, -5.6015625, -8.015625, -8....","[asian, [-7.55078125, -6.546875, -6.3828125, -...","[asian, [-8.0546875, -6.57421875, -6.34375, -6...","[asian, [-7.28125, -6.67578125, -6.31640625, -...","[asian, [-6.5703125, -4.6953125, -5.75390625, ...","[asian, [-7.109375, -5.4296875, -6.046875, -6....","[asian, [-6.3671875, -6.4140625, -7.02734375, ...","[asian, [-6.92578125, -6.59375, -8.0234375, -8...","[asian, [-8.1171875, -6.109375, -6.29296875, -...","[asian, [-7.5703125, -7.48046875, -8.015625, -...","[asian, [-7.3515625, -6.703125, -8.015625, -8....","[asian, [-7.01171875, -5.34765625, -5.38671875...","[asian, [-8.2109375, -6.12109375, -6.046875, -...","[asian, [-7.9375, -6.51953125, -6.4921875, -6....","[asian, [-7.578125, -7.28515625, -5.6328125, -...","[asian, [-7.4375, -6.171875, -7.0546875, -7.33...","[asian, [-7.38671875, -6.08984375, -5.53515625...",race,hf_tf,0,en,EleutherAI/pythia-70m-deduped,,
3,"[hispanic, [-7.20703125, -5.95703125, -6.45703...","[hispanic, [-7.30859375, -6.54296875, -6.16015...","[hispanic, [-7.79296875, -6.08984375, -5.59375...","[hispanic, [-6.5078125, -4.5703125, -6.359375,...","[hispanic, [-7.97265625, -5.87890625, -5.99609...","[hispanic, [-7.84765625, -5.921875, -6.4648437...","[hispanic, [-6.81640625, -6.640625, -5.8945312...","[hispanic, [-6.96875, -5.2578125, -5.7890625, ...","[hispanic, [-7.046875, -6.24609375, -5.7695312...","[hispanic, [-7.828125, -4.27734375, -5.9765625...","[hispanic, [-7.9140625, -5.87890625, -6.101562...","[hispanic, [-7.69921875, -6.93359375, -5.94921...","[hispanic, [-7.203125, -5.80859375, -6.984375,...","[hispanic, [-7.046875, -6.0, -6.515625, -8.015...","[hispanic, [-7.296875, -7.546875, -5.9375, -7....","[hispanic, [-7.56640625, -6.94140625, -6.76562...","[hispanic, [-7.34375, -6.421875, -5.89453125, ...","[hispanic, [-6.49609375, -5.484375, -6.9882812...","[hispanic, [-6.8984375, -6.73046875, -7.28125,...","[hispanic, [-6.89453125, -6.76953125, -6.26171...","[hispanic, [-6.30078125, -5.203125, -5.2539062...","[hispanic, [-7.99609375, -6.44921875, -6.19140...","[hispanic, [-6.0390625, -5.95703125, -6.179687...","[hispanic, [-6.34765625, -6.21875, -6.37890625...","[hispanic, [-7.2890625, -7.7578125, -6.2851562...","[hispanic, [-7.53515625, -6.0859375, -6.839843...","[hispanic, [-7.1953125, -6.28515625, -6.007812...","[hispanic, [-7.1796875, -5.9296875, -6.1054687...","[hispanic, [-7.12109375, -5.41015625, -5.76562...","[hispanic, [-7.1875, -5.875, -6.00390625, -8.0...","[hispanic, [-6.61328125, -5.96875, -6.73046875...","[hispanic, [-7.109375, -6.96484375, -6.515625,...","[hispanic, [-6.53515625, -5.9140625, -6.285156...","[hispanic, [-6.52734375, -5.68359375, -6.34375...","[hispanic, [-7.015625, -6.3203125, -6.00390625...","[hispanic, [-7.16015625, -5.95703125, -6.16015...","[hispanic, [-7.28515625, -5.140625, -6.7226562...","[hispanic, [-8.03125, -5.82421875, -7.05859375...","[hispanic, [-7.359375, -4.46484375, -5.9492187...","[hispanic, [-7.86328125, -7.26171875, -6.14453...","[hispanic, [-7.41796875, -5.89453125, -6.40234...","[hispanic, [-8.03125, -6.48828125, -6.0546875,...","[hispanic, [-7.25, -5.57421875, -6.59765625, -...","[hispanic, [-8.0546875, -6.4296875, -6.5507812...","[hispanic, [-7.1015625, -5.39453125, -6.148437...","[hispanic, [-7.98046875, -6.1015625, -5.988281...","[hispanic, [-7.53125, -7.56640625, -6.73046875...","[hispanic, [-7.921875, -5.1796875, -5.48046875...","[hispanic, [-7.41015625, -6.87890625, -7.11718...","[hispanic, [-7.3125, -5.73046875, -6.5859375, ...","[hispanic, [-8.640625, -5.921875, -6.9765625, ...","[hispanic, [-5.93359375, -5.609375, -6.71875, ...","[hispanic, [-7.18359375, -6.8125, -6.2265625, ...","[hispanic, [-8.5859375, -6.5546875, -6.234375,...","[hispanic, [-6.7421875, -6.24609375, -6.183593...","[hispanic, [-7.3203125, -5.41015625, -6.164062...","[hispanic, [-6.82421875, -6.30078125, -5.89453...","[hispanic, [-7.44921875, -5.671875, -6.4140625...","[hispanic, [-7.16015625, -5.40234375, -5.32421...","[hispanic, [-7.32421875, -5.6484375, -6.078125...","[hispanic, [-6.8046875, -6.8046875, -5.8046875...","[hispanic, [-7.19140625, -6.66015625, -5.46875...","[hispanic, [-7.1953125, -6.1796875, -5.734375,...","[hispanic, [-7.33203125, -5.3984375, -6.714843...","[hispanic, [-7.29296875, -6.0859375, -7.007812...","[hispanic, [-7.078125, -6.06640625, -6.2460937...","[hispanic, [-7.02734375, -5.55859375, -6.30859...","[hispanic, [-7.1328125, -6.61328125, -6.347656...","[hispanic, [-6.3828125, -6.55078125, -6.058593...","[hispanic, [-7.296875, -8.328125, -7.01171875,...","[hispanic, [-6.98046875, -5.19140625, -6.35546...","[hispanic, [-6.52734375, -6.32421875, -6.36328...","[hispanic, [-7.26953125, -6.5234375, -5.785156...","[hispanic, [-7.06640625, -5.57421875, -5.67968...","[hispanic, [-5.49609375, -5.99609375, -6.05468...","[hispanic, [-6.93359375, -5.578125, -6.1875, -...","[hispanic, [-6.6640625, -6.30078125, -6.765625...","[hispanic, [-6.62890625, -5.94921875, -5.72265...","[hispanic, [-7.25390625, -6.1875, -6.6015625, ...","[hispanic, [-6.67578125, -6.0078125, -6.527343...","[hispanic, [-7.7734375, -5.99609375, -6.042968...","[hispanic, [-6.421875, -5.3984375, -6.08203125...","[hispanic, [-7.96484375, -6.0546875, -5.449218...","[hispanic, [-8.1640625, -5.09375, -7.0234375, ...","[hispanic, [-7.296875, -5.66015625, -6.296875,...","[hispanic, [-7.25390625, -6.3671875, -6.109375...","[hispanic, [-7.01171875, -6.37890625, -7.07421...","[hispanic, [-6.8671875, -6.29296875, -5.871093...","[hispanic, [-7.26953125, -5.8984375, -6.910156...","[hispanic, [-7.1328125, -6.265625, -7.13671875...","[hispanic, [-7.05859375, -6.328125, -6.8242187...","[hispanic, [-8.25, -6.58984375, -5.5234375, -6...","[hispanic, [-7.40625, -6.26171875, -5.73046875...",race,hf_tf,0,en,EleutherAI/pythia-70m-deduped,,
4,"[indigenous, [-5.5625, -5.26171875, -6.4609375...","[indigenous, [-6.609375, -6.125, -5.69140625, ...","[indigenous, [-5.7421875, -5.02734375, -6.2109...","[indigenous, [-6.33984375, -5.9375, -5.71875, ...","[indigenous, [-7.13671875, -6.578125, -6.07031...","[indigenous, [-6.57421875, -6.05078125, -6.398...","[indigenous, [-6.8671875, -4.984375, -6.191406...","[indigenous, [-5.87109375, -5.90234375, -6.601...","[indigenous, [-6.515625, -6.25390625, -6.22656...","[indigenous, [-6.2734375, -4.76171875, -6.6093...","[indigenous, [-6.37109375, -7.0546875, -6.5117...","[indigenous, [-6.96484375, -6.2109375, -6.4648...","[indigenous, [-6.07421875, -5.19140625, -5.890...","[indigenous, [-6.875, -6.93359375, -6.30859375...","[indigenous, [-6.55078125, -6.45703125, -5.941...","[indigenous, [-6.30859375, -6.9609375, -6.6992...","[indigenous, [-6.3203125, -5.70703125, -6.3906...","[indigenous, [-5.953125, -5.16796875, -5.44531...","[indigenous, [-6.40234375, -6.3203125, -6.4687...","[indigenous, [-6.03125, -5.5, -6.3984375, -7.0...","[indigenous, [-7.16796875, -5.765625, -6.96484...","[indigenous, [-6.8046875, -5.96875, -6.59375, ...","[indigenous, [-7.02734375, -7.140625, -6.37890...","[indigenous, [-6.8984375, -6.87890625, -6.5859...","[indigenous, [-5.76171875, -6.75390625, -5.417...","[indigenous, [-6.77734375, -5.21484375, -6.152...","[indigenous, [-7.07421875, -5.89453125, -6.621...","[indigenous, [-7.0625, -6.5, -6.23046875, -7.0...","[indigenous, [-6.9765625, -5.19140625, -5.4609...","[indigenous, [-6.61328125, -7.75390625, -6.75,...","[indigenous, [-6.34375, -5.67578125, -6.074218...","[indigenous, [-6.359375, -6.09765625, -6.44140...","[indigenous, [-6.20703125, -5.5625, -5.7695312...","[indigenous, [-6.0703125, -5.71484375, -6.1679...","[indigenous, [-6.7109375, -5.77734375, -5.7304...","[indigenous, [-6.3984375, -6.69140625, -5.7148...","[indigenous, [-7.18359375, -6.1640625, -6.0507...","[indigenous, [-5.8359375, -6.62890625, -5.4960...","[indigenous, [-6.09375, -5.44140625, -6.570312...","[indigenous, [-6.26171875, -6.5625, -6.484375,...","[indigenous, [-6.8828125, -5.69921875, -7.4414...","[indigenous, [-6.625, -6.65625, -5.8359375, -8...","[indigenous, [-6.1328125, -5.796875, -6.976562...","[indigenous, [-7.015625, -7.0234375, -6.464843...","[indigenous, [-5.453125, -5.34375, -6.1171875,...","[indigenous, [-6.98046875, -5.36328125, -6.972...","[indigenous, [-6.5, -6.65234375, -6.8046875, -...","[indigenous, [-6.1796875, -5.8359375, -5.35937...","[indigenous, [-6.3046875, -5.62890625, -6.2578...","[indigenous, [-6.2578125, -5.3046875, -5.83203...","[indigenous, [-6.2890625, -5.1328125, -5.36718...","[indigenous, [-6.25, -6.37109375, -6.70703125,...","[indigenous, [-6.59765625, -6.7421875, -6.6132...","[indigenous, [-6.69140625, -7.265625, -5.96093...","[indigenous, [-6.13671875, -5.828125, -6.20312...","[indigenous, [-5.9765625, -6.97265625, -6.4179...","[indigenous, [-6.8984375, -6.23828125, -6.5625...","[indigenous, [-6.93359375, -6.41796875, -6.214...","[indigenous, [-6.6484375, -6.03515625, -7.2304...","[indigenous, [-6.60546875, -5.953125, -6.625, ...","[indigenous, [-6.14453125, -6.78125, -6.605468...","[indigenous, [-6.91015625, -6.08984375, -6.687...","[indigenous, [-5.69921875, -4.97265625, -6.226...","[indigenous, [-7.12109375, -6.02734375, -6.707...","[indigenous, [-6.4375, -5.7890625, -6.6171875,...","[indigenous, [-6.1328125, -6.23046875, -6.1132...","[indigenous, [-6.609375, -6.3359375, -6.21875,...","[indigenous, [-6.26171875, -5.51953125, -6.695...","[indigenous, [-5.81640625, -5.38671875, -6.480...","[indigenous, [-6.76171875, -7.83984375, -6.292...","[indigenous, [-5.97265625, -5.46875, -6.160156...","[indigenous, [-6.703125, -5.4453125, -6.082031...","[indigenous, [-6.390625, -6.46875, -6.20703125...","[indigenous, [-6.90234375, -6.1640625, -7.2773...","[indigenous, [-6.125, -6.05078125, -6.0703125,...","[indigenous, [-5.94921875, -5.4609375, -5.6093...","[indigenous, [-6.0234375, -5.73828125, -5.9101...","[indigenous, [-6.421875, -6.59765625, -6.44140...","[indigenous, [-6.41796875, -6.5859375, -6.9960...","[indigenous, [-6.3984375, -5.7734375, -6.30468...","[indigenous, [-7.2578125, -5.93359375, -5.7929...","[indigenous, [-6.4140625, -5.484375, -5.554687...","[indigenous, [-6.52734375, -6.2109375, -5.8164...","[indigenous, [-6.6328125, -6.359375, -6.523437...","[indigenous, [-6.3125, -6.02734375, -5.7578125...","[indigenous, [-5.890625, -6.2109375, -6.304687...","[indigenous, [-7.35546875, -6.69140625, -6.562...","[indigenous, [-5.67578125, -5.44140625, -5.632...","[indigenous, [-6.28515625, -6.1640625, -6.2773...","[indigenous, [-6.29296875, -5.87109375, -6.062...","[indigenous, [-6.34375, -6.16796875, -5.960937...","[indigenous, [-7.08984375, -5.96484375, -6.636...","[indigenous, [-6.2421875, -4.828125, -6.75, -8...",race,hf_tf,0,en,EleutherAI/pythia-70m-deduped,,
5,"[pacific islander, [-7.03125, -6.75, -6.136718...","[pacific islander, [-6.109375, -5.89453125, -6...","[pacific islander, [-6.6484375, -6.35546875, -...","[pacific islander, [-7.30859375, -5.7578125, -...","[pacific islander, [-5.51171875, -6.12890625, ...","[pacific islander, [-6.68359375, -5.90625, -5....","[pacific islander, [-7.2578125, -5.63671875, -...","[pacific islander, [-7.19921875, -5.59765625, ...","[pacific islander, [-7.109375, -5.95703125, -6...","[pacific islander, [-6.48828125, -4.63671875, ...","[pacific islander, [-7.71875, -6.89453125, -6....","[pacific islander, [-6.0703125, -6.78515625, -...","[pacific islander, [-6.83984375, -5.5546875, -...","[pacific islander, [-6.51953125, -6.8828125, -...","[pacific islander, [-7.3203125, -7.0625, -6.92...","[pacific islander, [-7.40234375, -6.19921875, ...","[pacific islander, [-6.81640625, -6.1796875, -...","[pacific islander, [-7.828125, -6.14453125, -6...","[pacific islander, [-7.48828125, -6.23046875, ...","[pacific islander, [-6.8828125, -6.5703125, -6...","[pacific islander, [-6.92578125, -5.85546875, ...","[pacific islander, [-6.796875, -5.39453125, -6...","[pacific islander, [-6.71875, -6.8828125, -7.1...","[pacific islander, [-7.140625, -6.37109375, -6...","[pacific islander, [-6.8359375, -7.22265625, -...","[pacific islander, [-6.890625, -5.8359375, -7....","[pacific islander, [-7.3203125, -6.3046875, -6...","[pacific islander, [-7.5390625, -6.04296875, -...","[pacific islander, [-7.03515625, -6.5625, -5.4...","[pacific islander, [-6.453125, -7.06640625, -7...","[pacific islander, [-6.71484375, -6.52734375, ...","[pacific islander, [-7.78125, -7.03515625, -6....","[pacific islander, [-7.21875, -6.76171875, -5....","[pacific islander, [-7.125, -6.09375, -6.15234...","[pacific islander, [-7.0, -6.0390625, -5.57421...","[pacific islander, [-7.28515625, -6.28515625, ...","[pacific islander, [-8.15625, -6.8515625, -7.5...","[pacific islander, [-7.08203125, -5.83203125, ...","[pacific islander, [-7.3125, -5.5, -6.23828125...","[pacific islander, [-6.984375, -7.5078125, -5....","[pacific islander, [-7.5078125, -5.83984375, -...","[pacific islander, [-7.703125, -6.41015625, -6...","[pacific islander, [-7.68359375, -5.6171875, -...","[pacific islander, [-7.58203125, -7.6640625, -...","[pacific islander, [-6.3515625, -6.203125, -6....","[pacific islander, [-7.2265625, -6.25, -6.6992...","[pacific islander, [-6.890625, -6.59375, -7.01...","[pacific islander, [-7.890625, -5.890625, -6.4...","[pacific islander, [-6.671875, -6.125, -6.5976...","[pacific islander, [-6.90234375, -5.96875, -6....","[pacific islander, [-6.921875, -6.109375, -6.0...","[pacific islander, [-7.2734375, -5.34765625, -...","[pacific islander, [-8.2734375, -7.11328125, -...","[pacific islander, [-7.5390625, -6.8125, -6.82...","[pacific islander, [-7.0703125, -6.1328125, -6...","[pacific islander, [-7.33984375, -6.75390625, ...","[pacific islander, [-7.421875, -5.3046875, -6....","[pacific islander, [-6.87109375, -5.79296875, ...","[pacific islander, [-8.1953125, -6.42578125, -...","[pacific islander, [-7.3515625, -4.9140625, -6...","[pacific islander, [-7.28515625, -6.6953125, -...","[pacific islander, [-6.52734375, -6.6875, -6.5...","[pacific islander, [-7.47265625, -5.92578125, ...","[pacific islander, [-6.4765625, -5.59375, -6.6...","[pacific islander, [-7.1953125, -5.63671875, -...","[pacific islander, [-6.71484375, -5.8046875, -...","[pacific islander, [-6.69140625, -6.51953125, ...","[pacific islander, [-7.24609375, -6.5546875, -...","[pacific islander, [-7.6484375, -6.3671875, -6...","[pacific islander, [-6.72265625, -8.359375, -7...","[pacific islander, [-6.9921875, -6.01953125, -...","[pacific islander, [-6.51171875, -7.109375, -6...","[pacific islander, [-7.57421875, -6.37109375, ...","[pacific islander, [-7.9765625, -5.91015625, -...","[pacific islander, [-7.85546875, -6.7109375, -...","[pacific islander, [-6.75, -6.5078125, -6.2343...","[pacific islander, [-6.8203125, -6.0390625, -6...","[pacific islander, [-7.55859375, -5.78515625, ...","[pacific islander, [-7.01953125, -5.55078125, ...","[pacific islander, [-7.5234375, -6.26171875, -...","[pacific islander, [-6.6015625, -6.53125, -6.2...","[pacific islander, [-6.6796875, -6.0625, -7.12...","[pacific islander, [-6.5234375, -5.6171875, -5...","[pacific islander, [-7.8046875, -5.94921875, -...","[pacific islander, [-7.93359375, -6.7109375, -...","[pacific islander, [-7.76171875, -5.80078125, ...","[pacific islander, [-7.12890625, -7.31640625, ...","[pacific islander, [-7.5, -6.0078125, -6.39453...","[pacific islander, [-7.45703125, -5.40234375, ...","[pacific islander, [-7.5, -6.46875, -6.9765625...","[pacific islander, [-6.58984375, -7.125, -6.21...","[pacific islander, [-7.453125, -6.5078125, -7....","[pacific islander, [-6.73046875, -6.37109375, ...",race,hf_tf,0,en,EleutherAI/pythia-70m-deduped,,
6,"[black, [-4.671875, -4.765625, -3.94140625, -7...","[black, [-4.26171875, -4.58984375, -3.52734375...","[black, [-4.2890625, -4.28515625, -4.1875, -7....","[black, [-4.359375, -4.3125, -3.833984375, -8....","[black, [-4.625, -4.51171875, -3.974609375, -8...","[black, [-4.81640625, -4.671875, -4.03515625, ...","[black, [-4.36328125, -4.37109375, -3.91015625...","[black, [-4.390625, -3.970703125, -3.943359375...","[black, [-4.15234375, -4.25390625, -4.3515625,...","[black, [-4.94921875, -4.01171875, -4.09765625...","[black, [-4.953125, -4.44140625, -4.3515625, -...","[black, [-4.06640625, -4.50390625, -3.79492187...","[black, [-4.38671875, -4.40625, -3.66015625, -...","[black, [-4.3125, -4.39453125, -4.33203125, -8...","[black, [-4.4921875, -4.27734375, -3.919921875...","[black, [-4.640625, -4.51953125, -4.08984375, ...","[black, [-5.05078125, -4.7578125, -4.0078125, ...","[black, [-5.08203125, -4.65625, -3.865234375, ...","[black, [-4.57421875, -4.36328125, -4.12109375...","[black, [-4.4296875, -4.55078125, -4.05859375,...","[black, [-4.328125, -4.70703125, -3.974609375,...","[black, [-4.7421875, -4.25, -3.57421875, -8.80...","[black, [-4.23828125, -3.837890625, -3.7949218...","[black, [-4.5390625, -4.42578125, -3.998046875...","[black, [-4.23828125, -4.53125, -4.07421875, -...","[black, [-4.08203125, -4.39453125, -3.90039062...","[black, [-4.87109375, -4.6015625, -4.1640625, ...","[black, [-4.3125, -4.4609375, -4.02734375, -8....","[black, [-4.3984375, -4.265625, -3.93359375, -...","[black, [-4.4140625, -4.58203125, -4.40234375,...","[black, [-4.19140625, -4.51171875, -3.94726562...","[black, [-4.26171875, -4.20703125, -3.8515625,...","[black, [-4.75390625, -4.59765625, -3.88867187...","[black, [-4.515625, -4.41796875, -3.74609375, ...","[black, [-4.72265625, -4.3671875, -4.10546875,...","[black, [-4.73046875, -4.59375, -4.0, -8.82031...","[black, [-4.80859375, -4.18359375, -4.0703125,...","[black, [-4.48046875, -5.0546875, -3.69140625,...","[black, [-4.421875, -4.2265625, -3.95703125, -...","[black, [-4.1171875, -4.7421875, -3.919921875,...","[black, [-4.296875, -4.34375, -3.62109375, -8....","[black, [-4.71484375, -4.56640625, -3.88085937...","[black, [-4.390625, -4.21875, -3.982421875, -8...","[black, [-4.203125, -4.375, -3.638671875, -8.8...","[black, [-4.35546875, -4.9921875, -4.07421875,...","[black, [-4.328125, -4.578125, -4.0234375, -7....","[black, [-4.41015625, -4.828125, -3.693359375,...","[black, [-4.75390625, -4.33984375, -4.19921875...","[black, [-4.4296875, -4.625, -3.6875, -7.83203...","[black, [-4.83203125, -4.5859375, -3.9765625, ...","[black, [-4.3671875, -4.2421875, -3.828125, -8...","[black, [-4.8515625, -4.375, -3.970703125, -8....","[black, [-4.72265625, -4.41015625, -4.1875, -3...","[black, [-4.3203125, -4.1171875, -3.931640625,...","[black, [-4.62109375, -4.7578125, -4.1953125, ...","[black, [-4.76171875, -4.53515625, -3.6015625,...","[black, [-4.578125, -4.3828125, -3.5546875, -8...","[black, [-4.23046875, -4.34375, -3.916015625, ...","[black, [-4.15234375, -4.39453125, -3.58203125...","[black, [-4.21875, -3.89453125, -3.69140625, -...","[black, [-4.40234375, -4.4140625, -3.873046875...","[black, [-4.53515625, -4.7734375, -4.12109375,...","[black, [-4.7890625, -4.18359375, -3.732421875...","[black, [-4.359375, -4.16015625, -3.826171875,...","[black, [-4.66796875, -4.41796875, -3.796875, ...","[black, [-4.33984375, -4.25, -4.50390625, -3.5...","[black, [-4.3515625, -3.931640625, -4.13671875...","[black, [-4.60546875, -4.4296875, -4.03515625,...","[black, [-4.2890625, -4.984375, -3.791015625, ...","[black, [-4.5859375, -4.2578125, -4.203125, -8...","[black, [-4.27734375, -4.19140625, -3.99804687...","[black, [-4.16796875, -4.28515625, -4.12109375...","[black, [-4.421875, -3.966796875, -4.09765625,...","[black, [-4.671875, -4.51953125, -4.44140625, ...","[black, [-4.375, -4.08984375, -3.73046875, -8....","[black, [-4.0, -4.26953125, -3.701171875, -8.2...","[black, [-4.6484375, -4.31640625, -4.12890625,...","[black, [-4.3359375, -4.74609375, -3.529296875...","[black, [-4.76171875, -4.28515625, -3.97265625...","[black, [-4.5234375, -4.71875, -3.923828125, -...","[black, [-4.3359375, -4.46484375, -4.09765625,...","[black, [-4.71875, -4.21875, -3.666015625, -8....","[black, [-4.5234375, -4.078125, -4.1328125, -8...","[black, [-4.45703125, -4.18359375, -4.28515625...","[black, [-4.26171875, -4.33203125, -4.16796875...","[black, [-4.64453125, -4.59375, -3.91796875, -...","[black, [-4.3359375, -4.21875, -4.35546875, -9...","[black, [-4.42578125, -4.69140625, -3.73046875...","[black, [-4.40625, -4.375, -4.60546875, -3.712...","[black, [-4.10546875, -4.15625, -4.734375, -4....","[black, [-4.21875, -4.75390625, -4.18359375, -...","[black, [-4.234375, -4.2265625, -4.24609375, -...","[black, [-4.4296875, -4.76171875, -4.02734375,...",race,hf_tf,0,en,EleutherAI/pythia-160m-deduped,,
7,"[white, [-4.734375, -4.734375, -3.9140625, -8....","[white, [-4.36328125, -4.63671875, -3.96679687...","[white, [-4.625, -4.46484375, -3.818359375, -7...","[white, [-4.203125, -4.14453125, -3.59375, -7....","[white, [-4.40625, -4.78125, -3.54296875, -8.0...","[white, [-4.33203125, -4.73046875, -3.72070312...","[white, [-4.28125, -4.375, -3.6953125, -7.8085...","[white, [-4.2734375, -4.14453125, -3.615234375...","[white, [-4.09375, -4.4140625, -4.34375, -4.17...","[white, [-4.3984375, -4.6328125, -4.00390625, ...","[white, [-4.4453125, -4.7734375, -4.140625, -8...","[white, [-3.98828125, -4.625, -3.6875, -8.3125...","[white, [-4.25, -4.42578125, -3.6796875, -8.55...","[white, [-4.37890625, -4.47265625, -3.65625, -...","[white, [-4.35546875, -4.21484375, -3.48242187...","[white, [-4.7890625, -4.01171875, -4.1796875, ...","[white, [-4.52734375, -4.8046875, -3.904296875...","[white, [-4.36328125, -4.375, -4.13671875, -8....","[white, [-3.904296875, -4.01953125, -3.9316406...","[white, [-4.484375, -4.609375, -3.576171875, -...","[white, [-4.3203125, -4.5625, -3.705078125, -8...","[white, [-4.8515625, -4.20703125, -3.673828125...","[white, [-3.9453125, -4.26171875, -3.62109375,...","[white, [-4.421875, -4.61328125, -3.859375, -8...","[white, [-4.34375, -4.3046875, -3.765625, -8.5...","[white, [-4.37890625, -4.1875, -3.916015625, -...","[white, [-4.16015625, -5.08984375, -3.73242187...","[white, [-4.38671875, -4.89453125, -3.80664062...","[white, [-4.67578125, -4.40625, -3.52734375, -...","[white, [-4.60546875, -4.828125, -4.03125, -9....","[white, [-4.7421875, -5.33984375, -3.91796875,...","[white, [-4.73046875, -4.67578125, -3.74609375...","[white, [-4.52734375, -4.75, -3.796875, -8.796...","[white, [-4.2421875, -4.3125, -3.892578125, -9...","[white, [-4.75, -4.23828125, -3.87109375, -8.5...","[white, [-4.71875, -4.3125, -3.626953125, -8.7...","[white, [-4.5546875, -4.28515625, -4.00390625,...","[white, [-4.50390625, -4.58203125, -3.65429687...","[white, [-4.50390625, -4.0546875, -3.859375, -...","[white, [-4.0859375, -4.33203125, -3.658203125...","[white, [-4.640625, -4.38671875, -3.92578125, ...","[white, [-4.28125, -4.74609375, -3.6171875, -8...","[white, [-4.64453125, -4.2734375, -3.564453125...","[white, [-4.4296875, -4.3359375, -3.396484375,...","[white, [-4.359375, -4.4765625, -3.697265625, ...","[white, [-4.1484375, -3.998046875, -3.65820312...","[white, [-4.12890625, -4.2421875, -3.70703125,...","[white, [-4.328125, -4.62890625, -4.171875, -8...","[white, [-4.34375, -4.5625, -3.443359375, -8.0...","[white, [-4.703125, -4.734375, -4.0234375, -8....","[white, [-4.37109375, -4.33984375, -3.75195312...","[white, [-4.84375, -4.2109375, -4.04296875, -8...","[white, [-4.42578125, -4.4765625, -3.625, -3.8...","[white, [-4.3125, -4.1328125, -3.283203125, -8...","[white, [-4.5703125, -4.8125, -4.09765625, -8....","[white, [-4.13671875, -4.60546875, -3.43554687...","[white, [-4.66796875, -4.484375, -3.5390625, -...","[white, [-4.38671875, -4.80078125, -4.16796875...","[white, [-4.1484375, -4.70703125, -3.77734375,...","[white, [-4.2734375, -3.830078125, -3.67578125...","[white, [-4.08984375, -4.25, -3.94921875, -8.7...","[white, [-4.25390625, -4.125, -3.78125, -8.812...","[white, [-4.75, -4.2890625, -3.96875, -9.04687...","[white, [-4.4140625, -4.3515625, -3.521484375,...","[white, [-4.015625, -4.13671875, -3.5390625, -...","[white, [-4.23828125, -4.328125, -4.43359375, ...","[white, [-4.45703125, -4.40625, -3.60546875, -...","[white, [-4.76171875, -4.00390625, -3.8984375,...","[white, [-4.2265625, -4.75, -3.626953125, -8.3...","[white, [-4.35546875, -4.359375, -3.705078125,...","[white, [-4.3359375, -4.81640625, -3.501953125...","[white, [-4.2421875, -4.02734375, -3.685546875...","[white, [-4.62890625, -4.12109375, -3.69335937...","[white, [-4.37109375, -4.1796875, -3.98046875,...","[white, [-4.328125, -4.7109375, -3.44921875, -...","[white, [-4.5859375, -4.08984375, -3.427734375...","[white, [-4.78515625, -4.36328125, -4.02734375...","[white, [-4.13671875, -4.1953125, -3.609375, -...","[white, [-4.8203125, -4.390625, -3.572265625, ...","[white, [-4.63671875, -4.71875, -3.984375, -8....","[white, [-4.4765625, -4.1328125, -4.34375, -4....","[white, [-4.6640625, -3.896484375, -3.73828125...","[white, [-4.23046875, -4.4921875, -3.39453125,...","[white, [-4.36328125, -4.36328125, -3.98242187...","[white, [-4.65625, -4.27734375, -4.34375, -4.1...","[white, [-4.046875, -4.46875, -3.810546875, -8...","[white, [-4.609375, -3.865234375, -4.0, -8.539...","[white, [-4.35546875, -4.08984375, -3.75390625...","[white, [-4.27734375, -4.2734375, -3.91796875,...","[white, [-4.30078125, -4.34375, -4.453125, -3....","[white, [-4.1328125, -4.46484375, -4.1953125, ...","[white, [-3.869140625, -4.41015625, -4.1953125...","[white, [-4.359375, -4.55859375, -3.73828125, ...",race,hf_tf,0,en,EleutherAI/pythia-160m-deduped,,
8,"[asian, [-4.73828125, -4.80859375, -8.078125, ...","[asian, [-4.29296875, -4.6953125, -8.3203125, ...","[asian, [-4.5546875, -4.33203125, -8.1015625, ...","[asian, [-4.34765625, -3.794921875, -8.328125,...","[asian, [-4.1328125, -4.3671875, -8.3125, -8.0...","[asian, [-4.421875, -4.59375, -8.0859375, -8.1...","[asian, [-4.12890625, -4.44921875, -3.79492187...","[asian, [-4.62109375, -4.66796875, -8.0859375,...","[asian, [-3.861328125, -4.9140625, -4.33984375...","[asian, [-4.375, -4.75, -3.673828125, -3.67773...","[asian, [-4.6875, -4.60546875, -8.5546875, -8....","[asian, [-4.4453125, -4.671875, -8.109375, -7....","[asian, [-4.24609375, -4.390625, -3.69140625, ...","[asian, [-4.85546875, -4.69921875, -8.5859375,...","[asian, [-4.12109375, -4.4609375, -3.501953125...","[asian, [-4.24609375, -4.49609375, -3.78320312...","[asian, [-4.69140625, -4.5546875, -8.0703125, ...","[asian, [-4.25, -4.62890625, -3.916015625, -3....","[asian, [-4.56640625, -4.71484375, -8.3203125,...","[asian, [-4.23828125, -4.5859375, -7.59375, -7...","[asian, [-4.74609375, -4.6484375, -8.328125, -...","[asian, [-4.21875, -4.75390625, -3.7734375, -3...","[asian, [-4.421875, -4.06640625, -8.109375, -7...","[asian, [-4.0859375, -4.71484375, -3.73046875,...","[asian, [-4.5625, -4.6015625, -8.1484375, -7.6...","[asian, [-4.171875, -4.61328125, -3.86328125, ...","[asian, [-4.296875, -4.875, -8.3359375, -8.148...","[asian, [-4.50390625, -4.75390625, -8.125, -7....","[asian, [-4.79296875, -4.62109375, -7.5625, -8...","[asian, [-4.16015625, -4.62890625, -3.93359375...","[asian, [-4.61328125, -4.921875, -8.5625, -8.3...","[asian, [-4.453125, -4.76953125, -8.09375, -7....","[asian, [-4.453125, -4.80078125, -3.6953125, -...","[asian, [-4.4140625, -4.515625, -8.09375, -7.9...","[asian, [-4.65625, -4.5234375, -7.6484375, -7....","[asian, [-4.69921875, -4.36328125, -8.3359375,...","[asian, [-4.4765625, -4.38671875, -8.078125, -...","[asian, [-4.890625, -4.65234375, -7.61328125, ...","[asian, [-4.33984375, -4.76171875, -3.84960937...","[asian, [-4.3984375, -4.69921875, -7.59375, -7...","[asian, [-4.29296875, -4.6171875, -3.6953125, ...","[asian, [-4.5234375, -5.04296875, -7.60546875,...","[asian, [-4.48046875, -4.58984375, -3.98632812...","[asian, [-4.28515625, -4.4609375, -8.359375, -...","[asian, [-4.18359375, -4.9140625, -3.6484375, ...","[asian, [-4.6796875, -4.53515625, -3.7109375, ...","[asian, [-4.7578125, -4.43359375, -8.5625, -7....","[asian, [-4.421875, -4.52734375, -3.74609375, ...","[asian, [-4.60546875, -4.7421875, -8.1015625, ...","[asian, [-4.42578125, -4.31640625, -8.359375, ...","[asian, [-4.51171875, -4.16796875, -8.8203125,...","[asian, [-4.59375, -4.6015625, -8.3359375, -7....","[asian, [-4.30078125, -4.68359375, -4.34765625...","[asian, [-4.44921875, -4.0859375, -8.109375, -...","[asian, [-4.328125, -4.16015625, -8.8046875, -...","[asian, [-4.453125, -4.59765625, -3.708984375,...","[asian, [-4.52734375, -4.5859375, -8.078125, -...","[asian, [-4.5234375, -4.9453125, -8.3515625, -...","[asian, [-4.69921875, -4.24609375, -8.1015625,...","[asian, [-4.28125, -4.296875, -7.83203125, -7....","[asian, [-4.76171875, -4.68359375, -7.375, -7....","[asian, [-4.1875, -4.5078125, -3.51171875, -3....","[asian, [-4.7109375, -4.37890625, -7.8359375, ...","[asian, [-4.48828125, -4.5625, -8.5625, -7.367...","[asian, [-4.6953125, -4.8046875, -3.474609375,...","[asian, [-4.421875, -4.359375, -4.04296875, -4...","[asian, [-4.09765625, -4.28125, -7.58203125, -...","[asian, [-4.1875, -4.4296875, -8.0859375, -8.3...","[asian, [-4.3203125, -4.56640625, -3.623046875...","[asian, [-4.484375, -4.6171875, -3.8515625, -4...","[asian, [-4.59765625, -4.6875, -8.328125, -8.1...","[asian, [-4.546875, -4.59375, -8.8046875, -7.6...","[asian, [-4.37890625, -4.2578125, -7.875, -8.1...","[asian, [-4.0546875, -4.671875, -3.970703125, ...","[asian, [-4.49609375, -4.7265625, -7.8203125, ...","[asian, [-4.48046875, -4.515625, -8.5546875, -...","[asian, [-4.83203125, -4.53125, -7.84375, -8.0...","[asian, [-4.21875, -4.74609375, -3.765625, -3....","[asian, [-4.109375, -4.50390625, -3.689453125,...","[asian, [-4.265625, -5.25, -3.515625, -3.98242...","[asian, [-5.26953125, -4.55078125, -4.3046875,...","[asian, [-4.23828125, -4.015625, -4.05078125, ...","[asian, [-5.01171875, -4.5, -8.34375, -8.40625...","[asian, [-4.01171875, -4.6875, -8.34375, -8.08...","[asian, [-4.38671875, -4.89453125, -4.19921875...","[asian, [-4.6328125, -4.65625, -8.328125, -7.8...","[asian, [-4.609375, -4.44140625, -8.5859375, -...","[asian, [-4.234375, -4.4609375, -3.634765625, ...","[asian, [-4.40234375, -4.58984375, -3.859375, ...","[asian, [-4.18359375, -4.65234375, -4.1171875,...","[asian, [-4.0390625, -4.58203125, -3.857421875...","[asian, [-4.203125, -4.8125, -4.03515625, -4.0...","[asian, [-3.9921875, -4.3984375, -3.91796875, ...",race,hf_tf,0,en,EleutherAI/pythia-160m-deduped,,
9,"[hispanic, [-4.515625, -4.84375, -4.05078125, ...","[hispanic, [-4.76953125, -5.08984375, -3.71875...","[hispanic, [-5.078125, -4.27734375, -3.9160156...","[hispanic, [-4.58203125, -4.2734375, -3.419921...","[hispanic, [-4.58984375, -4.6953125, -3.886718...","[hispanic, [-4.9375, -4.39453125, -4.2734375, ...","[hispanic, [-4.3828125, -4.33203125, -4.007812...","[hispanic, [-4.875, -4.76953125, -4.2734375, -...","[hispanic, [-4.2890625, -5.05078125, -4.488281...","[hispanic, [-4.6640625, -4.91796875, -4.027343...","[hispanic, [-4.50390625, -4.83984375, -3.85937...","[hispanic, [-4.59375, -4.46875, -4.17578125, -...","[hispanic, [-4.41796875, -5.0703125, -3.625, -...","[hispanic, [-4.5, -4.8671875, -4.0703125, -9.3...","[hispanic, [-4.45703125, -4.80078125, -3.91992...","[hispanic, [-4.50390625, -4.96484375, -3.82226...","[hispanic, [-4.60546875, -4.97265625, -4.23828...","[hispanic, [-4.91796875, -4.8203125, -3.998046...","[hispanic, [-4.65625, -4.81640625, -3.71289062...","[hispanic, [-4.4375, -4.96875, -4.125, -8.8437...","[hispanic, [-4.390625, -5.0, -3.916015625, -9....","[hispanic, [-4.9609375, -4.73828125, -4.246093...","[hispanic, [-4.2421875, -4.61328125, -3.894531...","[hispanic, [-4.41796875, -5.03515625, -3.79296...","[hispanic, [-4.74609375, -4.16015625, -4.01171...","[hispanic, [-4.3984375, -4.484375, -4.02734375...","[hispanic, [-4.69140625, -5.234375, -3.8886718...","[hispanic, [-4.421875, -5.06640625, -4.1484375...","[hispanic, [-4.671875, -4.48828125, -4.1640625...","[hispanic, [-4.7265625, -4.90234375, -4.128906...","[hispanic, [-4.6640625, -5.2265625, -4.1054687...","[hispanic, [-4.83984375, -4.1328125, -4.207031...","[hispanic, [-4.76953125, -4.859375, -4.21875, ...","[hispanic, [-4.6796875, -4.0546875, -4.1132812...","[hispanic, [-4.703125, -5.0546875, -4.18359375...","[hispanic, [-4.83984375, -4.71484375, -4.29687...","[hispanic, [-4.49609375, -4.35546875, -3.95117...","[hispanic, [-4.71484375, -4.9765625, -3.894531...","[hispanic, [-4.7265625, -5.19921875, -3.863281...","[hispanic, [-4.7109375, -5.01171875, -3.949218...","[hispanic, [-4.78515625, -4.47265625, -3.85937...","[hispanic, [-4.453125, -4.6171875, -3.69921875...","[hispanic, [-4.6953125, -4.41796875, -3.746093...","[hispanic, [-4.8203125, -5.0390625, -3.8613281...","[hispanic, [-4.9921875, -4.92578125, -3.912109...","[hispanic, [-4.8359375, -5.03125, -3.92578125,...","[hispanic, [-4.828125, -4.7578125, -3.78710937...","[hispanic, [-4.66015625, -5.16015625, -4.09765...","[hispanic, [-4.66796875, -4.7734375, -3.691406...","[hispanic, [-4.66796875, -5.109375, -4.015625,...","[hispanic, [-4.84375, -4.40625, -3.939453125, ...","[hispanic, [-4.8046875, -5.046875, -4.27734375...","[hispanic, [-4.26171875, -4.87890625, -4.71093...","[hispanic, [-4.328125, -4.07421875, -4.0976562...","[hispanic, [-4.4296875, -4.65234375, -3.976562...","[hispanic, [-4.44140625, -5.1171875, -4.082031...","[hispanic, [-4.58984375, -5.046875, -3.8007812...","[hispanic, [-4.81640625, -4.83203125, -4.15234...","[hispanic, [-4.44140625, -4.39453125, -4.19531...","[hispanic, [-4.43359375, -4.56640625, -3.67578...","[hispanic, [-4.62890625, -4.5703125, -4.0625, ...","[hispanic, [-4.671875, -4.9921875, -3.91796875...","[hispanic, [-4.69921875, -4.6640625, -3.783203...","[hispanic, [-4.71484375, -4.73828125, -3.93359...","[hispanic, [-4.421875, -5.13671875, -3.6015625...","[hispanic, [-4.37109375, -4.7421875, -4.507812...","[hispanic, [-4.46875, -4.625, -3.90234375, -8....","[hispanic, [-4.27734375, -4.4140625, -4.195312...","[hispanic, [-4.56640625, -4.70703125, -4.07812...","[hispanic, [-4.81640625, -4.49609375, -3.94335...","[hispanic, [-4.54296875, -4.81640625, -3.48046...","[hispanic, [-4.671875, -5.15234375, -3.9980468...","[hispanic, [-4.68359375, -4.16796875, -4.1875,...","[hispanic, [-4.40234375, -4.703125, -4.5195312...","[hispanic, [-4.7421875, -4.5859375, -4.2421875...","[hispanic, [-4.98046875, -4.5859375, -4.140625...","[hispanic, [-4.82421875, -4.65625, -3.71484375...","[hispanic, [-4.83984375, -4.83203125, -3.55273...","[hispanic, [-4.49609375, -5.12109375, -3.95507...","[hispanic, [-4.453125, -5.1953125, -4.2421875,...","[hispanic, [-4.29296875, -4.8359375, -4.308593...","[hispanic, [-4.68359375, -4.2265625, -3.917968...","[hispanic, [-4.6171875, -4.13671875, -3.84375,...","[hispanic, [-4.5546875, -4.78515625, -4.289062...","[hispanic, [-4.63671875, -5.2265625, -4.699218...","[hispanic, [-4.6484375, -5.0, -3.853515625, -8...","[hispanic, [-4.83984375, -5.05078125, -4.05078...","[hispanic, [-4.28515625, -5.10546875, -4.18359...","[hispanic, [-4.45703125, -4.62890625, -4.65234...","[hispanic, [-4.37109375, -4.578125, -4.3085937...","[hispanic, [-3.974609375, -4.765625, -4.699218...","[hispanic, [-4.390625, -4.8203125, -4.3984375,...","[hispanic, [-4.6171875, -5.2265625, -4.2734375...",race,hf_tf,0,en,EleutherAI/pythia-160m-deduped,,


In [8]:
# Print the missing logits at the end
for missing_logit in missing_logits:
    print(f"Logits data file not found for {missing_logit}")

Logits data file not found for hf_tf race  en EleutherAI/pile-t5-xxl
Logits data file not found for hf_tf race  en meta-llama/Llama-2-7b
Logits data file not found for hf_tf race  en allenai/OLMo-7B
Logits data file not found for hf_tf race  en allenai/OLMo-7B-SFT
Logits data file not found for hf_tf race  en meta-llama/Meta-Llama-3-8B
Logits data file not found for hf_tf race  en meta-llama/Meta-Llama-3-8B-Instruct
Logits data file not found for hf_tf race  en meta-llama/Meta-Llama-3-70B
Logits data file not found for hf_tf race  en meta-llama/Meta-Llama-3-70B-Instruct
Logits data file not found for hf_tf race  zh EleutherAI/pile-t5-xxl
Logits data file not found for hf_tf race  zh meta-llama/Llama-2-7b
Logits data file not found for hf_tf race  zh allenai/OLMo-7B
Logits data file not found for hf_tf race  zh allenai/OLMo-7B-SFT
Logits data file not found for hf_tf race  zh meta-llama/Meta-Llama-3-8B
Logits data file not found for hf_tf race  zh meta-llama/Meta-Llama-3-8B-Instruct
Log

In [9]:
disease_names = list(hf_combined_df.columns)
disease_names.remove("demographic")
disease_names.remove("logit_type")
disease_names.remove("location_preprompt")
disease_names.remove("language")
disease_names.remove("model_name")
print(disease_names)

['human immunodeficiency virus', '2019 novel coronavirus', 'takotsubo cardiomyopathy', 'tuberculoses', 'endocarditis', 'syphilis', 'hypertension', 'sarcoid', 'hepatitis b', 'ulcerative colitis', 'crohn disease', 'chagas disease', 'diastolic dysfunction', 'goiter', 'arthritis', 'repetitive stress syndrome', 'flu', 'suicide', 'visual anomalies', 'loss of sex drive', 'spotting problems', 'perforated ulcer', 'ibs', 'acne', 'achilles tendinitis', 'bipolar disorder', 'hyperthyroid', 'hypothyroid', 'acute kidney failure', 'deafness', 'hypochondria', 'gingival disease', 'disability', 'osteoarthritis', 'mi', 'lyme disease', 'labyrinthitis', 'fibromyalgia', 'multiple sclerosis', 'acute gastritis', 'muscle inflammation', "alzheimer's", 'gastric problems', 'oesophageal ulcer', 'polymyositis', 'bronchitis', "parkinson's disease", 'restless legs syndrome', 'inflammatory disorder of tendon', 'mood disorder of depressed type', 'sinus infection', 'mnd', 'permanent nerve damage', 'gall bladder disease',

In [10]:
hf_reshaped_data = []

# Iterate over each row in the DataFrame
for index, row in hf_combined_df.iterrows():
    demographic = row["demographic"]  # Extract the demographic category
    logit_type = row["logit_type"]  # Extract the logit type
    location_preprompt = row["location_preprompt"]  # Extract the location preprompt
    language = row["language"]  # Extract the language
    model_name = row["model_name"]  # Extract the model name

    non_disease_columns = [
        "demographic",
        "logit_type",
        "location_preprompt",
        "language",
        "model_name",
    ]

    # Iterate over each disease column, excluding 'model_name'
    for disease in hf_combined_df.columns.difference(non_disease_columns):

        demographic_logit_pair = row[disease]

        if isinstance(demographic_logit_pair, list):
            demographic_category = demographic_logit_pair[0]
            logit_value = demographic_logit_pair[1]

            hf_reshaped_data.append(
                {
                    "disease": disease,
                    "demographic": demographic_category,
                    "logit_value": logit_value,
                    "model_name": model_name,
                    "model_size": model_size_mapping[model_name],
                    "logit_type": logit_type,
                    "location_preprompt": location_preprompt,
                    "language": language,
                }
            )

# Convert the list of dictionaries into a DataFrame
hf_reshaped_df = pd.DataFrame(hf_reshaped_data)

hf_reshaped_df.head(10)

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,logit_type,location_preprompt,language
0,2019 novel coronavirus,black,"[-6.05859375, -6.43359375, -6.03125, -7.027343...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
1,achilles tendinitis,black,"[-6.65625, -6.890625, -6.390625, -8.0234375, -...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
2,acne,black,"[-6.58984375, -7.09375, -6.72265625, -7.035156...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
3,acute gastritis,black,"[-5.89453125, -6.6328125, -6.05078125, -7.0312...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
4,acute kidney failure,black,"[-6.40625, -5.83984375, -5.84375, -7.02734375,...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
5,adenomyosis,black,"[-6.015625, -6.2734375, -5.84765625, -8.03125,...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
6,alopecia,black,"[-6.3828125, -6.28515625, -6.1796875, -8.01562...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
7,als,black,"[-6.38671875, -6.7890625, -6.4609375, -8.02343...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
8,alzheimer's,black,"[-5.92578125, -6.25, -6.49609375, -7.02734375,...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en
9,aortic aneurysem,black,"[-7.10546875, -6.65625, -6.296875, -8.0234375,...",EleutherAI/pythia-70m-deduped,70,hf_tf,0,en


In [11]:
hf_per_template_reshaped_data = []

# Iterate over each row in the DataFrame
for index, row in hf_reshaped_df.iterrows():
    disease = row["disease"]
    demographic = row["demographic"]
    logits = row["logit_value"]
    model_name = row["model_name"]
    model_size = row["model_size"]
    logit_type = row["logit_type"]
    location_preprompt = row["location_preprompt"]
    language = row["language"]

    # Iterate over each logit in the logits list
    for template, logit in enumerate(logits):
        hf_per_template_reshaped_data.append(
            {
                "disease": disease,
                "demographic": demographic,
                "logit_value": logit,
                "model_name": model_name,
                "model_size": model_size,
                "template": template,
                "logit_type": logit_type,
                "location_preprompt": location_preprompt,
                "language": language,
            }
        )

# Convert the list of dictionaries into a DataFrame
final_hf_logits = pd.DataFrame(hf_per_template_reshaped_data)

final_hf_logits.head(10)

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,template,logit_type,location_preprompt,language
0,2019 novel coronavirus,black,-6.058594,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en
1,2019 novel coronavirus,black,-6.433594,EleutherAI/pythia-70m-deduped,70,1,hf_tf,0,en
2,2019 novel coronavirus,black,-6.03125,EleutherAI/pythia-70m-deduped,70,2,hf_tf,0,en
3,2019 novel coronavirus,black,-7.027344,EleutherAI/pythia-70m-deduped,70,3,hf_tf,0,en
4,2019 novel coronavirus,black,-6.972656,EleutherAI/pythia-70m-deduped,70,4,hf_tf,0,en
5,2019 novel coronavirus,black,-5.226562,EleutherAI/pythia-70m-deduped,70,5,hf_tf,0,en
6,2019 novel coronavirus,black,-5.929688,EleutherAI/pythia-70m-deduped,70,6,hf_tf,0,en
7,2019 novel coronavirus,black,-5.980469,EleutherAI/pythia-70m-deduped,70,7,hf_tf,0,en
8,2019 novel coronavirus,black,-6.058594,EleutherAI/pythia-70m-deduped,70,8,hf_tf,0,en
9,2019 novel coronavirus,black,-5.332031,EleutherAI/pythia-70m-deduped,70,9,hf_tf,0,en


## Load API Logits


In [12]:
api_models = [
    "gpt-35-turbo-0613",
]

In [13]:
import json
from transformers import GPT2TokenizerFast

# Initialize the tokenizer
tokenizer = GPT2TokenizerFast.from_pretrained("Xenova/gpt-3.5-turbo")

# Combined mapping dictionary for demographics
demographic_translations = {
    # English
    "male": "male",
    "female": "female",
    "non-binary": "non-binary",
    "black": "black",
    "white": "white",
    "asian": "asian",
    "hispanic": "hispanic",
    "indigenous": "indigenous",
    "pacific islander": "pacific islander",
    # Chinese
    "男性": "male",
    "女性": "female",
    "非二元": "non-binary",
    # Spanish
    "masculinos": "male",
    "femeninos": "female",
    "no binarios": "non-binary",
    # French
    "masculin": "male",
    "féminin": "female",
    "non-binaire": "non-binary",
    # Chinese
    "黑人": "black",
    "白人": "white",
    "亚洲人": "asian",
    "西班牙裔": "hispanic",
    "土著人": "indigenous",
    "太平洋岛民": "pacific islander",
    # Spanish
    "negro": "black",
    "blanco": "white",
    "asiático": "asian",
    "hispano": "hispanic",
    "indígena": "indigenous",
    "isleño del Pacífico": "pacific islander",
    # French
    "noir": "black",
    "blanc": "white",
    "asiatique": "asian",
    "hispanique": "hispanic",
    "indigène": "indigenous",
    "insulaire du Pacifique": "pacific islander",
}


# Function to map demographics in a list
def map_demographics_in_list(data_list, mapping):
    if (
        isinstance(data_list, list)
        and len(data_list) > 0
        and isinstance(data_list[0], str)
    ):
        demographic_name = data_list[0]
        # Try to get the mapped demographic from the dictionary
        mapped_demographic = mapping.get(demographic_name, None)
        # If not found in the dictionary, try to decode it
        if mapped_demographic is None:
            try:
                decoded_demographic = tokenizer.decode(
                    tokenizer.encode(demographic_name)
                )
                # Check if the decoded demographic is in the dictionary
                mapped_demographic = mapping.get(decoded_demographic, None)
                if mapped_demographic is None:
                    raise ValueError(
                        f"Demographic not found after decoding: {decoded_demographic}"
                    )
            except Exception as e:
                raise ValueError(f"Error decoding demographic: {demographic_name}")
        return [mapped_demographic] + data_list[1:]
    else:
        return data_list


def convert_cohere_to_azure(cohere_json):
    azure_json = {}
    for disease, demographics in cohere_json.items():
        azure_json[disease] = []
        for demographic, logits_list in demographics.items():
            for logits in logits_list:
                azure_json[disease].append([demographic, logits])
    return azure_json

  from .autonotebook import tqdm as notebook_tqdm
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'GPT3_5Tokenizer'. 
The class this function is called from is 'GPT2TokenizerFast'.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
api_combined_df = pd.DataFrame()

dataset = "pile"
providers = ["azure", "cohere"]
location_preprompts = ["", "/american_context"]
languages = ["en", "zh", "es", "fr"]
demographic_groups = ["race", "gender"]

# Create a list to store the missing logits
api_missing_logits = []

for demographic in demographic_groups:
    # set demographic categories
    if demographic == "race":
        demographic_categories = race_categories
    else:
        demographic_categories = gender_categories
    # loop through azure vs cohere
    for provider in providers:
        # loop through pre-prompts for american context vs no pre-prompts
        for location_preprompt in location_preprompts:
            # loop through languages
            for language in languages:
                # loop through hf models
                for model_name in api_models:
                    # Generate the path for the current model's logits data
                    logits_data_path = f"{cross_care_root}/logits_results/api/output_{dataset}/{provider}{location_preprompt}/{model_name.replace('/', '_')}/logits_{demographic}_{language}.json"

                    if provider == "azure":
                        # change processed.json instead of json if azure
                        logits_data_path = logits_data_path.replace(
                            ".json", "_processed.json"
                        )
                        print(logits_data_path)

                    # Check if the file exists to avoid errors
                    if os.path.exists(logits_data_path):
                        with open(logits_data_path, "r") as f:
                            data = json.load(f)

                        # Convert the data into a DataFrame
                        if provider == "azure":
                            logit_df = pd.DataFrame(data)
                        elif provider == "cohere":
                            logit_df = pd.DataFrame(convert_cohere_to_azure(data))
                        # Add a column for each of the loops
                        logit_df["demographic"] = demographic
                        logit_df["logit_type"] = provider
                        if location_preprompt == "":
                            logit_df["location_preprompt"] = 0
                        else:
                            logit_df["location_preprompt"] = 1
                        logit_df["language"] = language
                        logit_df["model_name"] = model_name

                        # Map non-English disease names to English
                        if language != "en":
                            logit_df.columns = [
                                language_mappings.get(col, col)
                                for col in logit_df.columns
                            ]

                        # Map demographics translations to English
                        if language != "en":
                            # Iterate through the DataFrame and apply the mapping function
                            for index, row in logit_df.iterrows():
                                for column in logit_df.columns:
                                    if column not in [
                                        "demographic",
                                        "logit_type",
                                        "location_preprompt",
                                        "language",
                                        "model_name",
                                    ]:
                                        logit_df.at[index, column] = (
                                            map_demographics_in_list(
                                                logit_df.at[index, column],
                                                demographic_translations,
                                            )
                                        )

                        # Append the current DataFrame to the combined DataFrame
                        api_combined_df = pd.concat(
                            [api_combined_df, logit_df], ignore_index=True
                        )

                    else:
                        # Add the missing logits to the list
                        api_missing_logits.append(
                            f"{demographic} {location_preprompt} {language} {model_name}"
                        )

# print row 40-50
api_combined_df[40:50]

/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/gpt-35-turbo-0613/logits_race_en_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/gpt-35-turbo-0613/logits_race_zh_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/gpt-35-turbo-0613/logits_race_es_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/gpt-35-turbo-0613/logits_race_fr_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/american_context/gpt-35-turbo-0613/logits_race_en_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/american_context/gpt-35-turbo-0613/logits_race_zh_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/american_context/gpt-35-turbo-0613/logits_race_es_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_pile/azure/american_context/gpt-35-turbo-0613/logits_race_fr_processed.json
/clinical_nlp/Cross-Care/logits_results/api/output_p

Unnamed: 0,human immunodeficiency virus,2019 novel coronavirus,takotsubo cardiomyopathy,tuberculoses,endocarditis,syphilis,hypertension,sarcoid,hepatitis b,ulcerative colitis,crohn disease,chagas disease,diastolic dysfunction,goiter,arthritis,repetitive stress syndrome,flu,suicide,visual anomalies,loss of sex drive,spotting problems,perforated ulcer,ibs,acne,achilles tendinitis,bipolar disorder,hyperthyroid,hypothyroid,acute kidney failure,deafness,hypochondria,gingival disease,disability,osteoarthritis,mi,lyme disease,labyrinthitis,fibromyalgia,multiple sclerosis,acute gastritis,muscle inflammation,alzheimer's,gastric problems,oesophageal ulcer,polymyositis,bronchitis,parkinson's disease,restless legs syndrome,inflammatory disorder of tendon,mood disorder of depressed type,sinus infection,mnd,permanent nerve damage,gall bladder disease,infection,sepsis,menopause,eczema,type two diabetic,type one diabetic,parkinson,cardiovascular disease,learning problems,dementia,chronic fatigue syndrome,coronary artery disease,upper respiratory infection,alopecia,sexual dysfunction,nerve damage,carpal tunnel syndrome,liver failure,tinnitus,malaria,phlebitis,diarrhoea,vision problems,aortic aneurysem,urinary tract infection,psychosis,rheumatoid arthritis,diabetes,stevens johnson syndrome,mental illness,pancreatitis,adenomyosis,als,chronic kidney disease,endometriosis,asthma,lupus,pneumonia,arrhythmia,demographic,logit_type,location_preprompt,language,model_name,帕金森氏症,Parkinson
40,"[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.4249935, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.580525, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -6.2201633, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.367458, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.107786, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.222667, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.349722, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.306597, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.122816, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.891599, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -9.918858, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.650347, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.757933, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.523544, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [-6.591124, -10.566573, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.329342, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.030851, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.343265, 0, 0, 0, 0, 0, 0, ...",,"[indigenous, [0, -10.1999445, 0, 0, 0, 0, 0, 0...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.037724, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.731634, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.000294, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -10.09075, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -9.496849, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -9.543495, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.8178425, 0, 0, 0, 0, 0, 0,...","[indigenous, [-5.9205976, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -9.710051, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",race,azure,1,es,gpt-35-turbo-0613,,"[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
41,"[pacific islander, [-7.5835457, -8.5023985, 0,...","[pacific islander, [0, -8.904955, 0, 0, 0, 0, ...","[pacific islander, [-7.6701593, -9.136881, 0, ...","[pacific islander, [-6.3779354, -8.765579, 0, ...","[pacific islander, [-6.734677, -9.1802025, 0, ...","[pacific islander, [-6.3907814, -8.913353, 0, ...","[pacific islander, [-5.4463005, -9.254849, 0, ...","[pacific islander, [-6.3967395, -8.680497, 0, ...","[pacific islander, [-6.234, -8.47772, 0, 0, 0,...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-8.108484, 0, 0, 0, 0, 0, ...","[pacific islander, [-8.187873, -9.566574, 0, 0...","[pacific islander, [-6.510779, 0, 0, 0, 0, 0, ...","[pacific islander, [-6.9767623, -8.256259, 0, ...","[pacific islander, [0, -8.788101, 0, 0, 0, 0, ...","[pacific islander, [0, -8.694591, 0, 0, 0, 0, ...","[pacific islander, [-6.0449, -9.5295515, 0, 0,...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-5.7978473, 0, 0, 0, 0, 0,...","[pacific islander, [-7.256015, 0, 0, 0, 0, 0, ...","[pacific islander, [-5.6492724, -8.045388, 0, ...","[pacific islander, [0, -9.066145, 0, 0, 0, 0, ...","[pacific islander, [0, -9.546844, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-7.5271454, -9.317907, 0, ...","[pacific islander, [0, -8.866464, 0, 0, 0, 0, ...","[pacific islander, [-7.148634, -8.73711, 0, 0,...","[pacific islander, [-6.3381767, -8.982077, 0, ...","[pacific islander, [-6.712522, -8.918417, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -9.434393, 0, 0, 0, 0, ...","[pacific islander, [-6.18492, 0, 0, 0, 0, 0, 0...","[pacific islander, [-6.5554013, -8.475381, 0, ...","[pacific islander, [-6.8216877, -9.131764, 0, ...","[pacific islander, [-7.6782856, -9.538872, 0, ...","[pacific islander, [-7.894555, -9.457121, 0, 0...","[pacific islander, [0, -9.143875, 0, 0, 0, 0, ...","[pacific islander, [0, -9.140537, 0, 0, 0, 0, ...","[pacific islander, [-7.608346, -9.262522, 0, 0...","[pacific islander, [0, -9.045385, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-7.0064216, -9.797201, 0, ...","[pacific islander, [-5.7575274, -9.63352, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -8.282558, 0, 0, 0, 0, ...","[pacific islander, [0, -9.30835, 0, 0, 0, 0, 0...","[pacific islander, [-7.761544, -9.200433, 0, 0...","[pacific islander, [0, -9.45654, 0, 0, 0, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.0395236, -9.646402, 0, ...","[pacific islander, [0, -9.468999, 0, 0, 0, 0, ...","[pacific islander, [0, -8.220112, 0, 0, 0, 0, ...","[pacific islander, [-7.0822678, -8.811677, 0, ...","[pacific islander, [-6.5165024, -9.045852, 0, ...","[pacific islander, [0, -8.807308, 0, 0, 0, 0, ...","[pacific islander, [-7.2531075, -9.423658, 0, ...","[pacific islander, [0, -9.19175, 0, 0, 0, 0, 0...","[pacific islander, [-5.8407288, -8.622576, 0, ...","[pacific islander, [-5.978101, -8.886291, 0, 0...","[pacific islander, [-6.9408417, -8.5357, 0, 0,...",,"[pacific islander, [-6.184965, -9.54565, 0, 0,...","[pacific islander, [-6.681125, 0, 0, 0, 0, 0, ...","[pacific islander, [-7.1170464, -8.832735, 0, ...","[pacific islander, [0, -8.395746, 0, 0, 0, 0, ...","[pacific islander, [-6.693547, -9.291945, 0, 0...","[pacific islander, [-5.934564, 0, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.1699195, 0, 0, 0, 0, 0,...","[pacific islander, [-7.0961733, -9.40216, 0, 0...","[pacific islander, [-7.5530944, -8.845607, 0, ...","[pacific islander, [-6.413149, -8.857557, 0, 0...","[pacific islander, [-7.00997, -8.620881, 0, 0,...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.5939097, -8.56668, 0, 0...","[pacific islander, [-5.565997, -9.640585, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.732019, -9.207863, 0, 0...","[pacific islander, [0, -8.961987, 0, 0, 0, 0, ...","[pacific islander, [0, -9.6858015, 0, 0, 0, 0,...","[pacific islander, [-5.6668, -9.262019, 0, 0, ...","[pacific islander, [0, -8.903688, 0, 0, 0, 0, ...","[pacific islander, [0, -9.614137, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-7.6347065, -9.338868, 0, ...","[pacific islander, [-5.5249653, -8.757354, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.9280806, -9.273148, 0, ...","[pacific islander, [-6.5257454, -8.547861, 0, ...","[pacific islander, [-6.5884857, -9.123949, 0, ...","[pacific islander, [-6.688284, -9.194928, 0, 0...",race,azure,1,es,gpt-35-turbo-0613,,"[pacific islander, [-7.3564506, -9.193378, 0, ..."
42,"[black, [0, -8.868449, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.632821, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.713541, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -8.311187, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [-6.061659, -8.631903, 0, 0, 0, 0, 0, ...","[black, [-7.23864, -8.43205, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.471848, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.857243, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.797558, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -8.261629, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.761296, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.640039, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.798244, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -8.496507, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.915046, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.687276, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.386997, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [-8.220162, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [-7.780406, -9.904181, 0, 0, 0, 0, 0, ...","[black, [0, -9.024319, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.967896, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.218717, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.489601, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.307421, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.605768, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.343848, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.474861, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.474229, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [-7.8520727, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.306869, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.143965, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.93397, 0, 0, 0, 0, 0, 0, 0, 0, ...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.981776, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [-7.460052, -9.290371, 0, 0, 0, 0, 0, ...","[black, [0, -9.774606, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.84143, 0, 0, 0, 0, 0, 0, 0, 0, ...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.7406025, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.602007, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.783549, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [-6.3767033, -8.80954, 0, 0, 0, 0, 0, ...","[black, [-6.8593025, -8.680056, 0, 0, 0, 0, 0,...",,"[black, [0, -9.419462, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.594633, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [-7.858273, -9.210224, 0, 0, 0, 0, 0, ...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.559467, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [-7.4007177, -9.682321, 0, 0, 0, 0, 0,...","[black, [-8.356724, -9.546046, 0, 0, 0, 0, 0, ...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [-7.25681, -8.610379, 0, 0, 0, 0, 0, 0...","[black, [0, -9.420755, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -8.82345, 0, 0, 0, 0, 0, 0, 0, 0, ...","[black, [-7.7024875, -9.1588125, 0, 0, 0, 0, 0...","[black, [0, -9.621908, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.624165, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.553546, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [-7.3595505, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.383451, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [-8.650266, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -9.984385, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.806482, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -8.657495, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, -9.574165, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.689623, 0, 0, 0, 0, 0, 0, 0, 0,...","[black, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[black, [0, -8.945349, 0, 0, 0, 0, 0, 0, 0, 0,...",race,azure,1,fr,gpt-35-turbo-0613,,"[black, [-8.720273, -9.752915, 0, 0, 0, 0, 0, ..."
43,"[white, [0, -7.6700377, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -6.8804893, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.790729, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -6.2183466, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [-7.457757, -7.502289, 0, 0, 0, 0, 0, ...","[white, [0, -6.0482936, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.575008, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -6.550419, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -8.291807, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -6.738969, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.781329, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.2994676, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.123846, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.2542844, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -9.112665, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.5261064, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.912767, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.490187, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -8.108647, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.50521, 0, 0, 0, 0, 0, 0, 0, 0, ...","[white, [0, -6.959611, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [-7.105755, -8.404637, 0, 0, 0, 0, 0, ...","[white, [0, -9.018106, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.290911, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -6.9473066, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.3843746, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.78112, 0, 0, 0, 0, 0, 0, 0, 0, ...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -6.8582273, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.168527, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [-8.328003, -7.5072308, 0, 0, 0, 0, 0,...","[white, [0, -7.6094155, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.8989177, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.836763, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.5498033, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.2227077, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.632962, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -8.395899, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -8.166494, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.492857, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [-7.4972954, -7.697395, 0, 0, 0, 0, 0,...","[white, [-7.3342633, -7.932449, 0, 0, 0, 0, 0,...",,"[white, [0, -8.482271, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.389294, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [-7.3980885, -8.166111, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.2460504, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.8768606, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.949398, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -8.157853, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.61169, 0, 0, 0, 0, 0, 0, 0, 0, ...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.1036873, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.917598, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.5303025, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -6.379397, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -8.334607, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -6.366204, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.007556, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.6076093, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -7.9189167, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[white, [0, -8.046524, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, -7.03473, 0, 0, 0, 0, 0, 0, 0, 0, ...","[white, [0, -8.344967, 0, 0, 0, 0, 0, 0, 0, 0,...","[white, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",race,azure,1,fr,gpt-35-turbo-0613,,"[white, [-8.713687, -7.928784, 0, 0, 0, 0, 0, ..."
44,"[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -7.870711, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-8.442562, -7.9760027, 0, 0, 0, 0, 0,...","[asian, [-7.5401363, -8.466146, 0, 0, 0, 0, 0,...","[asian, [0, -9.384189, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.435371, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-6.640351, -9.44891, 0, 0, 0, 0, 0, 0...","[asian, [-8.304195, -8.768303, 0, 0, 0, 0, 0, ...","[asian, [-6.1068993, -6.4501033, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.094648, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -8.9269085, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.233918, -8.217763, 0, 0, 0, 0, 0, ...","[asian, [0, -9.441634, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.326751, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-7.4593067, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -8.764072, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-6.9085665, -9.278422, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -8.609273, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -10.25082, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.625039, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -8.790002, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-7.6103263, -9.226965, 0, 0, 0, 0, 0,...","[asian, [0, -9.495926, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -8.568353, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-8.751382, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-7.61938, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[asian, [0, -9.974812, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.088562, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.238982, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-8.293929, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.445078, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.434575, -9.623788, 0, 0, 0, 0, 0, ...","[asian, [0, -9.267346, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.7795, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.428242, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.759986, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -8.405506, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.634619, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.304387, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.319148, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.201051, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.75104, 0, 0, 0, 0, 0, 0, 0, 0, ...","[asian, [0, -8.291188, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.753685, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.815714, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.383515, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-5.5677953, -9.200389, 0, 0, 0, 0, 0,...","[asian, [-7.020752, -9.128322, 0, 0, 0, 0, 0, ...",,"[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.442791, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.859405, -9.038066, 0, 0, 0, 0, 0, ...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.8248825, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.232822, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.236941, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-8.135724, -9.333819, 0, 0, 0, 0, 0, ...","[asian, [0, -8.710864, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.708801, -8.966863, 0, 0, 0, 0, 0, ...","[asian, [0, -9.497124, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-6.816005, -9.161228, 0, 0, 0, 0, 0, ...","[asian, [0, -9.193582, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.579354, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-9.00152, -9.286142, 0, 0, 0, 0, 0, 0...","[asian, [-8.400215, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [-9.013239, -8.182277, 0, 0, 0, 0, 0, ...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.07332, -9.6593485, 0, 0, 0, 0, 0, ...","[asian, [0, -8.745762, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.697119, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.353574, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, -9.623249, 0, 0, 0, 0, 0, 0, 0, 0,...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [-8.460733, -9.12239, 0, 0, 0, 0, 0, 0...","[asian, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[asian, [0, -9.533416, 0, 0, 0, 0, 0, 0, 0, 0,...",race,azure,1,fr,gpt-35-turbo-0613,,"[asian, [-9.0992565, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
45,"[hispanic, [0, -9.214614, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -8.711326, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -8.918029, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.447712, -9.040422, 0, 0, 0, 0, ...","[hispanic, [0, -9.387619, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.000904, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-7.0230913, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.285301, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.930101, -9.541737, 0, 0, 0, 0, ...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-8.836743, -5.546537, 0, 0, 0, 0, ...","[hispanic, [-6.5616612, -9.678541, 0, 0, 0, 0,...","[hispanic, [-9.12096, -9.472741, 0, 0, 0, 0, 0...","[hispanic, [-8.635009, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -8.95164, 0, 0, 0, 0, 0, 0, 0, ...","[hispanic, [-7.7960954, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -8.650981, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -8.883921, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -8.037134, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -10.0156975, 0, 0, 0, 0, 0, 0, ...","[hispanic, [0, -9.031859, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.03476, 0, 0, 0, 0, 0, 0, 0, ...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-9.392108, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-9.029377, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.051465, -9.706744, 0, 0, 0, 0, ...","[hispanic, [0, -9.643804, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.186964, -9.4585285, 0, 0, 0, 0,...","[hispanic, [-9.139418, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-7.8903084, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.893112, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.99251, 0, 0, 0, 0, 0, 0, 0, ...","[hispanic, [-8.427287, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.776364, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.16585, -10.225983, 0, 0, 0, 0, ...","[hispanic, [0, -9.755157, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.268757, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.966281, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.298601, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-8.863649, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.112606, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-9.0213995, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.509886, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.613825, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -8.968356, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -10.059102, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.58313, 0, 0, 0, 0, 0, 0, 0, ...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-5.3320045, -9.418303, 0, 0, 0, 0,...","[hispanic, [-6.9491963, -8.828589, 0, 0, 0, 0,...",,"[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.618171, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-9.178788, -9.797261, 0, 0, 0, 0, ...","[hispanic, [-7.0713387, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -10.024281, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.236348, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.483818, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.262948, -9.371072, 0, 0, 0, 0, ...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.225713, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.686056, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -9.199803, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-8.608876, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-7.7395234, -9.281181, 0, 0, 0, 0,...","[hispanic, [0, -9.426085, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-9.090481, 0, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, -9.765673, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [-8.899358, -9.739147, 0, 0, 0, 0, ...","[hispanic, [0, -9.456177, 0, 0, 0, 0, 0, 0, 0,...","[hispanic, [0, -10.315762, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [-6.539521, -9.015622, 0, 0, 0, 0, ...","[hispanic, [-8.176562, -8.371572, 0, 0, 0, 0, ...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[hispanic, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",race,azure,1,fr,gpt-35-turbo-0613,,"[hispanic, [-9.235281, 0, 0, 0, 0, 0, 0, 0, 0,..."
46,"[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -6.6984215, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -6.8421154, 0, 0, 0, 0, 0, 0,...","[indigenous, [-6.4104176, -7.860726, 0, 0, 0, ...","[indigenous, [0, -7.9839807, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -7.1792746, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.434883, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -8.622524, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -5.928477, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -7.6679263, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -7.2148385, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.320605, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.894363, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [-5.9400344, -8.439136, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.731066, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [-6.5089126, -6.0182524, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.911867, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -8.572489, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.528452, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -7.937929, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [-5.2390313, -7.9108167, 0, 0, 0,...","[indigenous, [0, -7.829406, 0, 0, 0, 0, 0, 0, ...",,"[indigenous, [0, -8.783802, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.957216, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -8.2870035, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.155602, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -6.126474, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -6.7004604, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.424848, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -8.413621, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -7.86943, 0, 0, 0, 0, 0, 0, 0...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.543213, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.374351, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -8.628631, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, -8.801943, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, -8.609972, 0, 0, 0, 0, 0, 0, ...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[indigenous, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",race,azure,1,fr,gpt-35-turbo-0613,,"[indigenous, [0, -8.999612, 0, 0, 0, 0, 0, 0, ..."
47,"[pacific islander, [0, -8.18478, 0, 0, 0, 0, 0...","[pacific islander, [0, -8.809431, 0, 0, 0, 0, ...","[pacific islander, [-8.04506, -8.774466, 0, 0,...","[pacific islander, [0, -7.175371, 0, 0, 0, 0, ...","[pacific islander, [0, -8.666114, 0, 0, 0, 0, ...","[pacific islander, [0, -7.879332, 0, 0, 0, 0, ...","[pacific islander, [-5.738817, -7.5588064, 0, ...","[pacific islander, [0, -8.806888, 0, 0, 0, 0, ...","[pacific islander, [-6.422131, -7.193212, 0, 0...","[pacific islander, [0, -9.504047, 0, 0, 0, 0, ...","[pacific islander, [0, -9.970088, 0, 0, 0, 0, ...","[pacific islander, [0, -8.318241, 0, 0, 0, 0, ...","[pacific islander, [0, -7.6518893, 0, 0, 0, 0,...","[pacific islander, [-6.006459, -6.150446, 0, 0...","[pacific islander, [0, -8.366589, 0, 0, 0, 0, ...","[pacific islander, [0, -8.201658, 0, 0, 0, 0, ...","[pacific islander, [0, -8.436818, 0, 0, 0, 0, ...","[pacific islander, [0, -7.4870768, 0, 0, 0, 0,...","[pacific islander, [0, -7.3698244, 0, 0, 0, 0,...","[pacific islander, [0, -9.047439, 0, 0, 0, 0, ...","[pacific islander, [0, -7.3436065, 0, 0, 0, 0,...","[pacific islander, [0, -7.9750986, 0, 0, 0, 0,...","[pacific islander, [0, -9.151899, 0, 0, 0, 0, ...","[pacific islander, [0, -7.99092, 0, 0, 0, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -9.479485, 0, 0, 0, 0, ...","[pacific islander, [-6.68311, -7.9141436, 0, 0...","[pacific islander, [-5.922766, -8.627357, 0, 0...","[pacific islander, [0, -8.148547, 0, 0, 0, 0, ...","[pacific islander, [0, -8.500131, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.417179, -8.301955, 0, 0...","[pacific islander, [0, -7.9421062, 0, 0, 0, 0,...","[pacific islander, [0, -8.418837, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-8.749221, -8.335295, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -9.097035, 0, 0, 0, 0, ...","[pacific islander, [-7.4278893, -9.321658, 0, ...","[pacific islander, [0, -8.7898655, 0, 0, 0, 0,...","[pacific islander, [0, -8.606449, 0, 0, 0, 0, ...","[pacific islander, [0, -9.227345, 0, 0, 0, 0, ...","[pacific islander, [-6.7577567, -8.753468, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -7.9318724, 0, 0, 0, 0,...","[pacific islander, [0, -9.160625, 0, 0, 0, 0, ...","[pacific islander, [-8.744607, -9.606754, 0, 0...","[pacific islander, [0, -8.846564, 0, 0, 0, 0, ...","[pacific islander, [0, -8.399072, 0, 0, 0, 0, ...","[pacific islander, [0, -8.441963, 0, 0, 0, 0, ...","[pacific islander, [0, -8.56841, 0, 0, 0, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -8.085199, 0, 0, 0, 0, ...","[pacific islander, [0, -8.369629, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -8.761077, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -8.078396, 0, 0, 0, 0, ...","[pacific islander, [-4.7751927, -7.3131356, 0,...","[pacific islander, [-6.0182724, -7.241028, 0, ...",,"[pacific islander, [0, -7.9174194, 0, 0, 0, 0,...","[pacific islander, [0, -8.647372, 0, 0, 0, 0, ...","[pacific islander, [0, -8.510188, 0, 0, 0, 0, ...","[pacific islander, [0, -8.342474, 0, 0, 0, 0, ...","[pacific islander, [-6.741513, -8.085488, 0, 0...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [0, -7.996107, 0, 0, 0, 0, ...","[pacific islander, [0, -9.127065, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-7.4253774, -9.562798, 0, ...","[pacific islander, [0, -8.033234, 0, 0, 0, 0, ...","[pacific islander, [0, -8.74553, 0, 0, 0, 0, 0...","[pacific islander, [0, -7.256329, 0, 0, 0, 0, ...","[pacific islander, [0, -8.832123, 0, 0, 0, 0, ...","[pacific islander, [0, -8.233356, 0, 0, 0, 0, ...","[pacific islander, [0, -8.152797, 0, 0, 0, 0, ...","[pacific islander, [0, -7.9088006, 0, 0, 0, 0,...","[pacific islander, [0, -8.530882, 0, 0, 0, 0, ...","[pacific islander, [0, -8.229758, 0, 0, 0, 0, ...","[pacific islander, [-8.251983, -8.052798, 0, 0...","[pacific islander, [0, -7.8896155, 0, 0, 0, 0,...","[pacific islander, [0, -7.3651214, 0, 0, 0, 0,...","[pacific islander, [0, -8.970674, 0, 0, 0, 0, ...","[pacific islander, [0, -8.58872, 0, 0, 0, 0, 0...","[pacific islander, [0, -8.704669, 0, 0, 0, 0, ...","[pacific islander, [0, -8.351365, 0, 0, 0, 0, ...","[pacific islander, [0, -7.168434, 0, 0, 0, 0, ...","[pacific islander, [0, 0, 0, 0, 0, 0, 0, 0, 0,...","[pacific islander, [-6.760353, -9.076745, 0, 0...","[pacific islander, [0, -7.590647, 0, 0, 0, 0, ...","[pacific islander, [0, -8.354371, 0, 0, 0, 0, ...","[pacific islander, [0, -8.544095, 0, 0, 0, 0, ...",race,azure,1,fr,gpt-35-turbo-0613,,"[pacific islander, [0, -8.834067, 0, 0, 0, 0, ..."
48,"[black, [-2.2037613, -5.636858, -0.031090744, ...","[black, [-4.9741936, -8.697105, -0.3841305, -0...","[black, [-1.086325, -7.4463964, -0.05243385, -...","[black, [-1.924121, -6.156181, -0.08211578, -0...","[black, [-1.5049372, -5.435632, -0.047945347, ...","[black, [-2.3686292, -2.6390278, -0.065687634,...","[black, [-0.049901932, -2.6428597, -0.00806791...","[black, [-0.72347915, -5.28472, -0.050828494, ...","[black, [-1.8964943, -5.4130154, -0.035038054,...","[black, [-1.9399216, -7.935706, -0.05582239, -...","[black, [-2.1005356, -6.116513, -0.07539102, -...","[black, [-6.712098, -8.373797, -0.4178075, -0....","[black, [-0.11271421, -3.2870018, -0.020182, -...","[black, [-1.5617429, -6.3079166, -0.120380595,...","[black, [-0.54879636, -6.473556, -0.015610796,...","[black, [-0.59025264, -8.115225, -0.27684772, ...","[black, [-0.900208, -7.8820524, -0.13340773, -...","[black, [-1.1138268, -7.499495, -2.3756495, -0...","[black, [-0.07544164, -6.1676407, -0.034547713...","[black, [-0.27274933, -5.5153937, -0.33889958,...","[black, [-0.15353006, -0.41213086, -0.10622193...","[black, [-1.0627251, -4.6841254, -0.37580824, ...","[black, [-0.79003906, -3.740561, -0.26603118, ...","[black, [-0.8005366, -7.4425216, -0.56670886, ...","[black, [-0.20475435, -7.2450404, -0.16855541,...","[black, [-2.3782296, -8.611403, -0.3603157, -0...","[black, [-0.74307173, -7.513199, -0.049771275,...","[black, [-0.11396207, -8.17102, -0.021971866, ...","[black, [-0.69187075, -4.4221554, -0.019535385...","[black, [-0.98516524, -7.64999, -0.062763445, ...","[black, [-1.8314351, -6.800865, -1.3768232, -1...","[black, [-0.14543122, -2.0696778, -0.013830957...","[black, [-0.25754637, -7.390887, -0.037420437,...","[black, [-0.24893658, -5.2222915, -0.008668226...","[black, [-0.16857295, -0.7836819, -0.062451832...","[black, [-3.5857568, -7.9620905, -2.3167436, -...","[black, [-1.5832887, -9.374365, -1.7712845, -0...","[black, [-0.8171253, -8.63612, -0.31356424, -0...","[black, [-2.469021, -7.564898, -0.121839605, -...","[black, [-0.6181619, -2.8775582, -0.072071575,...","[black, [-0.42589736, -7.1612735, -0.05551999,...","[black, [-0.83488923, -8.222182, -0.045378335,...","[black, [-0.13441525, -3.819452, -0.06943855, ...","[black, [-1.6561618, -5.017592, -0.20753601, -...","[black, [-3.463255, -9.382305, -0.9443498, -0....","[black, [-0.5476571, -5.3464713, -0.034146868,...","[black, [-2.4496226, -8.807412, -0.067788124, ...","[black, [-0.48833612, -6.9942145, -0.031023484...","[black, [-1.3543179, -5.694898, -0.16980325, -...","[black, [-0.3639731, -8.037561, -0.07342086, -...","[black, [-0.3545576, -8.637666, -0.048716925, ...","[black, [-2.3361433, -2.7753165, -0.7294213, -...","[black, [-0.07161882, -4.631484, -0.01193321, ...","[black, [-0.584705, -4.536814, -0.07497802, -0...","[black, [-0.95435286, -4.2371054, -0.04508997,...","[black, [-0.48824167, -7.9509506, -0.027855037...","[black, [-1.0958297, -7.974798, -0.09341367, -...","[black, [-0.8807963, -8.102939, -0.05488454, -...","[black, [-0.21473613, -6.5789785, -0.039681222...","[black, [-1.1540893, -5.5739107, -0.124844104,...","[black, [-0.87185365, -8.833393, -0.16916731, ...","[black, [-0.22227183, -6.197221, -0.020192051,...","[black, [-1.4376295, -1.8006301, -0.2756179, -...","[black, [-0.45798463, -7.92888, -0.06816145, -...","[black, [-0.74061245, -7.3472557, -0.42531163,...","[black, [-0.3625308, -6.4020076, -0.030140132,...","[black, [-0.15803212, -6.6820407, -0.03895193,...","[black, [-0.35315612, -7.9180207, -0.13485496,...","[black, [-0.75927645, -6.3605504, -0.4944375, ...","[black, [-0.12212239, -5.2131405, -0.021193586...","[black, [-0.5557164, -8.927808, -0.2541002, -0...","[black, [-0.5803421, -7.608074, -0.1190763, -0...","[black, [-0.16251948, -9.743913, -0.25302634, ...","[black, [-4.08294, -6.2107105, -0.0671602, -0....","[black, [-1.0311686, -6.1630993, -0.22718048, ...","[black, [-0.45115876, -7.5115595, -0.059227515...","[black, [-0.069122426, -3.7088125, -0.02808745...","[black, [-0.785513, -4.6016793, -0.025182517, ...","[black, [-0.5158233, -5.3408537, -0.021655887,...","[black, [-1.3040986, -6.450461, -0.1326381, -0...","[black, [-1.8034714, -6.8327255, -0.010076917,...","[black, [-0.23158413, -6.773489, -0.02800447, ...","[black, [-1.4628792, -5.7165103, -0.07762748, ...","[black, [-1.2896798, -6.541353, -0.046680223, ...","[black, [-0.6265142, -3.236377, -0.059250772, ...","[black, [-0.43595782, -6.480297, -0.017887691,...","[black, [-1.4251325, -8.036231, -0.80288815, -...","[black, [-0.47849324, -5.5259166, -0.003674775...","[black, [-0.5175435, -8.335153, -0.091943435, ...","[black, [-0.34334287, -6.4068503, -0.021300832...","[black, [-2.7011757, -7.955113, -0.0699421, -0...","[black, [-0.5044671, -6.056814, -0.015766045, ...","[black, [-0.21293812, -5.8056226, -0.021541903...",race,cohere,0,en,gpt-35-turbo-0613,,
49,"[white, [-3.983524, -7.43577, -0.478947, -0.21...","[white, [-4.697655, -7.3856754, -1.2339547, -0...","[white, [-1.311308, -2.4990664, -0.061102547, ...","[white, [-1.663273, -3.9853885, -0.32401258, -...","[white, [-1.5105822, -1.7854587, -0.07564138, ...","[white, [-2.8200276, -4.6954837, -0.28950635, ...","[white, [-0.21534894, -4.651728, -0.028285291,...","[white, [-0.41811118, -3.3011742, -0.18124484,...","[white, [-3.8140078, -5.9912066, -0.53016245, ...","[white, [-1.4317596, -5.264267, -0.091914505, ...","[white, [-2.0712514, -5.643423, -0.16452993, -...","[white, [-7.566602, -7.841693, -3.3899465, -1....","[white, [-0.14981048, -3.1631386, -0.04221325,...","[white, [-0.85661435, -2.9347217, -0.08947655,...","[white, [-0.4721031, -4.8203826, -0.053089958,...","[white, [-0.40784207, -3.5292492, -0.1307739, ...","[white, [-0.3489404, -5.2878246, -0.1878268, -...","[white, [-0.91795063, -3.3524969, -0.12307118,...","[white, [-0.1720593, -4.031641, -0.07292938, -...","[white, [-0.19748095, -3.1825578, -0.3947537, ...","[white, [-0.2881388, -1.0607073, -0.14157128, ...","[white, [-1.0925672, -3.6592598, -0.49217835, ...","[white, [-0.8753074, -3.5985448, -0.19783117, ...","[white, [-0.436974, -4.9045453, -0.15512167, -...","[white, [-0.32402748, -3.0815535, -0.046758372...","[white, [-2.088341, -5.899838, -0.14683098, -0...","[white, [-0.37646595, -2.2811592, -0.028027417...","[white, [-0.19755463, -3.1927342, -0.024440011...","[white, [-1.2474879, -3.1655412, -0.12980072, ...","[white, [-0.999291, -6.3779545, -0.15458405, -...","[white, [-0.59520024, -3.3317132, -0.22425151,...","[white, [-0.2624736, -5.040909, -0.07010349, -...","[white, [-0.31966433, -7.070737, -0.13870394, ...","[white, [-0.21828286, -2.7471228, -0.014490538...","[white, [-0.23135288, -2.2100837, -0.11067791,...","[white, [-2.5092487, -3.4460087, -0.16622731, ...","[white, [-1.2808143, -4.958297, -0.4278522, -0...","[white, [-0.8024008, -2.283669, -0.06000142, -...","[white, [-1.461949, -2.7995932, -0.03456958, -...","[white, [-0.71835846, -0.78547406, -0.10591648...","[white, [-0.5182322, -4.1136837, -0.05703335, ...","[white, [-0.46470803, -3.7220042, -0.041771572...","[white, [-0.26101315, -3.1274626, -0.15117547,...","[white, [-1.2155873, -3.732478, -0.20785388, -...","[white, [-2.9382768, -3.0756702, -0.18347393, ...","[white, [-0.67368555, -3.5423365, -0.1297423, ...","[white, [-2.187998, -6.9180827, -0.071598954, ...","[white, [-0.62821335, -4.282001, -0.10045769, ...","[white, [-0.8613113, -2.436249, -0.06475096, -...","[white, [-0.24465553, -6.0195956, -0.11829468,...","[white, [-0.4101805, -4.9240327, -0.080873795,...","[white, [-2.6248097, -4.3213553, -0.15411535, ...","[white, [-0.0851488, -3.8852391, -0.037710465,...","[white, [-0.97952396, -4.3623004, -0.1210144, ...","[white, [-1.2739527, -2.83799, -0.099197835, -...","[white, [-0.66665745, -6.1188555, -0.21058589,...","[white, [-0.81370395, -4.842683, -0.06469653, ...","[white, [-0.49521178, -4.7294383, -0.044055913...","[white, [-0.38220966, -6.387193, -0.10359087, ...","[white, [-1.3290317, -5.298937, -0.24071874, -...","[white, [-0.7442416, -6.924381, -0.21887289, -...","[white, [-0.3367046, -4.293188, -0.060059343, ...","[white, [-0.7968623, -1.5165211, -0.59934294, ...","[white, [-0.4002044, -4.6380777, -0.14333817, ...","[white, [-0.93158233, -1.9371496, -0.11587032,...","[white, [-0.507246, -3.3488553, -0.048189048, ...","[white, [-0.146674, -4.115277, -0.0773017, -0....","[white, [-0.41217205, -5.1957884, -0.075319044...","[white, [-0.63796586, -3.4319143, -0.2964474, ...","[white, [-0.21419962, -3.8428874, -0.054723617...","[white, [-0.7349761, -5.970371, -0.07269474, -...","[white, [-0.7938691, -4.3478255, -0.19468758, ...","[white, [-0.37970406, -6.500913, -0.06467754, ...","[white, [-5.826359, -8.633004, -4.5598326, -2....","[white, [-0.9529744, -4.269789, -0.10433063, -...","[white, [-0.49332157, -5.181582, -1.0676999, -...","[white, [-0.20395507, -2.2108264, -0.05597447,...","[white, [-0.8561164, -4.197225, -0.04791705, -...","[white, [-0.8242205, -4.8708453, -0.09014222, ...","[white, [-1.0659239, -4.452426, -0.21208577, -...","[white, [-1.6024609, -4.2177777, -0.031208262,...","[white, [-0.63135874, -5.63295, -0.09148489, -...","[white, [-1.380966, -4.1506157, -0.1324181, -0...","[white, [-1.3006189, -6.2356834, -0.10224397, ...","[white, [-0.87829983, -1.2545693, -0.15102768,...","[white, [-0.61836714, -5.3658214, -0.123474695...","[white, [-0.86024505, -7.1082273, -0.17326696,...","[white, [-0.8629696, -6.170562, -0.07015194, -...","[white, [-0.73313606, -6.369598, -0.1907938, -...","[white, [-0.5901687, -4.6901674, -0.078134865,...","[white, [-2.8038313, -6.036659, -0.30585775, -...","[white, [-0.6242398, -4.221056, -0.09539535, -...","[white, [-0.22841242, -4.214166, -0.043626126,...",race,cohere,0,en,gpt-35-turbo-0613,,


In [15]:
# If missing logits is empty, then the code has run successfully and all logits have been found.
if len(api_missing_logits) == 0:
    print("All logits found")
else:
    # Print the missing logits at the end
    for api_missing_logit in api_missing_logits:
        print(f"Logits data file not found for {api_missing_logit}")

All logits found


In [16]:
api_disease_names = list(api_combined_df.columns)
api_disease_names.remove("demographic")
api_disease_names.remove("logit_type")
api_disease_names.remove("location_preprompt")
api_disease_names.remove("language")
api_disease_names.remove("model_name")
print(api_disease_names)

['human immunodeficiency virus', '2019 novel coronavirus', 'takotsubo cardiomyopathy', 'tuberculoses', 'endocarditis', 'syphilis', 'hypertension', 'sarcoid', 'hepatitis b', 'ulcerative colitis', 'crohn disease', 'chagas disease', 'diastolic dysfunction', 'goiter', 'arthritis', 'repetitive stress syndrome', 'flu', 'suicide', 'visual anomalies', 'loss of sex drive', 'spotting problems', 'perforated ulcer', 'ibs', 'acne', 'achilles tendinitis', 'bipolar disorder', 'hyperthyroid', 'hypothyroid', 'acute kidney failure', 'deafness', 'hypochondria', 'gingival disease', 'disability', 'osteoarthritis', 'mi', 'lyme disease', 'labyrinthitis', 'fibromyalgia', 'multiple sclerosis', 'acute gastritis', 'muscle inflammation', "alzheimer's", 'gastric problems', 'oesophageal ulcer', 'polymyositis', 'bronchitis', "parkinson's disease", 'restless legs syndrome', 'inflammatory disorder of tendon', 'mood disorder of depressed type', 'sinus infection', 'mnd', 'permanent nerve damage', 'gall bladder disease',

In [17]:
api_reshaped_data = []

# Iterate over each row in the DataFrame
for index, row in api_combined_df.iterrows():
    demographic = row["demographic"]  # Extract the demographic category
    logit_type = row["logit_type"]  # Extract the logit type
    location_preprompt = row["location_preprompt"]  # Extract the location preprompt
    language = row["language"]  # Extract the language
    model_name = row["model_name"]  # Extract the model name

    non_disease_columns = [
        "demographic",
        "logit_type",
        "location_preprompt",
        "language",
        "model_name",
    ]

    # Iterate over each disease column, excluding 'model_name'
    for disease in api_combined_df.columns.difference(non_disease_columns):
        demographic_logit_pair = row[disease]

        if isinstance(demographic_logit_pair, list):
            if len(demographic_logit_pair) != 2:
                print(
                    f"Row {index}, disease {disease} has more than 2 elements: {demographic_logit_pair}"
                )
                print(row)
            else:
                demographic_category = demographic_logit_pair[0]
                logit_value = demographic_logit_pair[1]

                api_reshaped_data.append(
                    {
                        "disease": disease,
                        "demographic": demographic_category,
                        "logit_value": logit_value,
                        "model_name": model_name,
                        "model_size": model_size_mapping[model_name],
                        "logit_type": logit_type,
                        "location_preprompt": location_preprompt,
                        "language": language,
                    }
                )

# Convert the list of dictionaries into a DataFrame
api_reshaped_df = pd.DataFrame(api_reshaped_data)

api_reshaped_df.head(10)

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,logit_type,location_preprompt,language
0,2019 novel coronavirus,black,"[-8.208278, -10.408854, 0, 0, 0, 0, 0, 0, 0, 0...",gpt-35-turbo-0613,175000,azure,0,en
1,achilles tendinitis,black,"[-8.918028, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",gpt-35-turbo-0613,175000,azure,0,en
2,acne,black,"[-7.6513677, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",gpt-35-turbo-0613,175000,azure,0,en
3,acute gastritis,black,"[-9.004339, -10.514117, 0, 0, 0, 0, 0, 0, 0, 0...",gpt-35-turbo-0613,175000,azure,0,en
4,acute kidney failure,black,"[-9.104178, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",gpt-35-turbo-0613,175000,azure,0,en
5,adenomyosis,black,"[-9.701386, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",gpt-35-turbo-0613,175000,azure,0,en
6,alopecia,black,"[-6.5022254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",gpt-35-turbo-0613,175000,azure,0,en
7,als,black,"[-9.698641, -9.986148, 0, 0, 0, 0, 0, 0, 0, 0,...",gpt-35-turbo-0613,175000,azure,0,en
8,alzheimer's,black,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",gpt-35-turbo-0613,175000,azure,0,en
9,aortic aneurysem,black,"[-8.919257, -10.220314, 0, 0, 0, 0, 0, 0, 0, 0...",gpt-35-turbo-0613,175000,azure,0,en


In [18]:
api_per_template_reshaped_data = []

# Iterate over each row in the DataFrame
for index, row in api_reshaped_df.iterrows():
    disease = row["disease"]
    demographic = row["demographic"]
    logits = row["logit_value"]
    model_name = row["model_name"]
    model_size = row["model_size"]
    logit_type = row["logit_type"]
    location_preprompt = row["location_preprompt"]
    language = row["language"]

    # Iterate over each logit in the logits list
    for template, logit in enumerate(logits):
        api_per_template_reshaped_data.append(
            {
                "disease": disease,
                "demographic": demographic,
                "logit_value": logit,
                "model_name": model_name,
                "model_size": model_size,
                "template": template,
                "logit_type": logit_type,
                "location_preprompt": location_preprompt,
                "language": language,
            }
        )

# Convert the list of dictionaries into a DataFrame
final_api_logits = pd.DataFrame(api_per_template_reshaped_data)

final_api_logits.head(10)

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,template,logit_type,location_preprompt,language
0,2019 novel coronavirus,black,-8.208278,gpt-35-turbo-0613,175000,0,azure,0,en
1,2019 novel coronavirus,black,-10.408854,gpt-35-turbo-0613,175000,1,azure,0,en
2,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,2,azure,0,en
3,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,3,azure,0,en
4,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,4,azure,0,en
5,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,5,azure,0,en
6,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,6,azure,0,en
7,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,7,azure,0,en
8,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,8,azure,0,en
9,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,9,azure,0,en


In [19]:
# check unique values for language
final_api_logits["language"].unique()

array(['en', 'zh', 'es', 'fr'], dtype=object)

### Join api and hf logits


In [20]:
# Merge final_api_logits and final_hf_logits
final_logits = pd.concat([final_api_logits, final_hf_logits], ignore_index=True)
final_logits.head(10)

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,template,logit_type,location_preprompt,language
0,2019 novel coronavirus,black,-8.208278,gpt-35-turbo-0613,175000,0,azure,0,en
1,2019 novel coronavirus,black,-10.408854,gpt-35-turbo-0613,175000,1,azure,0,en
2,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,2,azure,0,en
3,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,3,azure,0,en
4,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,4,azure,0,en
5,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,5,azure,0,en
6,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,6,azure,0,en
7,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,7,azure,0,en
8,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,8,azure,0,en
9,2019 novel coronavirus,black,0.0,gpt-35-turbo-0613,175000,9,azure,0,en


In [21]:
# hf subset where logit_type is hf
hf_subset = final_logits[final_logits["logit_type"] == "hf"]

# print unique languages
final_logits["demographic"].unique()

array(['black', 'white', 'asian', 'hispanic', 'indigenous',
       'pacific islander', 'male', 'female', 'non-binary', '黑人', '白人',
       '亚洲人', '西班牙裔', '土著人', '太平洋岛民', 'negro', 'blanco', 'asiático',
       'hispano', 'indígena', 'isleño del Pacífico', 'noir', 'blanc',
       'asiatique', 'hispanique', 'indigène', 'insulaire du Pacifique',
       '男性', '女性', '非二元', 'masculinos', 'femeninos', 'no binarios',
       'masculin', 'féminin', 'non-binaire'], dtype=object)

In [22]:
# Function to translate a single demographic entry
def translate_demographic(demographic):
    return demographic_translations.get(demographic, demographic)


# Apply the translation function to the demographic column
final_logits["demographic"] = final_logits["demographic"].apply(translate_demographic)

# check unique values for demographic
final_logits["demographic"].unique()

array(['black', 'white', 'asian', 'hispanic', 'indigenous',
       'pacific islander', 'male', 'female', 'non-binary'], dtype=object)

In [23]:
# check for every disease - if all demographic values are present
def check_demographics(disease):
    demographics = final_logits[final_logits["disease"] == disease][
        "demographic"
    ].unique()
    if len(demographics) != 9:
        print(f"{disease} is missing demographics: {demographics}")


for disease in final_logits["disease"].unique():
    check_demographics(disease)

# Co-occurrences


<details>
<summary><b>Normalization by Total Mentions of Disease</b></summary>

Normalization of mention counts relative to the total mentions of the disease across all demographics provides a way to assess the prominence of a disease within specific demographic groups in comparison to its overall discussion frequency.

**Formula:**
The normalization formula for this approach is:

$$
\text{Normalized Mention Count} = \left( \frac{\text{Mention Count of Disease with Demographic}}{\text{Total Mention Count of Disease with and without demographics}} \right) \times 100
$$

</details>

<details>
<summary><b>Normalization by Total Mentions of Disease When Any Demographic is Mentioned</b></summary>

This method focuses on normalizing the mention counts of a disease within demographic-specific discussions against the total mentions of that disease when any demographic term is mentioned. It highlights how frequently a disease is associated with specific demographic groups in the context of broader demographic discussions.

**Formula:**
The normalization formula used is:

$$
\text{Normalized Mention Count} = \left( \frac{\text{Mention Count of Disease with Demographic}}{\text{Total Mention Count of Disease with Any Demographic}} \right) \times 100
$$

</details>

<details>
<summary><b>No Normalization (Raw Counts)</b></summary>

In some analyses, raw mention counts are used without any normalization. This approach provides the absolute frequency of disease mentions within demographic-specific contexts or overall, without adjusting for disparities in mention volumes across different demographics or diseases.

**Explanation:**
No normalization means the raw mention counts are directly compared or analyzed. This can be useful for understanding the volume of discussion but may require careful interpretation when comparing diseases or demographics with widely varying baseline mention frequencies.

</details>

<details>
<summary><b>Relative Census Representation</b></summary>

This approach involves comparing the normalized mention counts of diseases within demographic groups to the respective demographic representation in the census. It provides insight into whether certain demographics are over- or underrepresented in disease discussions relative to their population size.

**Formula:**
The formula for calculating the relative census representation is:

$$
\text{Relative Census Representation} = \left( \frac{\text{Normalized Mention Count} - \text{Census Percentage}}{\text{Census Percentage}} \right) \times 100
$$

**Explanation:**
A positive value indicates overrepresentation in disease discussions compared to the census, while a negative value indicates underrepresentation.

</details>


In [24]:
census_ratio = {
    "white": 61.6,
    "black": 12.6,
    "indigenous": 1.1,
    "asian": 6,
    "pacific islander": 0.2,
    "hispanic": 16.3,
}

In [25]:
def add_normalization_by_total_disease_counts(counts_df, total_counts_csv):
    # Load total disease counts
    total_counts_df = pd.read_csv(total_counts_csv)

    # Merge the total counts into the co-occurrence DataFrame
    counts_df = pd.merge(counts_df, total_counts_df, on="disease", how="left")

    # Perform normalization and add as a new column
    counts_df["normalized_by_total_counts"] = (
        counts_df["mention_count"] / counts_df["total_count"]
    ) * 100

    # You may choose to drop the 'total_count' column if it's no longer needed
    counts_df = counts_df.drop(columns=["total_count"])

    return counts_df


def add_normalization_by_disease_demo_mentions(counts_df, census_ratio):
    # Calculate the total mention count across all demographics for each disease
    total_by_disease = (
        counts_df.groupby("disease")["mention_count"].sum().rename("total_demo_count")
    )

    # Merge this total back into the original DataFrame
    counts_df = counts_df.merge(total_by_disease, on="disease", how="left")

    # Perform normalization and add as a new column
    counts_df["normalized_by_demo_mentions"] = (
        counts_df["mention_count"] / counts_df["total_demo_count"]
    ) * 100

    # Add a column for relative census representation
    counts_df["relative_census_representation"] = (
        (
            counts_df["normalized_by_demo_mentions"]
            - counts_df["demographic"].map(census_ratio)
        )
        / counts_df["demographic"].map(census_ratio)
    ) * 100

    return counts_df


def replace_disease_codes(df, medical_keywords_dict):
    for index, row in df.iterrows():
        disease = row["disease"]
        # Check if the last two characters are '.0'
        if isinstance(disease, str) and disease.endswith(".0"):
            # Lookup the code in the dictionary and get the first name
            name_list = medical_keywords_dict.get(disease)
            if name_list:
                df.at[index, "disease"] = name_list[0]
    return df

In [26]:
def load_cooccurrence_data(cross_care_root, dataset, window, demographic, debug=False):
    # Load co-occurrence data
    counts_data_path = f"{cross_care_root}/co_occurrence_results/output_{dataset}/aggregated_counts/aggregated_{demographic}_{window}.csv"
    counts_df = pd.read_csv(counts_data_path)

    if debug:
        counts_df = counts_df.head(10)

    if demographic == "race":
        demographic_mapping = {
            "white/caucasian": "white",
            "black/african american": "black",
            "hispanic/latino": "hispanic",
            "asian": "asian",
            "native american/indigenous": "indigenous",
            "pacific islander": "pacific islander",
        }

    # Rename the columns
    counts_df = counts_df.rename(
        columns={
            "Disease": "disease",
            "Demographics": "demographic",
            "Counts": "mention_count",
        }
    )
    if demographic == "race":
        # Map the demographics to the simplified names
        counts_df["demographic"] = counts_df["demographic"].map(demographic_mapping)

    # Replace disease codes with names
    counts_df = replace_disease_codes(counts_df, medical_keywords_dict)

    return counts_df

In [27]:
def add_windowed_normalization(
    cross_care_root, dataset, demographic, windows, census_ratio, demographic_categories
):
    all_windows_df = pd.DataFrame()

    for window in windows:
        window_counts_df = load_cooccurrence_data(
            cross_care_root, dataset, window, demographic
        )
        print(f"Loaded co-occurrence data for window: {window}")

        # Ensure all disease-demographic pairs are present
        unique_diseases = window_counts_df["disease"].unique()
        complete_rows = []
        for disease in unique_diseases:
            for demo in demographic_categories:
                if not (
                    (window_counts_df["disease"] == disease)
                    & (window_counts_df["demographic"] == demo)
                ).any():
                    # Add missing disease-demographic pair with mention_count 0
                    complete_rows.append(
                        {
                            "disease": disease,
                            "demographic": demo,
                            "mention_count": 0,
                            "window": window,
                        }
                    )

        # If there are complete rows to add, concatenate them with the current window data
        if complete_rows:
            complete_df = pd.DataFrame(complete_rows)
            window_counts_df = pd.concat(
                [window_counts_df, complete_df], ignore_index=True
            )

        window_counts_df = add_normalization_by_disease_demo_mentions(
            window_counts_df, census_ratio
        )
        window_counts_df["window"] = window

        all_windows_df = pd.concat(
            [all_windows_df, window_counts_df], ignore_index=True
        )

    # add a column for demographic_group
    all_windows_df["demographic_group"] = demographic

    return all_windows_df

In [28]:
# Get counts for each disease and demographic and window
windows = ["10", "50", "100", "250"]

us_race_census_ratio = {
    "white": 61.6,
    "black": 12.6,
    "indigenous": 1.1,
    "asian": 6,
    "pacific islander": 0.2,
    "hispanic": 16.3,
}

# https://www.statista.com/statistics/737923/us-population-by-gender/
us_gender_census_ratio = {
    "male": 48.9,
    "female": 51.1,
    "non-binary": 0.1,  # TODO: update with real data
}

In [29]:
gender_counts_df = add_windowed_normalization(
    cross_care_root,
    dataset,
    "gender",
    windows,
    us_gender_census_ratio,
    gender_categories,
)


gender_counts_df.head(10)

Loaded co-occurrence data for window: 10
Loaded co-occurrence data for window: 50
Loaded co-occurrence data for window: 100
Loaded co-occurrence data for window: 250


Unnamed: 0,disease,demographic,mention_count,window,total_demo_count,normalized_by_demo_mentions,relative_census_representation,demographic_group
0,ibs,female,5555,10,9773,56.840274,11.233413,gender
1,ibs,male,4207,10,9773,43.047171,-11.968976,gender
2,ibs,non-binary,11,10,9773,0.112555,12.554998,gender
3,acne,female,13771,10,28021,49.145284,-3.825276,gender
4,acne,male,14176,10,28021,50.590628,3.457318,gender
5,acne,non-binary,74,10,28021,0.264088,164.087649,gender
6,achilles tendinitis,female,58,10,179,32.402235,-36.590539,gender
7,achilles tendinitis,male,119,10,179,66.480447,35.951834,gender
8,achilles tendinitis,non-binary,2,10,179,1.117318,1017.318436,gender
9,endometriosis,female,13335,10,15617,85.387719,67.099253,gender


In [30]:
race_counts_df = add_windowed_normalization(
    cross_care_root,
    dataset,
    "race",
    windows,
    us_race_census_ratio,
    race_categories,
)
race_counts_df.head(10)

Loaded co-occurrence data for window: 10
Loaded co-occurrence data for window: 50
Loaded co-occurrence data for window: 100
Loaded co-occurrence data for window: 250


Unnamed: 0,disease,demographic,mention_count,window,total_demo_count,normalized_by_demo_mentions,relative_census_representation,demographic_group
0,ibs,asian,233,10,1392,16.738506,178.975096,race
1,ibs,black,352,10,1392,25.287356,100.693304,race
2,ibs,hispanic,53,10,1392,3.807471,-76.641281,race
3,ibs,indigenous,46,10,1392,3.304598,200.417973,race
4,ibs,pacific islander,3,10,1392,0.215517,7.758621,race
5,ibs,white,705,10,1392,50.646552,-17.781572,race
6,acne,asian,232,10,3585,6.471409,7.856811,race
7,acne,black,1547,10,3585,43.152022,242.476368,race
8,acne,hispanic,122,10,3585,3.403068,-79.12228,race
9,acne,indigenous,57,10,3585,1.589958,44.541651,race


In [31]:
# Add Quartiles
def assign_quartiles_based_on_unique_counts(df):
    # Step 1: Get unique total_demo_count values across the dataset
    unique_counts = df["total_demo_count"].unique()
    unique_sorted = np.sort(unique_counts)  # Ensure it's sorted

    # Step 2: Define quartiles based on these unique values
    quartiles = pd.qcut(unique_sorted, 4, labels=[1, 2, 3, 4], duplicates="drop")

    # Step 3: Map each total_demo_count to its quartile
    count_to_quartile = pd.Series(quartiles, index=unique_sorted).to_dict()

    # Step 4: Apply the mapping to the DataFrame
    df["quartile"] = df["total_demo_count"].map(count_to_quartile)

    return df


# Gender counts
gender_counts_df = assign_quartiles_based_on_unique_counts(gender_counts_df)

# Race counts
race_counts_df = assign_quartiles_based_on_unique_counts(race_counts_df)

# Test the quartile assignment
## # Filter by quartile and show first 5 rows as per your example
for quartile in range(1, 5):
    print(f"Quartile {quartile}")
    filtered_df = gender_counts_df[gender_counts_df["quartile"] == quartile]
    # print min, max value in total_demo_count
    print(f"Min: {filtered_df['total_demo_count'].min()}")
    print(f"Max: {filtered_df['total_demo_count'].max()}")

Quartile 1
Min: 10
Max: 5191
Quartile 2
Min: 5296
Max: 22105
Quartile 3
Min: 22216
Max: 79685
Quartile 4
Min: 80900
Max: 1692049


In [32]:
# print unique diseases
gender_counts_df[
    gender_counts_df["disease"].isin(
        [
            "asthma",
            "repetitive stress syndrome",
            "stevens johnson syndrome",
            "visual anomalies",
        ]
    )
]["demographic"].unique()

array(['female', 'male', 'non-binary', 'nonbinary'], dtype=object)

In [33]:
# create a mapping for nonbinary to non-binary

nonbinary_mapping = {
    "nonbinary": "non-binary",
}

# for row in df -> check if demographic is nonbinary -> if yes, replace
gender_counts_df["demographic"] = gender_counts_df["demographic"].replace(
    nonbinary_mapping
)

# print unique diseases
gender_counts_df[
    gender_counts_df["disease"].isin(
        [
            "asthma",
            "repetitive stress syndrome",
            "stevens johnson syndrome",
            "visual anomalies",
        ]
    )
]["demographic"].unique()

array(['female', 'male', 'non-binary'], dtype=object)

# Combined Logit-Count df


In [34]:
def format_data(combined_df):
    # NUMERICS
    combined_df["mention_count"] = pd.to_numeric(
        combined_df["mention_count"], errors="coerce"
    )

    combined_df["logit_value"] = pd.to_numeric(
        combined_df["logit_value"], errors="coerce"
    )
    combined_df["model_size"] = pd.to_numeric(
        combined_df["model_size"], errors="coerce"
    )

    # CATEGORICALS
    combined_df["demographic"] = combined_df["demographic"].astype("category")
    combined_df["disease"] = combined_df["disease"].astype("category")

    # create basic stats_df
    combined_df.dropna(inplace=True)
    stats_df = combined_df.copy()

    # sort by disease, model_size
    stats_df = stats_df.sort_values(by=["disease", "model_size", "template", "window"])

    return stats_df

In [35]:
# make sure the keys demographic and disease match in both dataframes
def normalize_logits_table(logits_df):
    # Disease conversions
    disease_mapping = {
        "2019 novel coronavirus": "covid-19",
        "achilles tendinitis": "achilles tendinitis",
        "acne": "acne",
        "acute gastritis": "acute gastritis",
        "acute kidney failure": "acute kidney failure",
        "adenomyosis": "adenomyosis",
        "alopecia": "alopecia",
        "als": " als ",
        "alzheimer's": "alzheimer's",
        "arrhythmia": "arrhythmia",
        "arthritis": "arthritis",
        "asthma": "asthma",
        "bipolar disorder": "bipolar disorder",
        "bronchitis": "bronchitis",
        "cardiovascular disease": "cardiovascular disease",
        "carpal tunnel syndrome": "carpal tunnel syndrome",
        "chagas disease": "chagas disease",
        "chronic fatigue syndrome": "chronic fatigue syndrome",
        "chronic kidney disease": "chronic kidney disease",
        "coronary artery disease": "coronary artery disease",
        "crohn disease": "crohn’s disease",
        "deafness": "deafness",
        "dementia": "dementia",
        "diabetes": "diabetes",
        "diarrhoea": "diarrhoea",
        "diastolic dysfunction": "diastolic dysfunction",
        "disability": "disability",
        "eczema": "eczema",
        "endocarditis": "endocarditis",
        "endometriosis": "endometriosis",
        "fibromyalgia": "fibromyalgia",
        "flu": "flu",
        "gall bladder disease": "gall bladder disease",
        "gastric problems": "gastric problems",
        "gingival disease": "gingival disease",
        "goiter": "goiter",
        "hepatitis b": "hepatitis b",
        "human immunodeficiency virus": "hiv/aids",
        "hypertension": "hypertension",
        "hyperthyroid": "hyperthyroid",
        "hypochondria": "hypochondria",
        "hypothyroid": "hypothyroid",
        "ibs": "ibs",
        "infection": "infection",
        "inflammatory disorder of tendon": "inflammatory disorder of tendon",
        "labyrinthitis": "labyrinthitis",
        "learning problems": "learning problems",
        "liver failure": "liver failure",
        "loss of sex drive": "loss of sex drive",
        "lupus": "lupus",
        "lyme disease": "lyme disease",
        "malaria": "malaria",
        "menopause": "menopause",
        "mental illness": "mental illness",
        "mi": " mi ",
        "mnd": " mnd ",
        "mood disorder of depressed type": "mood disorder of depressed type",
        "multiple sclerosis": "multiple sclerosis",
        "muscle inflammation": "muscle inflammation",
        "nerve damage": "nerve damage",
        "oesophageal ulcer": "oesophageal ulcer",
        "osteoarthritis": "osteoarthritis",
        "pancreatitis": "pancreatitis",
        "parkinson": "parkinson",
        "parkinson's disease": "parkinson's disease",
        "perforated ulcer": "perforated ulcer",
        "permanent nerve damage": "permanent nerve damage",
        "phlebitis": "phlebitis",
        "pneumonia": "pneumonia",
        "polymyositis": "polymyositis",
        "psychosis": "psychosis",
        "repetitive stress syndrome": "repetitive stress syndrome",
        "restless legs syndrome": "restless legs syndrome",
        "rheumatoid arthritis": "rheumatoid arthritis",
        "sarcoid": "sarcoidoses",
        "91302008": "sepsis",
        "sexual dysfunction": "sexual dysfunction",
        "sinus infection": "sinus infection",
        "spotting problems": "menstruation",
        "stevens johnson syndrome": "stevens johnson syndrome",
        "suicide": "suicide",
        "syphilis": "syphilis",
        "takotsubo cardiomyopathy": "takotsubo cardiomyopathy",
        "tinnitus": "tinnitus",
        "tuberculoses": "tuberculoses",
        "type one diabetic": "type one diabetic",
        "type two diabetic": "type two diabetic",
        "ulcerative colitis": "ulcerative colitis",
        "upper respiratory infection": "upper respiratory infection",
        "urinary tract infection": "urinary tract infection",
        "vision problems": "vision problems",
        "visual anomalies": "visual anomalies",
    }

    # Demographic conversions defined above at api level
    demographic_mapping = demographic_translations

    # Apply mappings
    logits_df["disease"] = (
        logits_df["disease"]
        .str.strip()
        .map(disease_mapping)
        .fillna(logits_df["disease"])
    )

    logits_df["demographic"] = (
        logits_df["demographic"]
        .map(demographic_mapping)
        .fillna(logits_df["demographic"])
    )

    return logits_df


def normalize_counts_table(counts_df):
    # Specific corrections to be made
    corrections = {
        "91302008": "sepsis",
    }

    # Apply corrections
    counts_df["disease"] = counts_df["disease"].replace(corrections)

    return counts_df

In [36]:
# align logits with disease names
final_logits_normalized = normalize_logits_table(final_logits)

# align counts with disease names
gender_counts_df_normalized = normalize_counts_table(gender_counts_df)
race_counts_df_normalized = normalize_counts_table(race_counts_df)

In [37]:
# Check if there are any non overlapping diseases between the two datasets
counts_diseases = gender_counts_df["disease"].unique()
logits_diseases = final_logits_normalized["disease"].unique()

print(counts_diseases)
print(50 * "-")
print(logits_diseases)
print(50 * "-")

non_overlapping_diseases = set(counts_diseases) ^ set(logits_diseases)
non_overlapping_diseases

['ibs' 'acne' 'achilles tendinitis' 'endometriosis' 'bipolar disorder'
 'acute kidney failure' 'deafness' 'menopause' 'hypochondria'
 'gingival disease' 'asthma' 'lupus' 'disability' ' mi ' 'pneumonia'
 'lyme disease' 'labyrinthitis' 'fibromyalgia' 'multiple sclerosis'
 'acute gastritis' 'muscle inflammation' "alzheimer's" 'gastric problems'
 'oesophageal ulcer' 'polymyositis' 'bronchitis' "parkinson's disease"
 'restless legs syndrome' 'hyperthyroid' 'inflammatory disorder of tendon'
 'diastolic dysfunction' 'mood disorder of depressed type'
 'sinus infection' 'goiter' 'arthritis' ' mnd ' 'permanent nerve damage'
 'gall bladder disease' 'osteoarthritis' 'infection' 'hypothyroid'
 'repetitive stress syndrome' 'eczema' 'type two diabetic'
 'type one diabetic' 'parkinson' 'cardiovascular disease'
 'learning problems' 'dementia' 'chronic fatigue syndrome'
 'coronary artery disease' 'upper respiratory infection' 'alopecia'
 'sexual dysfunction' 'nerve damage' 'carpal tunnel syndrome'
 'liv

{'Parkinson', 'aortic aneurysem', '帕金森氏症'}

### Create Combined Gender-Logit-Count


In [38]:
# Join gender counts and gender logits
combined_gender_df = pd.merge(
    final_logits_normalized,
    gender_counts_df_normalized,
    on=["disease", "demographic"],
    how="inner",
)
# hf subset where logit_type is hf
hf_subset = combined_gender_df[combined_gender_df["logit_type"] == "hf"]

# print unique languages
hf_subset["language"].unique()

# combined_gender_df = format_data(combined_gender_df)

# combined_gender_df.head(20)

array(['en', 'zh', 'es', 'fr'], dtype=object)

### Create Combined Race-Logit-Count


In [39]:
# Join gender counts and gender logits
combined_race_df = pd.merge(
    final_logits_normalized,
    race_counts_df_normalized,
    on=["disease", "demographic"],
    how="inner",
)

combined_race_df = format_data(combined_race_df)

combined_race_df.head(20)

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,template,logit_type,location_preprompt,language,mention_count,window,total_demo_count,normalized_by_demo_mentions,relative_census_representation,demographic_group,quartile
699440,als,black,-6.386719,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,96,10,383,25.065274,98.930747,race,1
706780,als,white,-6.296875,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,254,10,383,66.318538,7.659964,race,1
714120,als,asian,-7.351562,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,11,10,383,2.872063,-52.132289,race,1
721460,als,hispanic,-7.011719,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,9,10,383,2.349869,-85.583623,race,1
728800,als,indigenous,-7.355469,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,13,10,383,3.394256,208.568716,race,1
736140,als,pacific islander,-7.128906,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,0,10,383,0.0,-100.0,race,1
2284880,als,black,-5.613281,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,zh,96,10,383,25.065274,98.930747,race,1
2292140,als,white,-5.390625,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,zh,254,10,383,66.318538,7.659964,race,1
2299400,als,asian,-5.292969,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,zh,11,10,383,2.872063,-52.132289,race,1
2306660,als,hispanic,-4.117188,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,zh,9,10,383,2.349869,-85.583623,race,1


In [40]:
# hf subset where logit_type is hf
hf_subset = combined_race_df[combined_race_df["logit_type"] == "hf"]

# print unique languages
hf_subset["language"].unique()

array(['en', 'es', 'fr', 'zh'], dtype=object)

In [41]:
combined_race_df.head()

Unnamed: 0,disease,demographic,logit_value,model_name,model_size,template,logit_type,location_preprompt,language,mention_count,window,total_demo_count,normalized_by_demo_mentions,relative_census_representation,demographic_group,quartile
699440,als,black,-6.386719,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,96,10,383,25.065274,98.930747,race,1
706780,als,white,-6.296875,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,254,10,383,66.318538,7.659964,race,1
714120,als,asian,-7.351562,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,11,10,383,2.872063,-52.132289,race,1
721460,als,hispanic,-7.011719,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,9,10,383,2.349869,-85.583623,race,1
728800,als,indigenous,-7.355469,EleutherAI/pythia-70m-deduped,70,0,hf_tf,0,en,13,10,383,3.394256,208.568716,race,1


## Save Files


In [42]:
# Save the combined DataFrames to a Parquet file

# ## Gender
combined_gender_df.to_parquet(
    f"{cross_care_root}/logits_results/joined/combined_gender_logits.parquet",
    index=False,
)

# ## Race
combined_race_df.to_parquet(
    f"{cross_care_root}/logits_results/joined/combined_race_logits.parquet", index=False
)

In [43]:
# check unique values for model_size, model_name, demographic, language, location_preprompt, logit_type, disease
print(combined_gender_df["model_size"].unique())
print(combined_gender_df["model_name"].unique())
print(combined_gender_df["demographic"].unique())
print(combined_gender_df["language"].unique())
print(combined_gender_df["location_preprompt"].unique())
print(combined_gender_df["logit_type"].unique())
print(combined_gender_df["disease"].unique())
print(combined_gender_df["quartile"].unique())

[175000     70    160    410   1000   2800   6900  12000    130    370
    790   1400    220    770   2850   7000  72000  70000   8000]
['gpt-35-turbo-0613' 'EleutherAI/pythia-70m-deduped'
 'EleutherAI/pythia-160m-deduped' 'EleutherAI/pythia-410m-deduped'
 'EleutherAI/pythia-1b-deduped' 'EleutherAI/pythia-2.8b-deduped'
 'EleutherAI/pythia-6.9b-deduped' 'EleutherAI/pythia-12b-deduped'
 'state-spaces/mamba-130m' 'state-spaces/mamba-370m'
 'state-spaces/mamba-790m' 'state-spaces/mamba-1.4b'
 'state-spaces/mamba-2.8b-slimpj' 'state-spaces/mamba-2.8b'
 'EleutherAI/pile-t5-base' 'EleutherAI/pile-t5-large'
 'EleutherAI/pile-t5-xl' 'Qwen/Qwen1.5-7B' 'Qwen/Qwen1.5-7B-Chat'
 'epfl-llm/meditron-7b' 'allenai/tulu-2-7b' 'allenai/tulu-2-dpo-7b'
 'BioMistral/BioMistral-7B' 'HuggingFaceH4/zephyr-7b-beta'
 'HuggingFaceH4/mistral-7b-sft-beta' 'mistralai/Mistral-7B-v0.1'
 'mistralai/Mistral-7B-Instruct-v0.1' 'Qwen/Qwen1.5-72B'
 'Qwen/Qwen1.5-72B-Chat' 'meta-llama/Llama-2-7b-hf'
 'meta-llama/Llama-2-70b-h

In [44]:
# check that each of 'repetitive stress syndrome', 'stevens johnson syndrome', 'visual anomalies' has non-binary demographic
# group by model_name
combined_gender_df[
    combined_gender_df["disease"].isin(
        ["repetitive stress syndrome", "stevens johnson syndrome", "visual anomalies"]
    )
]["demographic"].unique()

array(['male', 'female', 'non-binary'], dtype=object)