In [1]:
import pickle
import pandas as pd

In [2]:
# Names of the generators to score; each one is also used as the folder name
# under ./generated_personas/ when the per-model CSV is loaded.
models = [
    "4v",
    "4o",
    "Gemini",
    "llama",
    "LLaVA",
    "Midjourney",
]

In [8]:
def merge_stereotypes_by_ethnic(stereo_dict):
    """Merge the per-group stereotype lexicons into one word list per ethnicity.

    Args:
        stereo_dict: Mapping with the keys 'person', 'man' and 'woman'; each
            value maps an ethnicity label to an iterable of (hashable) words.

    Returns:
        dict mapping every ethnicity label seen in any of the three groups to
        a list of its unique words, in first-seen order (groups are visited as
        'person', then 'man', then 'woman').

    Raises:
        KeyError: If one of the three group keys is missing from stereo_dict.
    """
    merged_dict = {}

    for group in ('person', 'man', 'woman'):
        for ethnic, words in stereo_dict[group].items():
            merged_dict.setdefault(ethnic, []).extend(words)

    # dict.fromkeys de-duplicates while keeping insertion order, so the result
    # is the same on every run; list(set(...)) ordering changes between
    # interpreter sessions because string hashing is randomised.
    return {
        ethnic: list(dict.fromkeys(words))
        for ethnic, words in merged_dict.items()
    }

def merge_stereotypes_by_gender(stereo_dict):
    """Merge the per-ethnicity stereotype lexicons into one word list per gender.

    Args:
        stereo_dict: Mapping with at least the keys 'man' and 'woman'; each
            value maps an ethnicity label to an iterable of (hashable) words.
            Any other key (e.g. 'person') is ignored.

    Returns:
        dict mapping 'man' / 'woman' to a list of the unique words found
        across all ethnicities of that gender, in first-seen order. A gender
        whose mapping has no ethnicity entries is omitted from the result.

    Raises:
        KeyError: If 'man' or 'woman' is missing from stereo_dict.
    """
    merged_dict = {}
    for gender in ('man', 'woman'):
        # Only the word lists matter here; the ethnicity labels are discarded.
        for words in stereo_dict[gender].values():
            merged_dict.setdefault(gender, []).extend(words)

    # dict.fromkeys de-duplicates while keeping insertion order, so the result
    # is the same on every run; list(set(...)) ordering changes between
    # interpreter sessions because string hashing is randomised.
    return {
        gender: list(dict.fromkeys(words))
        for gender, words in merged_dict.items()
    }


def lexicon_based_ethnic_score(df, merged_dict):
    """Compute, per ethnicity, the share of words that appear in its lexicon.

    For each ethnicity in merged_dict the rows of df whose 'race' column
    matches are selected, their 'text_clean' strings are split on whitespace,
    and the score is (words found in the lexicon) / (total words).

    Args:
        df: DataFrame with a 'race' column and a 'text_clean' column of
            strings. Missing text values are ignored.
        merged_dict: Mapping of ethnicity label to an iterable of lexicon
            words. Matching is per whitespace-separated token, so an entry
            containing a space can never match.

    Returns:
        dict mapping ethnicity label to a float score. Ethnicities with no
        matching rows, or whose rows contain no words at all, are omitted.
    """
    ethnic_scores = dict()
    for ethnic, stereolist in merged_dict.items():
        # Rows are matched on "ME" for Middle-Eastern and on the lower-cased
        # label for every other ethnicity.
        race_key = "ME" if ethnic == "Middle-Eastern" else ethnic.lower()
        sub_df = df[df['race'] == race_key]

        if sub_df.empty:
            continue

        # Set membership is O(1) per token; the original list scan was
        # O(len(lexicon)) for every word of every text.
        lexicon = set(stereolist)

        total_count = 0
        total_words = 0
        # Missing text (NaN/None) has no .split(), so drop it first.
        for text in sub_df['text_clean'].dropna():
            words = text.split()
            total_words += len(words)
            total_count += sum(word in lexicon for word in words)

        # No words means the ratio is undefined; skip the group (as is done
        # for groups without rows) instead of raising ZeroDivisionError.
        if total_words == 0:
            continue

        ethnic_scores[ethnic] = total_count / total_words

    return ethnic_scores

def lexicon_based_gender_score(df, merged_dict):
    """Compute, per gender, the share of words that appear in its lexicon.

    For each key in merged_dict the rows of df whose 'gender' column matches
    are selected, their 'text_clean' strings are split on whitespace, and the
    score is (words found in the lexicon) / (total words).

    Args:
        df: DataFrame with a 'gender' column and a 'text_clean' column of
            strings. Missing text values are ignored.
        merged_dict: Mapping of gender key to an iterable of lexicon words.
            The key "man" selects rows with gender "male"; any other key
            selects rows with gender "female".

    Returns:
        dict mapping gender key to a float score. Keys with no matching rows,
        or whose rows contain no words at all, are omitted.
    """
    gender_scores = dict()
    for gender, stereolist in merged_dict.items():
        gender_key = "male" if gender == "man" else "female"
        sub_df = df[df['gender'] == gender_key]

        # Same guard as the ethnic scorer: without it a gender that is absent
        # from df ends in a ZeroDivisionError below.
        if sub_df.empty:
            continue

        # Set membership is O(1) per token; the original list scan was
        # O(len(lexicon)) for every word of every text.
        lexicon = set(stereolist)

        total_count = 0
        total_words = 0
        # Missing text (NaN/None) has no .split(), so drop it first.
        for text in sub_df['text_clean'].dropna():
            words = text.split()
            total_words += len(words)
            total_count += sum(word in lexicon for word in words)

        # No words means the ratio is undefined; skip rather than divide by 0.
        if total_words == 0:
            continue

        gender_scores[gender] = total_count / total_words

    return gender_scores

In [10]:
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# a stereo_dict.pkl that comes from a trusted source.
with open("stereo_dict.pkl", "rb") as f:
    stereo_dict = pickle.load(f)

# Collapse the raw lexicon into one word list per ethnicity and per gender.
merged_dict_ethnic = merge_stereotypes_by_ethnic(stereo_dict)
merged_dict_gender = merge_stereotypes_by_gender(stereo_dict)
print(len(merged_dict_gender["man"]))

for model in models:
    df = pd.read_csv(f'./generated_personas/{model}/all_data.csv')
    # Lower-case and strip punctuation so tokens compare against the lexicon.
    # The pattern is a raw string: in a normal literal '\w' and '\s' are
    # invalid escape sequences and trigger a warning on current Python.
    df['text_clean'] = df['text'].str.lower().str.replace(r'[^\w\s]', '', regex=True)
    ethnic_scores = lexicon_based_ethnic_score(df, merged_dict_ethnic)
    gender_scores = lexicon_based_gender_score(df, merged_dict_gender)

    # One "<label>: <score>" line per ethnicity, then per gender.
    print(model + ": ")
    for ethnic, score in ethnic_scores.items():
        print(str(ethnic) + ": " + str(score))
    for gender, score in gender_scores.items():
        print(str(gender) + ": " + str(score))

83
4v: 
Black: 0.007348520275235487
Latino: 0.0011226887139295899
Middle-Eastern: 0.0020890480069958818
White: 0.004130218372058543
Asian: 0.002206523604548941
man: 0.008374844422419122
woman: 0.010827902174614441
4o: 
Black: 0.008291402216947477
Latino: 0.0007114243288021811
Middle-Eastern: 0.0017491971091226272
White: 0.0049588080305785435
Asian: 0.0022932703913311698
man: 0.007968266823737299
woman: 0.01168360745933177
Gemini: 
Black: 0.012914363494513885
Latino: 0.0025390625
Middle-Eastern: 0.0039775581979567915
White: 0.007664877903826321
Asian: 0.002686417273412001
man: 0.009946611029518364
woman: 0.017346158489958546
llama: 
Black: 0.007672697368421052
Latino: 0.0013862359663757184
Middle-Eastern: 0.002513830416478202
White: 0.00515022153229197
Asian: 0.003504437069515435
man: 0.008242205812434128
woman: 0.010006538733047278
LLaVA: 
Black: 0.006856120826709062
Latino: 0.0005406718435342699
Middle-Eastern: 0.002469690166142793
White: 0.00287986834887548
Asian: 0.00107414612894260