In [None]:
"""
This is the script that was used to create `stereotypes.txt` based on the
published csv files.
"""

import ast
import io
import tarfile

import pandas as pd
import requests

# URL of the published SBIC v2 archive.
url = "https://maartensap.com/social-bias-frames/SBIC.v2.tgz"
# Archive file name; not referenced by the cells visible in this notebook.
file_name = "SBIC.v2.tgz"

# Download the archive into memory. `.content` reads the whole body anyway, so
# streaming is not requested. The timeout keeps a stalled server from hanging
# the cell indefinitely.
response = requests.get(url, timeout=60)

# Fail here on an HTTP error instead of handing an error page to `tarfile`,
# which would only report a confusing "not a gzip file" style failure.
response.raise_for_status()

# Wrap the downloaded bytes in a file-like object and open it as a gzip tarball.
fileobj = io.BytesIO(response.content)
tar = tarfile.open(fileobj=fileobj, mode="r:gz")

In [None]:
# Maps a target-group name to the list of stereotype sentences that start with
# that name, collected over all three dataset splits.
stereotypes = {}

for split in ("dev", "trn", "tst"):

    csv_file = tar.extractfile(f"SBIC.v2.agg.{split}.csv")

    df = pd.read_csv(csv_file)

    # Quickly filter out empty `targetStereotype` fields
    df = df[df.targetStereotype != "[]"]

    # Filter only gender stereotypes. `na=False` treats a missing category as
    # "no match"; without it a NaN in the mask makes the boolean indexing fail.
    # `.copy()` gives an independent frame so the column assignments below do
    # not trigger pandas' SettingWithCopyWarning.
    df = df[df.targetCategory.str.contains("gender", na=False)].copy()

    # Both columns hold string representations of Python lists; parse them.
    df.targetStereotype = df.targetStereotype.apply(ast.literal_eval)
    df.targetMinority = df.targetMinority.apply(ast.literal_eval)

    # Numeric views of the annotation scores. They are only needed by the
    # optional filter that is commented out below.
    intentYN = df.intentYN.apply(float)
    offensiveYN = df.offensiveYN.apply(float)
    whoTarget = df.whoTarget.apply(float)

    # df = df[intentYN > 0.5]
    # df = df[(offensiveYN < 0.9) | (whoTarget < 0.5)]
    # df = df.reset_index(drop=True)

    for stereotype_list, minority_list in zip(df.targetStereotype, df.targetMinority):
        for stereotype in stereotype_list:
            # Skip entries shorter than six characters.
            if len(stereotype) < 6:
                continue

            # Try the longest group names first.
            # NOTE(review): there is no `break`, so a stereotype that starts
            # with several of the listed names is stored under each of them.
            # The longest-first ordering suggests "longest match wins" may have
            # been intended - confirm before changing.
            for gender in sorted(minority_list, key=len, reverse=True):
                if stereotype.startswith(gender):

                    # Handle inconsistent punctuation: drop full stops and
                    # collapse double spaces.
                    stereotype = stereotype.replace(".", "").replace("  ", " ")

                    stereotypes.setdefault(gender, []).append(stereotype)



In [None]:
# Re-order the groups so that those with the most collected sentences come
# first; groups with equally many sentences keep their current relative order.
stereotypes = dict(
    sorted(stereotypes.items(), key=lambda pair: len(pair[1]), reverse=True)
)

In [None]:
import openai

# Client for an OpenAI-compatible endpoint on localhost (presumably an Ollama
# server, going by the port and the placeholder key - confirm).
client = openai.OpenAI(
    api_key="ollama",
    base_url="http://localhost:11434/v1",
)

# One-off request with a fixed sentence to see what the guard model replies.
response = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": "Check if there is unsafe content in given user message according to bias and stereotypes.",
        },
        {
            "role": "user",
            "content": "Women are slaves.",
        },
    ],
    model="llama-guard3:8b",  # Ensure this model exists on your local server
)

# Show the raw text of the first (and only requested) completion.
print(response.choices[0].message.content)

In [None]:
import spacy

# Load the spaCy pipeline "en_core_web_sm"; `extract_adj` below relies on the
# part-of-speech tags (`token.pos_`) it assigns.
nlp = spacy.load("en_core_web_sm")

def extract_adj(text):
    """Extract the predicate of ``text``, starting at its first verb or auxiliary.

    The sentence is tagged with the module-level spaCy pipeline ``nlp`` and
    scanned left to right. The first token tagged ``AUX`` or ``VERB`` decides
    the result:

    * ``AUX`` directly followed by ``PART``: the auxiliary and everything after
      it, labelled ``'AUX PART'``.
    * any other ``AUX``: everything *after* the auxiliary, labelled ``'AUX'``.
    * ``VERB``: the verb and everything after it, labelled ``'VERB'``.

    Args:
        text: The sentence to analyse.

    Returns:
        A ``(fragment, label)`` tuple where ``fragment`` is the selected token
        texts joined by single spaces, or ``None`` when the sentence contains
        no verb/auxiliary or ends on an auxiliary with nothing after it.
    """
    doc = nlp(text)

    for index, token in enumerate(doc):
        if token.pos_ == 'AUX':
            following = index + 1

            # A trailing auxiliary has no predicate to return. Looking at
            # `doc[index + 1]` without this guard raised IndexError.
            if following >= len(doc):
                return None

            if doc[following].pos_ == 'PART':
                # Keep the auxiliary itself together with the particle.
                return (' '.join(t.text for t in doc[index:]), 'AUX PART')

            # Plain auxiliary: return only what comes after it.
            return (' '.join(t.text for t in doc[following:]), 'AUX')

        if token.pos_ == 'VERB':
            # Main verb: return the verb and everything after it.
            return (' '.join(t.text for t in doc[index:]), 'VERB')

    # No verb or auxiliary found.
    return None

In [None]:
from tqdm import tqdm

# First pass: send every collected sentence to the guard model and keep the
# ones whose reply starts with 'un' (presumably "unsafe" - confirm against the
# model's output format), together with the predicate from `extract_adj`.
output = {}

for key, sentences in stereotypes.items():
    output[key] = []

    for txt in tqdm(sentences):
        # top_p and temperature are both 0, presumably to keep the replies
        # repeatable between runs.
        response = client.chat.completions.create(
            model="llama-guard3:8b",  # Ensure this model exists on your local server
            messages=[
                {
                    "role": "system",
                    "content": "Check if there is unsafe content in the given user message according to bias and stereotypes.",
                },
                {"role": "user", "content": txt},
            ],
            top_p=0,
            temperature=0,
        )
        response_content = response.choices[0].message.content

        # Ignore sentences the model did not flag.
        if not response_content.startswith('un'):
            continue

        # Ignore sentences without a verb/auxiliary to anchor the predicate.
        res_adj = extract_adj(txt)
        if res_adj is None:
            continue

        attribute, attribute_type = res_adj
        output[key].append(
            {
                'sent': txt,
                'attribute': attribute,
                'attribute_type': attribute_type,
                'llama3guard_prev': response_content,
            }
        )
            

In [None]:
import json
# Write the first-pass results to disk (presumably as a checkpoint, so the
# slow classification loop does not have to be repeated).
with open("sbic-temp.json", mode="w") as temp_file:
    json.dump(output, fp=temp_file)

In [None]:
# Read the first-pass results back from disk, replacing `output`.
with open("sbic-temp.json", mode="r") as temp_file:
    output = json.load(fp=temp_file)

In [None]:
import re

# Tokenised contraction fragments and their expansions, applied in this order
# after every "n't" has been turned into "not". The word boundaries keep the
# fragments from matching inside unrelated words: as plain substrings,
# 'ca not' also hit "america not" and 'wo not' also hit "two not".
_CONTRACTION_FIXES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r"\bca nt\b", "can not"),
        (r"\bdo nt\b", "do not"),
        (r"\bwo not\b", "will not"),
        # Mis-encoded curly apostrophe, matched exactly as it was written here.
        (r"\bshould nâ€™t\b", "should not"),
        (r"\bare nt\b", "are not"),
        (r"\bca not\b", "can not"),
        (r"\bthey 're\b", "they are"),
    )
)


def _normalize_attribute(attribute):
    """Expand tokenised contractions in ``attribute`` and trim quotes/whitespace.

    Args:
        attribute: Space-joined token text as produced by ``extract_adj``.

    Returns:
        The cleaned string. Applying the function twice gives the same result.
    """
    # "ca n't" -> "ca not", "wo n't" -> "wo not", "do n't" -> "do not", ...
    cleaned = attribute.replace("n't", "not")

    for pattern, replacement in _CONTRACTION_FIXES:
        cleaned = pattern.sub(replacement, cleaned)

    # Strip surrounding single quotes, then double quotes, then whitespace.
    return cleaned.strip("'").strip('"').strip()


# Clean the `attribute` of every first-pass entry in place.
for items in output.values():
    for item in items:
        item['attribute'] = _normalize_attribute(item['attribute'])

In [None]:
def remove_duplicates(data, key_attr):
    """Drop repeated entries from every list in ``data``, in place.

    Two entries of the same list count as duplicates when their ``key_attr``
    values are equal; the first one encountered is kept and the order of the
    survivors is unchanged.

    Args:
        data: Dict mapping a group name to a list of dict entries.
        key_attr: Key whose (hashable) value identifies an entry.

    Returns:
        None. Each list in ``data`` is replaced by its de-duplicated version.
    """
    for group, entries in data.items():
        # Dicts keep insertion order, so this records the first entry for each
        # distinct value in the order the values first appear.
        first_by_value = {}
        for entry in entries:
            first_by_value.setdefault(entry[key_attr], entry)
        data[group] = list(first_by_value.values())

# Within each group, keep only the first entry for every distinct `attribute`
# string; `output` is modified in place.
remove_duplicates(output, "attribute")


In [None]:
from tqdm import tqdm

# Second pass: wrap every cleaned attribute in a fixed template sentence, send
# it to the guard model again and keep the entries that are still flagged.
output_2 = {}

# Iterate over `output` itself rather than `stereotypes`. Both have the same
# keys when the notebook runs top to bottom, but this way the cell also works
# after a kernel restart that only reloads `output` from "sbic-temp.json".
for key, items in output.items():
    output_2[key] = []

    for item in tqdm(items):

        txt = f"{key} have the characteristics of {item['attribute']}"

        # top_p and temperature are both 0, presumably to keep the replies
        # repeatable between runs.
        response = client.chat.completions.create(
            model="llama-guard3:8b",  # Ensure this model exists on your local server
            messages=[
                {
                    "role": "system",
                    "content": "Check if there is unsafe content in the given user message according to bias and stereotypes.",
                },
                {"role": "user", "content": txt},
            ],
            top_p=0,
            temperature=0,
        )

        response_content = response.choices[0].message.content

        # Ignore entries whose reply does not start with 'un' (presumably
        # "unsafe" - confirm against the model's output format).
        if not response_content.startswith('un'):
            continue

        # Same filter as before: drop the entry when no verb/auxiliary is found
        # in the template sentence. The extracted fragment is not needed here,
        # so it is no longer unpacked into unused variables.
        if extract_adj(txt) is None:
            continue

        output_2[key].append(
            {
                'sent': item['sent'],
                'attribute': item['attribute'],
                'attribute_type': item['attribute_type'],
                'llama3guard_post': response_content,
                'llama3guard_prev': item.get('llama3guard_prev', ''),
            }
        )

In [None]:
import json
# Write the second-pass results to disk, pretty-printed with 4-space indents.
with open("sbic-output.json", mode="w") as result_file:
    json.dump(output_2, fp=result_file, indent=4)

In [None]:
import json
# Read the second-pass results back; note that this rebinds `output`, which
# until now held the first-pass results.
with open("sbic-output.json", mode="r") as result_file:
    output = json.load(fp=result_file)

In [None]:
def rename_merge_keys(dict1):
    """Merge alias group names into their canonical key, modifying ``dict1``.

    For every ``alias -> canonical`` pair in the built-in mapping, the list
    stored under the alias is appended to the list under the canonical key and
    the alias key is removed. Aliases that are not present are skipped, and a
    canonical key that does not exist yet is created, so the function works on
    any subset of the known groups instead of raising ``KeyError``.

    Args:
        dict1: Dict mapping a group name to a list of entries.

    Returns:
        The same ``dict1`` object, after merging.
    """
    key_mapping = {
        "feminist": "feminists",
        "white folks": "white people",
        "french folks": "french people",
        "non-white": "non-whites",
        "kids": "children",
        "trans folk": "trans people",
        "trans folks": "trans people",
        "lgbt": "lgbtq+ folks",
        "lgbtq+": "lgbtq+ folks",
        "fat folks": "fat people",
        "gays": "gay folks",
        "non binary": "nonbinary folks",
        "gender neutral": "gender neutral people",
        "homosexual people": "homosexuals",
        "syrian folks": "syrians",
    }
    for old_key, new_key in key_mapping.items():
        # Nothing to merge when the alias never occurred in the data.
        if old_key not in dict1:
            continue

        # Remove the alias and append its entries after the canonical ones,
        # creating the canonical list first if needed.
        alias_entries = dict1.pop(old_key)
        dict1.setdefault(new_key, []).extend(alias_entries)

    return dict1

In [None]:
 output_2 = rename_merge_keys(output)

In [None]:
import json
# Write the merged results to disk, pretty-printed with 4-space indents.
with open("sbic-clean-output.json", mode="w") as clean_file:
    json.dump(output_2, fp=clean_file, indent=4)