In [None]:
import pandas as pd

# Load the dataset
df = pd.read_csv("data/AUTALIC.csv")

# Define a function to determine the truth value based on annotator agreement
def determine_truth(row):
    scores = [row['A1_Score'], row['A2_Score'], row['A3_Score']]
    # Count the occurrences of each score
    counts = {score: scores.count(score) for score in set(scores)}
    # Find the score that has two or more agreements
    for score, count in counts.items():
        if count >= 2:
            return score
    # If all three annotators disagree, return -1
    return -1

# Apply the function and create the 'truth' column
df['truth'] = df.apply(determine_truth, axis=1)

# Save the processed dataset
df.to_csv("data/autalic_processed.csv", index=False)

print("Processing complete. The output is saved as 'data/autalic_processed.csv'.")


Processing complete. The output is saved as 'data/autalic_processed.csv'.


<h1>llama3<h1>

In [11]:
import pandas as pd
import subprocess
import os

# Load the dataset
df = pd.read_csv("data/autalic_test.csv")

# Initialize an empty list to store Llama 3's responses
llama3_responses = []

# Function to query Llama 3 using `ollama run` and enforce consistent short responses
def ask_llama3(sentence):
    prompt = (
        f"Answer whether the following sentence is ableist with only one of the following responses: "
        f"'Ableist', 'Not ableist', or 'Needs more context'.\n"
        f"Sentence: \"{sentence}\""
    )
    try:
        # Call the Ollama CLI using the `run` command
        result = subprocess.run(
            ["ollama", "run", "llama3"],
            input=prompt,
            text=True,
            capture_output=True,
        )
        
        # Get the output and strip any extra whitespace
        response = result.stdout.strip()
        
        # Ensure the response is one of the valid categories
        valid_responses = ['Ableist', 'Not ableist', 'Needs more context']
        if response in valid_responses:
            return response
        else:
            print(f"Unexpected response: '{response}' for sentence: {sentence}")
            return "Needs more context"  # Default to 'Needs more context' if unexpected response
        
    except Exception as e:
        print(f"Error querying Llama 3 for sentence: {sentence}\n{e}")
        return "Error"

# Ensure the 'outputs' folder exists
os.makedirs("outputs", exist_ok=True)

# Loop through each sentence and query Llama 3
for idx, sentence in enumerate(df['Sentence'], 1):
    print(f"Processing sentence {idx}/{len(df)}: {sentence}")
    response = ask_llama3(sentence)
    llama3_responses.append(response)

# Add the responses to the dataframe
df['llama3_response'] = llama3_responses

# Save the updated dataset
output_file = "outputs/autalic_llama3_result.csv"
df.to_csv(output_file, index=False)

print(f"Processing complete. Results saved to {output_file}.")


Processing sentence 1/5: Our vision is to provide meaningful employment opportunities for individuals with Down syndrome and other disabilities fostering a space where they can showcase their talents and skills in the heart of our local community
Processing sentence 2/5: It doesnt matter if you actually know autistic people with similar traits or if you know an expert only Jordan can know whats going on in his head
Processing sentence 3/5: a11y supporthttpa11ysupportio is a communitydriven website that aims to help inform developers about what is accessibility supportedhttpswwww3orgTRWCAGcc4
Processing sentence 4/5: There have been times where I have acted quiet when I didnt know what to say or was socially clueless and this might explain why many autistic guys I meet do this far more often than me because they struggle socially more than even me and I struggle socially way too much anyway
Processing sentence 5/5: I know we have a few ASD parents in here and as one of them its offensiv

<h1>gpt-4<h1>

In [None]:
import pandas as pd
import openai
import os

# Set your OpenAI API key (load from environment or .env file)
openai.api_key = os.getenv("OPENAI_API_KEY")  # Assumed to be set in environment variables

# Load the dataset
df = pd.read_csv("data/autalic_test.csv")

# Initialize an empty list to store GPT-4's responses
gpt4_responses = []

# Function to query GPT-4 using the OpenAI API and enforce consistent short responses
def ask_gpt4(sentence):
    prompt = (
        f"Answer whether the following sentence is ableist with only one of the following responses: "
        f"'Ableist', 'Not ableist', or 'Needs more context'.\n"
        f"Sentence: \"{sentence}\""
    )
    
    try:
        # Use the correct method `openai.ChatCompletion.create()` to interact with GPT-4
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=10,  # Keep response short
            temperature=0,  # Make the output more deterministic
            top_p=1,  # Use the top token probability
            n=1,  # Only one response
            stop=None  # Don't use a stop sequence
        )
        
        # Get the model's response and strip extra whitespace
        model_response = response['choices'][0]['message']['content'].strip()
        
        # Ensure the response is one of the valid categories
        valid_responses = ['Ableist', 'Not ableist', 'Needs more context']
        if model_response in valid_responses:
            return model_response
        else:
            print(f"Unexpected response: '{model_response}' for sentence: {sentence}")
            return "Needs more context"  # Default to 'Needs more context' if unexpected response
        
    except Exception as e:
        print(f"Error querying GPT-4 for sentence: {sentence}\n{e}")
        return "Error"

# Ensure the 'outputs' folder exists
os.makedirs("outputs", exist_ok=True)

# Loop through each sentence and query GPT-4
for idx, sentence in enumerate(df['Sentence'], 1):
    print(f"Processing sentence {idx}/{len(df)}: {sentence}")
    response = ask_gpt4(sentence)
    gpt4_responses.append(response)

# Add the responses to the dataframe
df['gpt4_response'] = gpt4_responses

# Save the updated dataset
output_file = "outputs/autalic_gpt4_result.csv"
df.to_csv(output_file, index=False)

print(f"Processing complete. Results saved to {output_file}.")


Processing sentence 1/5: Our vision is to provide meaningful employment opportunities for individuals with Down syndrome and other disabilities fostering a space where they can showcase their talents and skills in the heart of our local community
Error querying GPT-4 for sentence: Our vision is to provide meaningful employment opportunities for individuals with Down syndrome and other disabilities fostering a space where they can showcase their talents and skills in the heart of our local community
'Chat' object has no attribute 'Completion'
Processing sentence 2/5: It doesnt matter if you actually know autistic people with similar traits or if you know an expert only Jordan can know whats going on in his head
Error querying GPT-4 for sentence: It doesnt matter if you actually know autistic people with similar traits or if you know an expert only Jordan can know whats going on in his head
'Chat' object has no attribute 'Completion'
Processing sentence 3/5: a11y supporthttpa11ysupportio