# Using GPT to classify Tweets from Biden & Trump

- This notebook showcases the main steps involved in using GPT-4.o to label Tweets by the Trump & Biden with the best-performing prompt, that frame the political out-group as immoral, and calculates final performance metrics compared to three human coders.

# (1) Setup

### Import required packages

In [1]:
#!pip install openai==0.28

In [2]:
import openai # ensure it's version 0.28
import pandas as pd 
import numpy as np 
import tiktoken
import nltk
import nltk.data
from nltk.tokenize import sent_tokenize, word_tokenize
nltk.download('punkt')
import simpledorff
import openpyxl

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/michelleschimmel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### API Model Setup

In [4]:
# set api key
openai.api_key=api_key

In [5]:
# define which model to use 
MODEL = "gpt-4o"
MAX_TOKENS = 8000
WAIT_TIME = 0.8  
TEMPERATURE = 0.2

### Required Functions

In [7]:
# Function based on Törnberg, 2023

import time

def analyze_message(text, instruction, model="gpt-4o", temperature=0.2):
    print(f"Analyzing tweets: ")

    response = None
    tries = 0
    failed = True

    while failed:
        try:
            time.sleep(0.8)
            response = openai.ChatCompletion.create(
                model=model,
                temperature=temperature,
                messages=[
                    {"role": "system", "content": f"'{instruction}'"},
                    {"role": "user", "content": f"'{text}'"}
                ]
            )
            failed = False

        # Handle errors.
        except openai.error.APIError as e:
            print(f"OpenAI API returned an API Error: {e}")
            if tries < 10:
                print(f"Caught an APIError: {e}. Waiting 10 seconds and then trying again...")
                failed = True
                tries += 1
                time.sleep(10)
            else:
                print(f"Caught an APIError: {e}. Too many exceptions. Giving up.")
                raise e

        except openai.error.ServiceUnavailableError as e:
            print(f"OpenAI API returned a ServiceUnavailable Error: {e}")
            if tries < 10:
                print(f"Caught a ServiceUnavailable error: {e}. Waiting 10 seconds and then trying again...")
                failed = True
                tries += 1
                time.sleep(10)
            else:
                print(f"Caught a ServiceUnavailable error: {e}. Too many exceptions. Giving up.")
                raise e

        except openai.error.APIConnectionError as e:
            print(f"Failed to connect to OpenAI API: {e}")
            pass

        except openai.error.RateLimitError as e:
            print(f"OpenAI API request exceeded rate limit: {e}")
            pass

        except openai.error.InvalidRequestError as e:
            print(f"Received an InvalidRequestError. Request likely too long; cutting 10% of the text and trying again. {e}")
            time.sleep(5)
            words = text.split()
            num_words_to_remove = round(len(words) * 0.1)
            remaining_words = words[:-num_words_to_remove]
            text = ' '.join(remaining_words)
            failed = True

        except Exception as e:
            print(f"Caught an unhandled error: {e}")
            pass

    result = ''
    for choice in response.choices:
        result += choice.message.content
        return result

In [8]:
# This function saves tokens in each prompt, as the GPT-instructions are included only once for each chunk of ~35 tweets, instead of 
# having the instruction included with every tweet.

# Important: Ensure df index is an index from 0–n, otherwise the chunking doesn't work.
     
def gpt_input (df, text_column): 
    
    # define maximum lines per chunk: 
    max_lines_per_chunk = 35
    
    # Add a new column, where the index of the dataframe is used to create chunks of ~35 tweets 
    # (and then whatever is left), such that the same number is assigned to 35 tweets each for each line
    df["chunk_number"] = df.index // max_lines_per_chunk 
    
    # Group the dataframe by chunk number
    grouped_df = df.groupby("chunk_number")
    
    # Initialize a list to store the results for each chunk. This is the final list to process further
    chunk_results = []
    
    # Iterate through the grouped dataframe and make api calls for each chunk 
    for chunk_number, chunk_df in grouped_df: 
        
        # Create tweet number supplied to GPT so it understands that each tweet is supposed to be treated individually
        numbered_tweets = []
        
        # Construct the prompt (i.e., string of tweets to be supplied) by joining the lines in the current chunk: 
        for index, tweet in enumerate(chunk_df[f"{text_column}"]):
            numbered_tweet = f"{index+1}:{tweet}"
            numbered_tweets.append(numbered_tweet)
            
        # Create the final prompt: 
        prompt = "\n".join(numbered_tweets)

        # Use the analyze_message function to get the result 
        result = analyze_message (prompt, instruction, model=MODEL)
        
        # Append the results to the list 
        chunk_results.append(result)
        
    return chunk_results

In [36]:
# Function to parse the json output from GPT (including error handling).
import re
import json
import pandas as pd

def parse_json(gpt_output_list):
    cleaned_data = []
    
    # Clean GPT output, removing unwanted leading & trailing line breaks & characters
    for item in gpt_output_list:
        cleaned_item = re.sub(r'```json|\n|```', '', item).strip()
        cleaned_data.append(cleaned_item)

    # Join into one string for further output error handling
    concatenated_data = ''.join(cleaned_data)

    # Handle cases where GPT places either nothing between JSON objects (like so: }{), or random punctuation to ensure it only splits by newline
    fixed_data = re.sub(r'}\s*([.,;!?\s]*)\s*{', '}\n{', concatenated_data)
    
    # Split into individual JSON objects
    json_blocks = fixed_data.splitlines()

    json_objects = []
    
    # Parse each block as JSON
    for block in json_blocks:
        try:
            json_objects.append(json.loads(block))
            
        # Print errors & examine block
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON block: {block[:100]}...: {e}")
            print(block)
            continue

    # Flatten the list of dicts (if necessary)
    flattened_data = []
    
    for obj in json_objects:
        if isinstance(obj, list):  # Handle lists of dictionaries
            flattened_data.extend(obj)
            
        else:  # Handle single dictionaries
            flattened_data.append(obj)

    # Create a DataFrame from the flattened data
    tweet_df = pd.DataFrame(flattened_data)
    
    return tweet_df

In [14]:
# old function -- this is not really needed anymore beyond initial stage as we now have several coders, maybe automate later 
def validate_scale (human_coded_data, gpt_coded_data, unique_tweet_id, target_variable): 
    
    import simpledorff
    
    # add coder variables: 
    human_coded_data["codernum"]="human"
    gpt_coded_data["codernum"]="llm"
    
    # ensure all outputs are in numeric format
    human_coded_data[f"{target_variable}"] = pd.to_numeric(human_coded_data[f"{target_variable}"], errors='coerce')
    gpt_coded_data[f"{target_variable}"] = pd.to_numeric(gpt_coded_data[f"{target_variable}"], errors='coerce')
    
    # select only columns that are required: id, codernum, target variable
    selected_columns = [f"{unique_tweet_id}", "codernum", f"{target_variable}"]
    human_coded_data_s = human_coded_data[selected_columns]
    gpt_coded_data_s = gpt_coded_data[selected_columns]
                        
    # create validation dataset 
    validation_data = pd.concat([human_coded_data_s,gpt_coded_data_s])
                        
    # calculate krippendorff's alpha 
    KA = simpledorff.calculate_krippendorffs_alpha_for_df(
    validation_data
    ,metric_fn=simpledorff.metrics.interval_metric,
    experiment_col='id', 
    annotator_col='codernum', 
    class_col=f'{target_variable}')

    print(f"The resulting Krippendorf's Alpha is is {KA}.")                    

In [16]:
# old function -- this is not really needed anymore beyond initial stage as we now have several coders, maybe automate later 

def check_disagreements (human_coded_data, gpt_coded_data, unique_tweet_id, target_variable, filename):
    
    # merge human and gpt coded data on id and text to get the differing target variables 
    wrong = human_coded_data.merge(gpt_coded_data, on=[f'{unique_tweet_id}', 'text'])
    
    # take absolute difference between target variables to calculate disagreement
    wrong['diff'] = abs(wrong[f'{target_variable}_x']-wrong[f'{target_variable}_y'])
    
    # select columns to make further inspection easier and save to df
    column_selection=[f'{unique_tweet_id}', 'text', f'{target_variable}_x', f'{target_variable}_y', 'motivation', 'diff']
    wrong_df=wrong[column_selection] 
    
    # save as .csv to examine 
    wrong_df.to_csv(f'{filename}.csv')
    
    # return in code
    return wrong_df.sort_values(by = 'diff', ascending=False)

# (2) Trump Tweets

## Data preparation

In [17]:
# Load dataset
trump = pd.read_excel("trump_2020_coding_ECM.xlsx")

In [19]:
# cut off where we stopped coding
trump_df = trump[:124]

In [21]:
# Join tweet ID & tweet content to provide to GPT
trump_df["id_text"] = "tweet_ID " + trump_df["TEXT_ID"].astype(str) + ": " + trump_df["text"].astype(str)

# use column with both id and text for gpt input
trump_txt = trump_df[["id_text"]].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trump_df["id_text"] = "tweet_ID " + trump_df["TEXT_ID"].astype(str) + ": " + trump_df["text"].astype(str)


In [22]:
trump_df.head(2)

Unnamed: 0,TEXT_ID,id,text,V1_BOUNDARY_DRAWING_E,V2_DEROGATION_OUTGROUP_E,V3_MORAL_WRONG_OUTGROUP_E,V4_SENTIMENT_E,V1_BOUNDARY_DRAWING_C,V2_DEROGATION_OUTGROUP_C,V3_MORAL_WRONG_OUTGROUP_C,V4_SENTIMENT_C,V1_BOUNDARY_DRAWING_M,V2_DEROGATION_OUTGROUP_M,V3_MORAL_WRONG_OUTGROUP_M,id_text
0,1877,1260000000000000000,State of Nevada â€œthinksâ€ that they can sen...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,tweet_ID 1877: State of Nevada â€œthinksâ€ th...
1,5098,1290000000000000000,Must know Election results on the night of the...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,tweet_ID 5098: Must know Election results on t...


## Final Prompt: Outgroup framed as morally wrong

Variable: V3_MORAL_WRONG_OUTGROUP

In [32]:
instruction = """The following tweets are written by the leader of the Republican party in the United States. 
For each tweet, please evaluate whether it presents another political party, another party's leadership, or another party's voters and supporters as immoral.
It is crucial that you do NOT evaluate portrayals of media outlets or channels, other countries or their leadership, or portrayals of the tweet authors' own party.

The following three aspects must be present together in a tweet:
(1) A clear reference to the political out-group
(2) Calling attention to one of the following moral vices in the out-group's actions or character. The dimensions do not have to be explicitly mentioned but serve as a guide to understand the different kinds of moral vices associated with different accusations.
- harming others (both directly or indirectly, e.g., by failing to help or taking away their rights)
- cheating on or exploiting other people
- betraying or abandoning one’s in-group
- disrespecting authority or tradition
- damaging or contaminating the purity of sacred entities.
(3) Attributing intentionality to the out-group’s vices. For example, the political out-group must be portrayed as intentionally harming others. Unintentional harm, for example, poor decision making, would not be sufficient to code this concept as present in the tweet.

It is crucial that regular political criticism of the out-party is not included if there is no clear judgment as immoral, such as 
- Criticising the out-party's policy positions or decisions 
- Expressing a desire to change the leadership (e.g., by voting them out)

Provide your response in JSON format, as follows:
{"tweet_id":"146", "immoral_outgroup": "1/0", "justification": "Provide a brief justification
for your choice."} 
"0" represents “no portrayal as immoral” 
"1" represents “portrayal as immoral”.

Example: 
Tweet: “tweet_ID 153: Time and time again, that party shows that they don’t care about treating their political opponents in a fair way – they think this is the only way they can win.”
Response: "tweet_id":"153", "immoral_outgroup":"1", "justification": "this tweet portrays the tweet author's political out-group as immoral because they are being unfair and ready to cheat or exploit others to win." 

Ensure NOT to skip any tweets and to seperate all JSON objects using \n.
"""

In [33]:
# send requests to API with correct input 
trump_immoral_results2 = gpt_input(trump_txt, "id_text")

Analyzing tweets: 
Analyzing tweets: 
Analyzing tweets: 
Analyzing tweets: 


In [37]:
# Parse GPT output
gpt_label_immoral2 = parse_json(trump_immoral_results2)

In [38]:
# Examine
gpt_label_immoral2

Unnamed: 0,tweet_id,immoral_outgroup,justification
0,1877,1,The tweet accuses the state of Nevada of inten...
1,5098,0,The tweet expresses a desire for timely electi...
2,3912,0,The tweet does not reference another political...
3,1182,0,The tweet criticizes a bill and calls for acti...
4,3068,0,The tweet criticizes an individual and suggest...
...,...,...,...
119,2190,0,The tweet is a simple announcement and does no...
120,2244,0,The tweet discusses economic progress and does...
121,3377,0,The tweet criticizes the mayor of Portland but...
122,2211,0,The tweet is vague and does not reference any ...


In [39]:
# Rename columns to match with gpt output
trump_df = trump_df.rename(columns = {"TEXT_ID":"tweet_id"})

# Ensure column format on each sides is string
trump_df['tweet_id'] = trump_df['tweet_id'].astype(str)
gpt_label_immoral2['tweet_id'] = gpt_label_immoral2['tweet_id'].astype(str)

# Merge dfs on tweet id
immoral_df_t2 = trump_df.merge(gpt_label_immoral2, on = "tweet_id")

In [42]:
immoral_df_t2.head(2)

Unnamed: 0,tweet_id,id,text,V1_BOUNDARY_DRAWING_E,V2_DEROGATION_OUTGROUP_E,V3_MORAL_WRONG_OUTGROUP_E,V4_SENTIMENT_E,V1_BOUNDARY_DRAWING_C,V2_DEROGATION_OUTGROUP_C,V3_MORAL_WRONG_OUTGROUP_C,V4_SENTIMENT_C,V1_BOUNDARY_DRAWING_M,V2_DEROGATION_OUTGROUP_M,V3_MORAL_WRONG_OUTGROUP_M,id_text,immoral_outgroup,justification
0,1877,1260000000000000000,State of Nevada â€œthinksâ€ that they can sen...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,tweet_ID 1877: State of Nevada â€œthinksâ€ th...,1,The tweet accuses the state of Nevada of inten...
1,5098,1290000000000000000,Must know Election results on the night of the...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,tweet_ID 5098: Must know Election results on t...,0,The tweet expresses a desire for timely electi...


In [43]:
# rename immoral_outgroup to variable name for consistency
immoral_df_t2 = immoral_df_t2.rename(columns={"immoral_outgroup":"V3_MORAL_WRONG_OUTGROUP_GPT"})

In [44]:
# save as .csv 
immoral_df_t2.to_csv("immoral_trump_gpt2.csv")

### Calculate Performance

In [45]:
# select variables, GPT is treated here as an additional coder
immoral_outgroup_kripp2 = immoral_df_t2[["tweet_id","V3_MORAL_WRONG_OUTGROUP_E", "V3_MORAL_WRONG_OUTGROUP_C", "V3_MORAL_WRONG_OUTGROUP_M", "V3_MORAL_WRONG_OUTGROUP_GPT"]]

In [53]:
# ensure gpt dtype is numeric
immoral_outgroup_kripp2['V3_MORAL_WRONG_OUTGROUP_GPT'] = pd.to_numeric(immoral_outgroup_kripp2['V3_MORAL_WRONG_OUTGROUP_GPT'], errors='raise')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  immoral_outgroup_kripp2['V3_MORAL_WRONG_OUTGROUP_GPT'] = pd.to_numeric(immoral_outgroup_kripp2['V3_MORAL_WRONG_OUTGROUP_GPT'], errors='raise')


In [55]:
# Calculate krippendorff's alpha

# Bring df in long format with coder ID column 
immoral_long2 = pd.melt(immoral_outgroup_kripp2,
    id_vars=["tweet_id"], 
                 var_name="CODER_ID", 
                 value_name="MORAL_WRONG")

# Calculate KAlpha
simpledorff.calculate_krippendorffs_alpha_for_df(
    immoral_long2,experiment_col='tweet_id',
    annotator_col='CODER_ID',
    class_col='MORAL_WRONG')

0.7541319209676208

# (3) Biden Tweets

In [56]:
biden_df = pd.read_excel("biden_2020_coding_E_C_M.xlsx")

In [57]:
# rename to tweet id & change to string to later do merging with GPT labels
biden_df = biden_df.rename(columns = {"TEXT_ID":"tweet_id"}).astype(str)

# create column with tweet ID and text 
biden_df["id_text"] = "tweet_ID " + biden_df["tweet_id"].astype(str) + ": " + biden_df["text"].astype(str)

# use column with both id and text
biden_txt = biden_df[["id_text"]].copy()

In [61]:
# Remove missing data 

# Replace faulty human coder artefact inside df that says "nan" with actual NaN (np.nan)
coder_labels = ["V1_BOUNDARY_DRAWING_E", "V2_DEROGATION_OUTGROUP_E", 
                "V3_MORAL_WRONG_OUTGROUP_E", "V4_SENTIMENT_E", 
                "V1_BOUNDARY_DRAWING_C", "V2_DEROGATION_OUTGROUP_C", 
                "V3_MORAL_WRONG_OUTGROUP_C", "V4_SENTIMENT_C", 
                "V1_BOUNDARY_DRAWING_M", "V2_DEROGATION_OUTGROUP_M", "V3_MORAL_WRONG_OUTGROUP_M"]

biden_df[coder_labels] = biden_df[coder_labels].replace("nan", pd.NA)

# drop all nans 
biden_df = biden_df.dropna(subset=coder_labels)

# Check if any na's left
biden_df.isna().any()

In [65]:
# Ensure GPT datatype is numeric
biden_df[coder_labels] = biden_df[coder_labels].apply(pd.to_numeric, errors = "raise")

## Final Prompt: Outgroup framed as morally wrong 

In [78]:
# this is the same instruction as Trump's except replaced with "Democratic party"

instruction = """The following tweets are written by the leader of the Democratic party in the United States. 
For each tweet, please evaluate whether it presents another political party, another party's leadership, or another party's voters and supporters as immoral.
It is crucial that you do NOT evaluate portrayals of media outlets or channels, other countries or their leadership, or portrayals of the tweet authors' own party.

The following three aspects must be present together in a tweet:
(1) A clear reference to the political out-group
(2) Calling attention to one of the following moral vices in the out-group's actions or character. The dimensions do not have to be explicitly mentioned but serve as a guide to understand the different kinds of moral vices associated with different accusations.
- harming others (both directly or indirectly, e.g., by failing to help or taking away their rights)
- cheating on or exploiting other people
- betraying or abandoning one’s in-group
- disrespecting authority or tradition
- damaging or contaminating the purity of sacred entities.
(3) Attributing intentionality to the out-group’s vices. For example, the political out-group must be portrayed as intentionally harming others. Unintentional harm, for example, poor decision making, would not be sufficient to code this concept as present in the tweet.

It is crucial that regular political criticism of the out-party is not included if there is no clear judgment as immoral, such as 
- Criticising the out-party's policy positions or decisions 
- Expressing a desire to change the leadership (e.g., by voting them out)

Provide your response in JSON format, as follows:
{"tweet_id":"146", "immoral_outgroup": "1/0", "justification": "Provide a brief justification
for your choice."} 
"0" represents “no portrayal as immoral” 
"1" represents “portrayal as immoral”.

Example: 
Tweet: “tweet_ID 153: Time and time again, that party shows that they don’t care about treating their political opponents in a fair way – they think this is the only way they can win.”
Response: "tweet_id":"153", "immoral_outgroup":"1", "justification": "this tweet portrays the tweet author's political out-group as immoral because they are being unfair and ready to cheat or exploit others to win." 

Ensure NOT to skip any tweets and to seperate all JSON objects using \n.
"""

In [79]:
# request GPT API with right input
biden_immoral_results = gpt_input(biden_txt, "id_text")

Analyzing tweets: 
Analyzing tweets: 
Analyzing tweets: 
Analyzing tweets: 


In [80]:
# parse GPT output
gpt_label_immoral_biden = parse_json(biden_immoral_results)

### Krippendorff's Alpha

In [83]:
# ensure gpt response is correct datatype
gpt_label_immoral_biden['tweet_id'] = gpt_label_immoral_biden['tweet_id'].astype(str)

# merge to obtain performance metrics and save data to examine disagreement
immoral_biden_df = biden_df.merge(gpt_label_immoral_biden, on = "tweet_id")

# rename immoral_outgroup to variable name 
immoral_biden_df = immoral_biden_df.rename(columns={"immoral_outgroup":"V3_MORAL_WRONG_OUTGROUP_GPT"})

# save as .csv 
immoral_biden_df.to_csv("immoral_biden_gpt2.csv")

In [85]:
# select variables to check agreement
immoral_biden_kripp = immoral_biden_df[["tweet_id","V3_MORAL_WRONG_OUTGROUP_E", "V3_MORAL_WRONG_OUTGROUP_C", "V3_MORAL_WRONG_OUTGROUP_M", "V3_MORAL_WRONG_OUTGROUP_GPT"]]

In [86]:
# convert gpt dtype to numeric
immoral_biden_kripp['V3_MORAL_WRONG_OUTGROUP_GPT'] = pd.to_numeric(immoral_biden_kripp['V3_MORAL_WRONG_OUTGROUP_GPT'], errors='raise')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  immoral_biden_kripp['V3_MORAL_WRONG_OUTGROUP_GPT'] = pd.to_numeric(immoral_biden_kripp['V3_MORAL_WRONG_OUTGROUP_GPT'], errors='raise')


In [89]:
# make df longer to get coder ID in own column
immoral_long_biden = pd.melt(immoral_biden_kripp,
    id_vars=["tweet_id"], 
                 var_name="CODER_ID", 
                 value_name="MORAL_WRONG")

# calculate kAlpha
simpledorff.calculate_krippendorffs_alpha_for_df(
    immoral_long_biden,
    experiment_col='tweet_id',
    annotator_col='CODER_ID',
    class_col='MORAL_WRONG')

0.7119943365695792

## (4) Overall Performance Metrics

In [91]:
# Take both labelled dfs with the best performing prompts to get all performance metrics. 
biden_immoral = pd.read_csv("immoral_biden_gpt2.csv")
trump_immoral = pd.read_csv("immoral_trump_gpt2.csv")

# concatinate to one dataframe
immoral = pd.concat([biden_immoral, trump_immoral], axis=0, ignore_index=True)

immoral.head(2)

Unnamed: 0.1,Unnamed: 0,tweet_id,id,V1_BOUNDARY_DRAWING_E,V2_DEROGATION_OUTGROUP_E,V3_MORAL_WRONG_OUTGROUP_E,V4_SENTIMENT_E,V1_BOUNDARY_DRAWING_C,V2_DEROGATION_OUTGROUP_C,V3_MORAL_WRONG_OUTGROUP_C,V4_SENTIMENT_C,Unnamed: 10,V1_BOUNDARY_DRAWING_M,V2_DEROGATION_OUTGROUP_M,V3_MORAL_WRONG_OUTGROUP_M,text,id_text,V3_MORAL_WRONG_OUTGROUP_GPT,justification
0,0,1622,1280000000000000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,"Together, we're going to build a new American ...","tweet_ID 1622: Together, we're going to build ...",0,This tweet does not reference any political ou...
1,1,2223,1310000000000000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,"Wheels up, folks.","tweet_ID 2223: Wheels up, folks.",0,This tweet does not reference any political ou...


In [92]:
# select variables for krippendorff's alpha
immoral_k = immoral[["tweet_id","V3_MORAL_WRONG_OUTGROUP_E", "V3_MORAL_WRONG_OUTGROUP_C", "V3_MORAL_WRONG_OUTGROUP_M", "V3_MORAL_WRONG_OUTGROUP_GPT"]]

# make longer 
immoral_k = pd.melt(immoral_k,
    id_vars=["tweet_id"], 
                 var_name="CODER_ID", 
                 value_name="IMMORAL")

# calculate KAlpha
immoral_kripp = simpledorff.calculate_krippendorffs_alpha_for_df(
    immoral_k,experiment_col='tweet_id',
    annotator_col='CODER_ID',
    class_col='IMMORAL')

print(immoral_kripp)


0.7335530833676692


In [94]:
coder_labels = ["V3_MORAL_WRONG_OUTGROUP_E", "V3_MORAL_WRONG_OUTGROUP_C", "V3_MORAL_WRONG_OUTGROUP_M"]

# Replace string inside the df that says "nan" with actual NaN (np.nan)
immoral[coder_labels] = immoral[coder_labels].replace("nan", pd.NA)

# drop all nans 
immoral =immoral.dropna(subset=coder_labels)

# select coders to calculate majority vote label
immoral_MV = immoral[["V3_MORAL_WRONG_OUTGROUP_E", "V3_MORAL_WRONG_OUTGROUP_C", "V3_MORAL_WRONG_OUTGROUP_M"]]

In [95]:
# Function to calculate majority vote
def majority_vote(row):
    from collections import Counter
    
    # Use Counter to count the occurrences of each label in the row
    vote_counts = Counter(row)
    
    # Return the label with the highest count (majority vote)
    return vote_counts.most_common(1)[0][0]

# Apply the function row-wise to calculate the majority label
immoral_MV["V3_IMMORAL_MV"] = immoral_MV.apply(majority_vote, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  immoral_MV["V3_IMMORAL_MV"] = immoral_MV.apply(majority_vote, axis=1)


In [96]:
# calculate overall performance metrics 
import sklearn
from sklearn.metrics import precision_score, recall_score, f1_score

# Extract majority vote and gpt labels
mv = immoral_MV["V3_IMMORAL_MV"]
gpt = immoral["V3_MORAL_WRONG_OUTGROUP_GPT"]

# Calculate Precision
precision_immoral = precision_score(mv, gpt)

# Calculate Recall
recall_immoral = recall_score(mv, gpt)

# Calculate F1 Score
f1_immoral = f1_score(mv, gpt)

print(f"Precision: {precision_immoral:.2f}")
print(f"Recall: {recall_immoral:.2f}")
print(f"F1 Score: {f1_immoral:.2f}")
print(f"Krippendorff's Alpha: {immoral_kripp: .2f}")

# Essentially: 
# it "overfits" and adds more labels but need to consider more if they make sense.

Precision: 0.78
Recall: 0.84
F1 Score: 0.81
Krippendorff's Alpha:  0.73
