# Linguistic markers of subtle discrimination among mental healthcare professionals: CEAI Lunch-and-Learn

_Performs qualitative deductive coding consistent with the [CHALET](https://arxiv.org/abs/2405.05758) (**C**ollaborative **H**uman-LLM **A**na**L**ysis for **E**mpowering Conceptualization in Quali**T**ative Research) approach. Requires Ollama and/or OpenAI API key._

> mhp_llm_qualitative_share.ipynb<br>
> Simone J. Skeen (01-30-2025)

1. [Prepare](#scrollTo=TMzbQWcLnD3k)
2. [Write](#scrollTo=ro3vWHGknw3w)<br>
[_code_texts_deductively_llama_](#scrollTo=0TXsMF50oDSi)<br>
[_code_instance_deductively_gpt_](#scrollTo=LrgYlrmo1OUW)<br>
[_code_texts_deductively_gpt_](#scrollTo=I6V00vzh2Na1)<br>
3. [Code](#scrollTo=zXYJT6i9pSPf)<br>
[Llama 3.2: local](#scrollTo=6hHjuQXrAqLE)<br>
[GPT-4o: OpenAI API](#scrollTo=G1v8sP42Ah-n)<br>
4. [Fidelity](#scrollTo=6upq1MSmxvoW)<br>
[Compute Cohen's $\kappa$](#scrollTo=DuSQ858FR2Ab)<br>
[Flag disagreements](#scrollTo=mC58zS16Zttc)


### Prepare
Installs and imports requisite packages; customizes outputs.
***

**Install**

In [None]:
%%capture

%pip install irrCAC
%pip install lime
%pip install ollama
%pip install openai

**Import**

In [None]:
import json
import numpy as np
import ollama
import openai
import os
import pandas as pd
import re
import requests
import time
import warnings

from google.colab import drive
from irrCAC.raw import CAC
from sklearn.metrics import cohen_kappa_score

from IPython.core.interactiveshell import InteractiveShell
# echo the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'

# opt in to pandas copy-on-write semantics
pd.options.mode.copy_on_write = True

# display all columns (no truncation)
pd.set_option(
    'display.max_columns',
    None,
    )

# display all rows (no truncation)
pd.set_option(
    'display.max_rows',
    None,
    )

# silence FutureWarning messages for the whole session
warnings.simplefilter(
    action = 'ignore',
    category = FutureWarning,
    )

#from langchain_community.llms import Ollama

**Set env variables**

In [None]:
# placeholder: paste the OpenAI API key between the quotes before running
os.environ['OPENAI_API_KEY'] = ' '

# Confirm a non-blank key is set without echoing it. A bare `os.environ` here
# would print every environment variable (this key included) into the saved
# notebook output.
bool(os.environ['OPENAI_API_KEY'].strip())

**Directory structure**

In [None]:
mhp_subtle_discrimination/
└── CEAI_lunch_and_learn/
    ├── code
    ├── inputs
    ├── outputs
    └── temp

**Ollama**<br>
http://localhost:11434/

#### Google Colab

In [None]:
# mount gdrive
# Colab only: `drive` comes from google.colab; '/content/drive' is the mount
# point that the following `%cd` cell navigates into.

drive.mount(
    '/content/drive',
#    force_remount = True,
    )

In [None]:
# structure directories

%cd /content/drive/My Drive/Colab/mhp_subtle_discrimination

In [None]:
#%mkdir CEAI_lunch_and_learn
#%cd CEAI_lunch_and_learn

In [None]:
#%mkdir inputs outputs code temp

#### JupyterLab

In [None]:
# set wd
# placeholder: replace ' ' with a local path before running. Later cells `%cd`
# into 'CEAI_lunch_and_learn/code' relative to it, so this is presumably the
# 'mhp_subtle_discrimination' directory - confirm.

wd = ' '
os.chdir(wd)
%pwd

### Write
Defines qualitative.py module.
***

In [None]:
%cd CEAI_lunch_and_learn/code

#### _code_texts_deductively_llama_

In [None]:
%%writefile qualitative.py

import requests
import json
import pandas as pd

def _extract_llama_fields(raw_response_str, label_column, explanation_column):
    """
    Pulls the label and explanation out of the model's free-text reply.

    The reply may wrap its JSON object in extra prose, so only the span from the
    first '{' to the last '}' is parsed. Returns (None, None) when no such span
    exists or when the parsed JSON is not an object.
    """

    start_idx = raw_response_str.find('{')
    end_idx = raw_response_str.rfind('}')

    # a closing brace must exist and must follow the opening brace

    if start_idx == -1 or end_idx < start_idx:
        print("No valid JSON found in response.")
        return None, None

    parsed_output = json.loads(raw_response_str[start_idx:end_idx + 1])

    # guard against valid JSON that is not an object (list, string, number)

    if not isinstance(parsed_output, dict):
        print("No valid JSON found in response.")
        return None, None

    return parsed_output.get(label_column), parsed_output.get(explanation_column)


def code_texts_deductively_llama(df, alias, text_column, endpoint_url, prompt_template, model_name):
    """
    Classifies each row of 'text' column in provided df in accord with human-specified prompt,
    includes chain-of-thought reasoning, returning explanations for classification decision.

    Rows whose request returns a non-200 status, or whose reply cannot be parsed,
    receive None in both output columns; processing continues with the next row.

    Parameters:
    -----------
    df : pandas.DataFrame
        The DataFrame containing the text to classify. Modified in place.
    alias : str
        The alias (for brevity) of the qualitative code to be applied.
    text_column : str
        The column name in df containing the text to be analyzed.
    endpoint_url : str
        The URL where locally hosted Llama model runs.
    prompt_template : str
        The prompt text with a '{text}' placeholder where the row's text will be inserted
        (filled with str.format, so any other literal braces must be doubled).
    model_name : str
        The model tasked with qualitative deductive coding.

    Returns:
    --------
    pandas.DataFrame
        The original DataFrame with two new columns: '{alias}_llm' (the label the
        model returned under that key) and '{alias}_expl' (the explanation).
    """

    # dynamically create {alias} column names

    label_column = f'{alias}_llm'
    explanation_column = f'{alias}_expl'

    # create empty tag ['*_llm'] and reasoning ['*_expl'] column

    df[label_column] = None
    df[explanation_column] = None

    for idx, row in df.iterrows():

        # replace '{text}' in prompt_template with df 'text' data

        prompt = prompt_template.format(text = row[text_column])

        # send request to local Llama endpoint.

        response = requests.post(
            endpoint_url,
            headers = {'Content-Type': 'application/json'},
            json = {
                'model': model_name,
                'prompt': prompt,
                'stream': False,
                },
        )

        # print statements for debugging

        print(response.status_code)
        print(response.text)

        label, explanation = None, None

        if response.status_code == 200:
            try:
                # top-level JSON; its 'response' field holds the model's reply string

                raw_response_str = response.json().get('response', ' ')
                label, explanation = _extract_llama_fields(
                    raw_response_str,
                    label_column,
                    explanation_column,
                    )

            # ValueError covers json.JSONDecodeError; AttributeError covers a
            # top-level payload or 'response' field of an unexpected type

            except (ValueError, KeyError, TypeError, AttributeError) as e:
                print("Parsing error:", e)
                label, explanation = None, None

        # insert classification results into df

        df.at[idx, label_column] = label
        df.at[idx, explanation_column] = explanation

    return df

#### _code_instance_deductively_gpt_

In [None]:
%%writefile -a qualitative.py

import time
import openai

import os

# Read the key from the OPENAI_API_KEY environment variable so the secret is
# not written into qualitative.py on disk. Falls back to the blank placeholder
# when the variable is unset.

api_key = os.environ.get('OPENAI_API_KEY', ' ')
client = openai.OpenAI(api_key = api_key)

def code_instance_deductively_gpt(text, prompts, model = 'gpt-4o', temperature = 0.2):
    """
    Applies annotation decisions, based on multiple prompts, to a given text; provides rationale and explanation.

    Parameters:
    - text: The text to annotate (sent as the user message).
    - prompts: A list of prompts to apply to the text (space-joined into the system message).
    - model: The chat model to query; defaults to 'gpt-4o'.
    - temperature: The sampling temperature; defaults to 0.2.

    Returns:
    - result: The concatenated content of all returned choices, prefixed by a single space,
      or the sentinel string 'error' if anything raised along the way.
    """
    # deliberate best-effort boundary: any failure is reported and mapped to
    # the 'error' sentinel so a batch run can skip this text and continue

    try:

        # concatenate prompts

        prompt_content = ' '.join(prompts)

        response = client.chat.completions.create(
            model = model,
            temperature = temperature,
            messages = [
                {
                    'role': 'system',
                    'content': prompt_content,
                },
                {
                    'role': 'user',
                    'content': text,
                },
            ],
        )

        # collect results (leading ' ' kept for parity with earlier outputs)

        result = ' ' + ''.join(choice.message.content for choice in response.choices)

        print(f'{text}: {result}')
        return result
    except Exception as e:
        print(f'Exception: {e}')
        return 'error'

#### _code_texts_deductively_gpt_

In [None]:
%%writefile -a qualitative.py

def _parse_gpt_annotation(result, tag):
    """
    Splits one raw GPT reply into (tag_value, rationale, explanation) for `tag`.

    tag_value is 1 when '{tag}_1' occurs in the reply, else 0. The rationale is
    only read for positive decisions; the explanation is read for both outcomes.
    Either is None when its '{tag}_rationale:' / '{tag}_explanation:' marker is absent.
    """
    rationale_marker = f'{tag}_rationale:'
    explanation_marker = f'{tag}_explanation:'

    tag_value = 1 if f'{tag}_1' in result else 0

    # rationale: text between the rationale marker and the explanation marker

    rationale = None
    if tag_value == 1 and rationale_marker in result:
        rationale = result.split(rationale_marker)[1].split(explanation_marker)[0].strip()

    # explanation: text after the explanation marker, kept for "0" replies too

    explanation = None
    if explanation_marker in result:
        explanation = result.split(explanation_marker)[1].strip()

    return tag_value, rationale, explanation


def code_texts_deductively_gpt(df, prompts_per_code, text_column = 'text', delay = 1):
    """
    Applies code_instance_deductively_gpt for multiple codes to each row in dataframe 'df'.

    Parameters:
    - df: The dataframe containing texts to annotate. Modified in place.
    - prompts_per_code: A dictionary with tag names as keys and a list of prompts as values.
    - text_column: The column holding the text to annotate; defaults to 'text'.
    - delay: Seconds to pause after each successful API call; defaults to 1.

    Returns:
    - df: The updated dataframe with '{tag}_gpt', '{tag}_rtnl_gpt', and '{tag}_expl_gpt'
      columns. Cells for a row/tag whose API call returned 'error' are left unset.
    """
    for index, row in df.iterrows():
        for tag, prompts in prompts_per_code.items():
            result = code_instance_deductively_gpt(row[text_column], prompts)
            if result == 'error':
                continue

            tag_value, rationale, explanation = _parse_gpt_annotation(result, tag)

            # results to df

            df.at[index, f'{tag}_gpt'] = tag_value
            df.at[index, f'{tag}_rtnl_gpt'] = rationale
            df.at[index, f'{tag}_expl_gpt'] = explanation

            # impose delay between API calls

            time.sleep(delay)

    return df

#### Import

In [None]:
from qualitative import(
    code_texts_deductively_llama,
    code_instance_deductively_gpt,
    code_texts_deductively_gpt,
)

In [None]:
%cd ../inputs

d = pd.read_excel(
    'd_cycle_3_sjs.xlsx', ### d_cycle_3_sjs - IAA comparison w/ GPT-4o
    index_col = [0],
    )

# human-applied code columns

codes = [
    'agnt',
    'afrm',
    'brdn',
    'fitt',
    'just',
    'prbl',
    'rbnd',
    'refl',
    ]

# codes: replace blank / whitespace-only cells w/ NaN

d[codes] = d[codes].replace(
    r'^\s*$',
    np.nan,
    regex = True,
    )

# codes: coerce to numeric

d[codes] = d[codes].apply(
    pd.to_numeric,
    downcast = 'integer',
    )

# replace NaN w/ 0 (all columns)

d.fillna(
    0,
    inplace = True,
    )

# texts: delete '<|PII|>' pseudoword

texts = [
    'text',
    ]

pseudoword_tokens = [
    #'<SPL>',
    '<|PII|>', ### 1/10: remove from 'rtnl' prior to training
    ]

# Escape tokens before using them as regex patterns: unescaped, '<|PII|>' is an
# alternation of '<', 'PII', '>' that strips stray angle brackets and leaves the
# token's pipes behind.

pseudoword_patterns = [re.escape(token) for token in pseudoword_tokens]

for t in texts:
    d[t] = d[t].replace(
        pseudoword_patterns,
        ' ',
        regex = True,
        )

# rationales: replace filled-in missing values w/ '.'

rationales = [
    'rtnl',
    #'afrm_llm_rtnl',
    #'agnt_llm_rtnl',
    #'fitt_llm_rtnl',
    #'just_llm_rtnl',
    #'refl_llm_rtnl',
    'note',
    ]

# After fillna(0) + astype(str) a missing rationale reads '0' (or '0.0' in a
# float column). Anchor the pattern so only those whole-cell fillers become '.',
# leaving zero digits inside real rationale text untouched.

for r in rationales:
    d[r] = d[r].astype(str).str.replace(
        r'^0(?:\.0)?$',
        '.',
        regex = True,
        )

# inspect

d.info()
d.head(3)

### Code
Enables human-LLM deductive coding: human-specified per-tag prompts; JSON- and .xlsx-structured outputs.
***

#### Llama 3.2: local

**Reflect (alias: `refl`): prompt formulation**

In [None]:
# Prompt parts for the Llama "Reflect" (refl) code. A later cell concatenates
# them in the order role, definition, instruction, clarification, examples.

# system-style framing of the coding task
role = '''
You are tasked with applying pre-defined qualitative codes to emails by mental health professionals (MHPs)
such as counselors, psychologists, and clinical social workers, replying to prospective clients.

You will be provided a definition, instructions, and key exemplars of text to guide your coding decisions.
'''

# codebook definition of the code
definition = '''
Definition of "Reflect": MHPs reflecting specific symptoms or concerns.
'''

# Output contract: JSON with keys "refl_llm" and "refl_expl". '{text}' is the
# placeholder later filled with each row's text, so it must stay literal here.
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Reflect," output refl_llm as "1".
- Otherwise, output refl_llm as "0".
- Also provide a short explanation in exactly two sentences, stored in refl_expl.

Please respond in valid JSON with keys "refl_llm" and "refl_expl" only.

Text:
{text}
'''

# inclusion / exclusion boundaries for the code
clarification = '''
- "Reflect" is limited to reflections of prospective clients’ symptoms (anxiety, depression, stress)
or clear synonyms thereof (“sadness”).
- "Reflect" does not apply to reflections of clients' identities.
'''

    ### 1/10: examples below all human-created sham; updates tktk

# few-shot exemplars
examples = '''
Below are human-validated examples of "Reflect"

- "It's clear your anxiety is disrupting your life. Let's do what we can to begin your healing journey."

- "Depression can be a bear. So sorry to hear of your struggles."

- "We've seen a lot of stress like yours going around lately, but it can be dealt with <|PII|>."
'''

**Code deductively**

In [None]:
%%capture

# join the five prompt parts, in order, into one template

refl_prompt = ''.join([role, definition, instruction, clarification, examples])
#print(refl_prompt)

# generate endpoint of the locally hosted model

llama_endpoint = 'http://localhost:11434/api/generate'

# code every row of d for 'refl'; results land in new columns of d

d = code_texts_deductively_llama(
    df = d,
    alias = 'refl',
    text_column = 'text',
    endpoint_url = llama_endpoint,
    prompt_template = refl_prompt,
    model_name = 'llama3',
)


#### GPT-4o: OpenAI API

**Role assignment**

In [None]:
# Shared task framing, prepended to every per-code GPT prompt built in the cells below.
role = '''
You are tasked with applying pre-defined qualitative codes to emails by mental health professionals (MHPs)
such as counselors, psychologists, and clinical social workers, replying to prospective clients.

You will be provided a definition, instructions, and key exemplars of text to guide your coding decisions.
'''

#### Affirm (alias: `afrm`): prompt formulation

In [None]:
# Prompt parts for the "Affirm" (afrm) code. The reply markers named in
# `instruction` ("afrm_1", "afrm_rationale:", "afrm_explanation:") must match
# the strings the response parser searches for.

definition = '''
Definition of "Affirm": Responding in a manner that explicitly reinforces prospective client’s decision to seek therapy.
'''

# fixed: the rationale marker previously read "afrm"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Affirm," respond with "afrm_1"
- Otherwise, respond with "0".
- You must choose a "afrm_1" or a "0" response.
- If your response is "afrm_1," then begin a new paragraph with "afrm_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "afrm_1" or a "0" begin a new paragraph with "afrm_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- Affirmation of treatment-seeking must be explicit
- Acknowledgements of the difficulty in seeking care fulfill the definition of "Affirm"
- Expressions of gratitude alone ("thank you for reaching out") do _not_ fulfill the definition of "Affirm"
- Ambiguous positive responses do _not_ fulfill the definition of "Affirm"
'''
### 1/20: examples below all human-created sham; updates tktk

# NOTE(review): the second example lacks a closing quotation mark - left as authored
examples = '''
Below are human-validated examples of "Affirm"

"It takes bravery to ask for help, and I'm glad you made the choice <|PII|>."

"How amazing to take this first step, it can be tough decision to make, but I believe it is often the right one.
'''

# concatenate prompt as f-string

afrm_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(afrm_prompt)

#### Agent (alias: `agnt`): prompt formulation

In [None]:
# Prompt parts for the "Agent" (agnt) code. The reply markers named in
# `instruction` ("agnt_1", "agnt_rationale:", "agnt_explanation:") must match
# the strings the response parser searches for.

definition = '''
Definition of "Agent": Interfacing with a prospective client via an automatic message or non-MHP staffer such as a scheduler or office coordinator.
'''

# fixed: the rationale marker previously read "agnt"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Agent," respond with "agnt_1"
- Otherwise, respond with "0".
- You must choose a "agnt_1" or a "0" response.
- If your response is "agnt_1," then begin a new paragraph with "agnt_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "agnt_1" or a "0" begin a new paragraph with "agnt_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- Responses in which a clinician responds first, even to hand off to a scheduler, do _not_ fulfill the definition of "Agent."
'''
### 1/20: examples below all human-created sham; updates tktk

# NOTE(review): example text contains misspellings ("Counelling", "folloing") - left as authored
examples = '''
Below are human-validated examples of "Agent"

"Thank for choosing <|PII|> Counelling Center. We are eager to assist you. First, please reply to this message with your insurance details."

"We received your message regarding starting therapy with Dr. <|PII|>. She does currently have availability. We will need the folloing information."
'''

# concatenate prompt as f-string

agnt_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(agnt_prompt)

dmnd

In [None]:
# tk

#### Fit (alias: `fitt`): prompt formulation

In [None]:
# Prompt parts for the "Fit" (fitt) code. The reply markers named in
# `instruction` ("fitt_1", "fitt_rationale:", "fitt_explanation:") must match
# the strings the response parser searches for.

definition = '''
Definition of "Fit": Explicitly mentioning the importance of client-MHP rapport, and/or alliance in a client-empowering manner.
'''

# fixed: the rationale marker previously read "fitt"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Fit," respond with "fitt_1"
- Otherwise, respond with "0".
- You must choose a "fitt_1" or a "0" response.
- If your response is "fitt_1," then begin a new paragraph with "fitt_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "fitt_1" or a "0" begin a new paragraph with "fitt_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- Characteristic terms may include "fit," "match," "compatibility."
'''
### 1/20: examples below all human-created sham; updates tktk

# NOTE(review): example text contains a misspelling ("compatability") - left as authored
examples = '''
Below are human-validated examples of "Fit"

"I prefer to schedule a gratis 30-minute call to be sure we'll be a good fit for each other."

"In the initial session, we can discuss our compatability in working toward your therapy goals."

"Let's first set aside an introductory session to explore your needs and assess our fit."
'''

# concatenate prompt as f-string

fitt_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(fitt_prompt)

#### Friction (alias: `frtn`): prompt formulation

In [None]:
# Prompt parts for the "Friction" (frtn) code. The reply markers named in
# `instruction` ("frtn_1", "frtn_rationale:", "frtn_explanation:") must match
# the strings the response parser searches for.
# NOTE(review): the human-coded column for this construct appears to be 'brdn',
# not 'frtn' - confirm before comparing coders.

definition = '''
Definition of "Friction": MHPs interjecting and describing administrative burden as reality of provisioning social services
'''

# fixed: the rationale marker previously read "frtn"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Friction," respond with "frtn_1"
- Otherwise, respond with "0".
- You must choose a "frtn_1" or a "0" response.
- If your response is "frtn_1," then begin a new paragraph with "frtn_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "frtn_1" or a "0" begin a new paragraph with "frtn_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- Any response in which an MHP requires >2 additional steps from a client before therapy can begin fulfills the "Friction" definition
'''
### 1/20: examples below all human-created sham; updates tktk

examples = '''
Below are human-validated examples of "Friction"

"Thank you for contacting<|PII|> Services. Before we can arrange an appointment, please take a moment to create an account on our
scheduling portal, enter your address and telephone information, and provide valid insurance, including uploading a photo of
your insurance card."

'''

# concatenate prompt as f-string

frtn_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(frtn_prompt)

#### Justify (alias: `just`): prompt formulation

In [None]:
# Prompt parts for the "Justify" (just) code. The reply markers named in
# `instruction` ("just_1", "just_rationale:", "just_explanation:") must match
# the strings the response parser searches for.

# NOTE(review): "clientas" looks like a missing space ("client as") - left as authored
definition = '''
Definition of "Justify": Providing a logistical reason for being unable to meet with prospective clientas a personal/professional courtesy.
'''

# fixed: the rationale marker previously read "just"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Justify," respond with "just_1"
- Otherwise, respond with "0".
- You must choose a "just_1" or a "0" response.
- If your response is "just_1," then begin a new paragraph with "just_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "just_1" or a '0', begin a new paragraph with "just_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- Reasons may include full caseload, geographic bounds of licensure, imminent retirement.
- Rejections based on client identity do _not_ fulfill the definition of "Justify."
'''

### 1/10: examples below all human-created sham; updates tktk

examples = '''
Below are human-validated examples of "Justify"

"I would love to be able to help, but my caseload is currently full and I cannot take new clients."

"Unfortunately I am fully booked through the next few months, and unable to commit to new appointments."

'''

# concatenate prompt as f-string

just_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(just_prompt)

#### Rebound (alias: `rbnd`): prompt formulation

In [None]:
# Prompt parts for the "Rebound" (rbnd) code. The reply markers named in
# `instruction` ("rbnd_1", "rbnd_rationale:", "rbnd_explanation:") must match
# the strings the response parser searches for.

definition = '''
Definition of "Rebound": Multiple burdensome queries imposed on the prospective client in response to appointment-seeking inquiries
'''

# fixed: the rationale marker previously read "rbnd"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Rebound," respond with "rbnd_1"
- Otherwise, respond with "0".
- You must choose a "rbnd_1" or a "0" response.
- If your response is "rbnd_1," then begin a new paragraph with "rbnd_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "rbnd_1" or a '0', begin a new paragraph with "rbnd_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- More than 2 consecutive questions directed at a client fulfill the definition of "Rebound."

- Consecutive questions may be embedded within a single sentence
'''

### 1/10: examples below all human-created sham; updates tktk

# NOTE(review): example text contains run-together words and a misspelling - left as authored
examples = '''
Below are human-validated examples of "Rebound"

"First, does your insurance reimburse formental health and if so whichinsuranceis it?"

"Are you able to meet in person? Will you require special accomdoations? When did your symptoms begin, and what event kicked them off?"

'''

# concatenate prompt as f-string

rbnd_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(rbnd_prompt)

#### Reflect (alias: `refl`): prompt formulation

In [None]:
# Prompt parts for the GPT "Reflect" (refl) code. The reply markers named in
# `instruction` ("refl_1", "refl_rationale:", "refl_explanation:") must match
# the strings the response parser searches for.
# NOTE: rebinds `refl_prompt`, replacing the JSON-style Llama template of the same name.

definition = '''
Definition of "Reflect": MHPs describing their ability to aid specific symptoms or concerns raised by the client.
'''

# fixed: the rationale marker previously read "refl"_rationale:" (stray quote)
instruction = '''
You will be provided with a piece of text. For each piece of text:
- If it meets the definition of "Reflect," respond with "refl_1"
- Otherwise, respond with "0".
- You must choose a "refl_1" or a "0" response.
- If your response is "refl_1," then begin a new paragraph with "refl_rationale:" and excerpt the sentences or
phrases that determined your decision. You are allowed to choose multiple sentences or phrases, divided by an
"<|SPL|>" token.
- Then, whether you have selected a "refl_1" or a '0', begin a new paragraph with "refl_explanation:" and provide
a two sentence explanation for your response.
'''

clarification = '''
- "Reflect" is limited to reflections of prospective clients’ symptoms (anxiety, depression, stress)
or clear synonyms thereof ("sadness").
- "Reflect" does not apply to reflections of clients' identities.
'''

### 1/10: examples below all human-created sham; updates tktk

examples = '''
Below are human-validated examples of "Reflect"

- "It's clear your anxiety is disrupting your life. Let's do what we can to begin your healing journey."

- "Depression can be a bear. So sorry to hear of your struggles."

- "We've seen a lot of stress like yours going around lately, but it can be dealt with <|PII|>."
'''

# concatenate prompt as f-string

refl_prompt = f'{role}{definition}{instruction}{clarification}{examples}'
print(refl_prompt)

**Code deductively**

In [None]:
#%%capture

# map each code alias to the list of prompts applied for it
# ('frtn' is intentionally left out)

prompts_per_code = dict(
    afrm = [afrm_prompt],
    agnt = [agnt_prompt],
    fitt = [fitt_prompt],
    #frtn = [frtn_prompt],
    just = [just_prompt],
    rbnd = [rbnd_prompt],
    refl = [refl_prompt],
)

# run every code over every row of d

d = code_texts_deductively_gpt(d, prompts_per_code)

In [None]:
# inspect

#print(d)
d.head(10)

In [None]:
# export

#%cd ../outputs

#d.to_excel('d_cycle_3_sjs_gpt.xlsx')

### 4. Fidelity
Calculates inter-coder reliability scores over independent coding applications; dummy-codes disagreements for deliberation.
***

#### Compute Cohen's $\kappa$

In [None]:
%cd ../outputs

#d = pd.read_excel(
#    'd_cycle_3_sjs_gpt.xlsx',
#    index_col = [0],
#    )

#print(d.columns)

# keep only rows that carry a GPT decision for every active code

gpt_code_columns = [
    'afrm_gpt',
    'agnt_gpt',
    'fitt_gpt',
    #'frtn_gpt',
    'just_gpt',
    'rbnd_gpt',
    'refl_gpt',
    ]

d = d.dropna(subset = gpt_code_columns)

# inspect

d.info()
d.head(3)

In [None]:
# kappa helper

def calculate_kappa(d, col1, col2):
    """Return cohen_kappa_score computed over columns `col1` and `col2` of `d`."""
    first_coder = d[col1]
    second_coder = d[col2]
    return cohen_kappa_score(first_coder, second_coder)

# (human-coded column, GPT-coded column) pairs to compare; the Friction pair
# ('brdn' vs 'frtn_gpt') is disabled
col_pairs = [
    ('afrm', 'afrm_gpt'),
    ('agnt', 'agnt_gpt'),
    ('fitt', 'fitt_gpt'),
    #('brdn', 'frtn_gpt'),
    ('just', 'just_gpt'),
    ('rbnd', 'rbnd_gpt'),
    ('refl', 'refl_gpt'),
    ]

# initialize dict
# filled by the kappa loop below, keyed '<col1> and <col2>'

kappa_results = {}

# % agreement helper

def calculate_percent_agreement(df, col_pairs):
    """
    Return {'<col1> & <col2>': percent} for each (col1, col2) in `col_pairs`,
    where percent is the share of rows in `df` whose two values are equal, times 100.
    """
    n_rows = len(df)
    return {
        f"{first} & {second}": ((df[first] == df[second]).sum() / n_rows) * 100
        for first, second in col_pairs
    }

# % agreement per column pair

percent_agreement_results = calculate_percent_agreement(d, col_pairs)

for pair in percent_agreement_results:
    print(f"Percent agreement for {pair}: {percent_agreement_results[pair]:.2f}%")

# kappa per column pair, stored under '<col1> and <col2>'

kappa_results.update(
    (f'{col1} and {col2}', calculate_kappa(d, col1, col2))
    for col1, col2 in col_pairs
)

for pair in kappa_results:
    print(f"Cohen's Kappa for {pair}: {kappa_results[pair]:.2f}")


#### Flag disagreements

In [None]:
# flag disagreements fx

def encode_disagreements(row):
    """
    Return 1 if the first two values of `row` differ, else 0.

    `row` may be a pandas Series (as passed by DataFrame.apply with axis = 1) or a
    plain sequence. Series values are read by position through .iloc, because
    integer keys on a label-indexed Series are deprecated as positional lookups.
    Two NaN values compare as different and are therefore flagged 1.
    """
    values = row.iloc if hasattr(row, 'iloc') else row
    return 1 if values[0] != values[1] else 0

# (human-coded column, GPT-coded column, disagreement flag column)
# Friction: the human-coded column is 'brdn' (there is no 'frtn' column in d)

col_dis = [
    ('afrm', 'afrm_gpt', 'afrm_dis'),
    ('agnt', 'agnt_gpt', 'agnt_dis'),
    ('fitt', 'fitt_gpt', 'fitt_dis'),
    ('brdn', 'frtn_gpt', 'frtn_dis'),
    ('just', 'just_gpt', 'just_dis'),
    ('rbnd', 'rbnd_gpt', 'rbnd_dis'),
    ('refl', 'refl_gpt', 'refl_dis'),
  ]

for col1, col2, dis_col in col_dis:

    # skip codes that were not run (e.g., 'frtn' while its prompt is disabled)
    # instead of failing with a KeyError on the missing column

    if col1 not in d.columns or col2 not in d.columns:
        print(f'Skipping {dis_col}: {col1} and/or {col2} not found')
        continue

    d[dis_col] = d[[col1, col2]].apply(
        encode_disagreements,
        axis = 1,
        )

# export

d.to_excel('d_pilot_coded_iaa.xlsx')

> End of mhp_llm_qualitative_share.ipynb