In [1]:
import pandas as pd
import numpy as np 

### from spacy.tokenizer import Tokenizer
import spacy
from spacy.lang.en import English
from spacy_wordnet.wordnet_annotator import WordnetAnnotator 

In [2]:
sentences = pd.read_csv("data/stig_word_sents.csv")

In [3]:
"""
ChatGPT: Replace each of these stigmatizing words with their best replacement for in a clinical note: 'abuse', 'abuser', 
'abuses', 'abusing', 'addict', 'addicted',  'adherence', 'adherent', 'argumentative', 'belligerent', 'cheat',   
'combative', 'complience', 'compliant', 'control', 'contolled',  'controls', 'fail', 'failed', 'fails', 'failure',
'habit', 'narcotic', 'narcotics', 'nonadherence', 'nonadherent', 'refuse',  'refused', 'refuses', 'uncontrolled', '
unmotivated', 'unwilling',  'user'
"""

chat_gpt = {
    "abuse": "Misuse",
    "abuser": "Individual who misuses",
    "abuses": "Misuses",
    "abusing": "Misusing",
    "addict": "Person with substance use disorder",
    "addicted": "Experiencing substance use disorder",
    "adherence": "Medication/treatment compliance",
    "adherent": "Compliant with medication/treatment",
    "argumentative": "Expressing differing viewpoints",
    "belligerent": "Exhibiting aggressive behavior",
    "cheat": "Engage in non-compliant behavior",
    "combative": "Exhibiting confrontational behavior",
    "complience": "Medication/treatment adherence",
    "compliant": "Adheres to medication/treatment",
    "control": "Manage",
    "controlled": "Managed",
    "controls": "Manages",
    "fail": "Not succeed",
    "failed": "Did not succeed",
    "fails": "Does not succeed",
    "failure": "Lack of success",
    "habit": "Pattern",
    "narcotic": "Medication/analgesic (use specific drug names when appropriate)",
    "narcotics": "Medications/analgesics (use specific drug names when appropriate)",
    "nonadherence": "Lack of compliance",
    "nonadherent": "Noncompliant",
    "refuse": "Decline",
    "refused": "Declined",
    "refuses": "Declines",
    "uncontrolled": "Not managed",
    "unmotivated": "Lacking motivation",
    "unwilling": "Reluctant",
    "user": "Individual with substance use history"
}

In [4]:
# !python -m spacy download en_core_web_lg

In [5]:
### from spacy.tokenizer import Tokenizer
import spacy
from spacy.lang.en import English
from spacy_wordnet.wordnet_annotator import WordnetAnnotator 


nlp = spacy.load("en_core_web_lg")
nlp.add_pipe("spacy_wordnet", after='tagger')

<spacy_wordnet.wordnet_annotator.WordnetAnnotator at 0x7fc6481b5430>

In [6]:
replacements = []
chatgpt_replacements = []
for i, row in sentences.iterrows():
    sentence = nlp(row.SENTENCE)
    enriched_sentence = []
    for token in sentence:
        # We get those synsets within the desired domains
        if token.text ==  row.STIG_WORD:
            synsets = token._.wordnet.synsets()
            # synsets = token._.wordnet.wordnet_synsets_for_domain(["medicine"])

            if not synsets:
                best_match_word = token.text
            else:
                lemmas_for_synset = [lemma for s in synsets for lemma in s.lemma_names() if lemma != token.text]
                # If we found a synset in the economy domains
                # we get the variants and add them to the enriched sentence
                best_match_score = -1
                best_match_word = token.text
                for lemma in lemmas_for_synset:
                    lemma = nlp(lemma)
                    if lemma.vector_norm == 0:
                        continue
                    lemma_sim = token.similarity(nlp(lemma))
                    if  lemma_sim > best_match_score:
                        best_match_score = lemma_sim
                        best_match_word = lemma
                            
                enriched_sentence.append(f'[{best_match_word}]')
        else:
            enriched_sentence.append(token.text)
    
    replacements.append(best_match_word)
    chatgpt_replacements.append(chat_gpt[row.STIG_WORD])

In [7]:
sentences["REPLACEMENT"] = replacements
sentences["CHAT_GPT_REPLACEMENT"] = chatgpt_replacements

In [8]:
sentences

Unnamed: 0,SENTENCE,STIG_WORD,REPLACEMENT,CHAT_GPT_REPLACEMENT
0,Past Medical History:\nOsteoporosis\nHypertens...,abuse,(misuse),Misuse
1,Alcohol abuse\n Assessment:\n Patient stat...,abuse,(misuse),Misuse
2,"Has 18yo son, location unknown to her; states ...",abuse,(misuse),Misuse
3,[**Hospital 2**] MEDICAL CONDITION:\n 53 y/o ...,abuse,(misuse),Misuse
4,He reports that pt has no psychiatric or subst...,abuse,(misuse),Misuse
...,...,...,...,...
1229,"Per OMR note, pt's dtr has reported pt to\n ...",user,(exploiter),Individual with substance use history
1230,Habits:\n- smokes cigarettes\n- substance and ...,user,(exploiter),Individual with substance use history
1231,"propafol at 100 mgm hr, fentanyl 25 mcq adjust...",user,(exploiter),Individual with substance use history
1232,Pt also chronic pain med user who is currently...,user,(exploiter),Individual with substance use history


In [9]:
sentences.to_csv("stig_word_sent_replacement_wordvec_chatgpt.csv", index=False)