### example of automated gold-standard answer extraction

In [1]:
# Keywords and phrases that flag injection drug use in note text.
# The parsing cells test each entry as a plain substring of the text, so the
# leading/trailing spaces in ' idu' and 'ssp ' are part of the match
# (presumably to avoid hits inside longer words -- confirm before trimming).
# All entries are lower case so they also work with case-sensitive matching.
keywords_list = [
    # abbreviations and generic "drug use" phrasings
    'ivdu', ' idu', 'ivda',
    'iv drug use', 'intravenous drug use',
    'iv drug abuse', 'intravenous drug abuse',
    'injection drug use', 'injection drug abuse',
    'inject drug', 'injected drug',
    'drugs by injection', 'drug by injection',
    'iv drug injector', 'intravenous drug injector',
    'iv drug injection', 'intravenous drug injection',
    'illicit iv drug', 'illicit intravenous drug',
    'injection illicit drug use', 'pwid',
    'iv drug paraphernalia', 'intravenous drug paraphernalia',
    'suspect injecting', 'skin popping', 'muscle popping',
    # route + drug name
    'iv heroin', 'iv meth', 'iv cocaine',
    'iv methamphetamine', 'iv crack',
    'inject heroin', 'inject meth', 'inject cocaine',
    'inject methamphetamine', 'inject crack',
    'injects heroin', 'injects meth', 'injects cocaine',
    'injects methamphetamine', 'injects crack',
    'injected heroin', 'injected meth', 'injected cocaine',
    'injected methamphetamine', 'injected crack',
    'intravenous heroin', 'intravenous meth', 'intravenous cocaine',
    'intravenous methamphetamine', 'intravenous crack', 'speedball',
    # physical findings and needle handling
    'track marks', 'shared needle', 'sharing needle', 'dirty needle',
    # syringe programmes and counselling
    'ssp ', 'ris4e', 'safe injection',
    'community syringe services', 'clean syringe prog', 'safe syringe prog',
    'counseled on safe injection', 'counseled on safer injection',
    'syringe service', 'safer injection',
]

In [2]:
# Cue words that mark a negated statement.
negation_patterns = [
    'denying', 'denies', 'denied', 'no', 'never',
]

# Temporal qualifiers, from the most specific phrasing to single words.
temporal_patterns = [
    # <qualifier> followed by history / hx / h/o
    'past medical history', 'past medical hx', 'past medical h/o',
    'remote history', 'remote hx', 'remote h/o',
    'distant history', 'distant hx', 'distant h/o',
    'prior history', 'prior hx', 'prior h/o',
    'previous history', 'previous hx', 'previous h/o',
    'former history', 'former hx', 'former h/o',
    'active history', 'active hx', 'active h/o',
    'current history', 'current hx', 'current h/o',
    'recent history', 'recent hx', 'recent h/o',
    'last history', 'last hx', 'last h/o',
    'long history', 'long hx', 'long h/o',
    # frequency words
    'daily', 'occasional', 'regularly',
    'often', 'sometimes', 'frequently',
    # bare qualifiers
    'past', 'remote', 'distant', 'prior',
    'previous', 'former', 'active', 'current',
    'recent', 'last', 'long', 'intermittent',
    # history / section abbreviations
    'hpi', 'history', 'hx', 'h/o', 'pmh',
]

# Relative-time phrases ("<n> <unit> ago").
add_temp_patterns = [
    "year ago", "years ago", "yr ago", "yrs ago",
    "month ago", "months ago", "mnth ago", "mnths ago", "mos ago",
    "day ago", "days ago", "d ago",
    'wk ago', 'wks ago',
]

# Descriptor words associated with track marks.
trackmark_patterns = [
    'arm', 'arms', 'abnormal', 'multiple', 'many',
    'several', 'healing', 'healed', 'old', 'diffuse',
    'localized', 'visible', 'red', 'iv', 'fresh', 'dark',
    'needle', 'notable',
]

# Substance / opioid use disorder terms.
# NOTE(review): 'substance disorder' may have been meant as
# 'substance use disorder' -- confirm before changing.
sud_oud_patterns = [
    'substance disorder', 'polysubstance use disorder',
    'substance abuse disorder', 'polysubstance abuse disorder',
    'sud', 'psud', 'oud', 'polysubstance',
    'opioid use disorder', 'opioid', 'opiate',
]

In [58]:
# Example inputs for the extraction demo (replace with real data):
#   note    = <CLINICAL NOTE>          -- free text to search, split into sentences on '.'
#   note_id = <NOTE ID TO KEEP TRACK>  -- identifier returned with every extracted sentence

note = "<more info> pt has a h/o ivdu. pt injects iv heroin daily. has a remote history of iv cocaine, last used 10 years ago. <more info>"
note_id = "xxxxx"

import re

def get_sentences_with_kw(note, note_id, kw_list):
    """Collect the sentences of a note that mention at least one keyword.

    The note is split on '.' and each piece is kept verbatim (leading
    whitespace included). Keywords are matched as literal, case-sensitive
    substrings, so characters such as '+', '(' or '?' in a keyword are
    compared as-is rather than being interpreted as regular-expression syntax.

    Args:
        note: Text of the note to search.
        note_id: Identifier paired with every returned sentence.
        kw_list: Iterable of keyword strings.

    Returns:
        A list of ``(note_id, sentence)`` tuples in note order, or ``None``
        when no sentence contains any keyword.
    """
    matches = [
        (note_id, sent)
        for sent in note.split('.')
        if any(kw in sent for kw in kw_list)
    ]
    # Callers get None (not an empty list) when nothing matched.
    return matches or None
    

In [59]:
# Label for this keyword group; used as the key of the result dict.
kw = "drug names"

# Route-of-use + drug-name phrases, one route per row.
kw_list = [
    'iv heroin', 'iv meth', 'iv cocaine', 'iv methamphetamine', 'iv crack',
    'inject heroin', 'inject meth', 'inject cocaine', 'inject methamphetamine', 'inject crack',
    'injects heroin', 'injects meth', 'injects cocaine', 'injects methamphetamine', 'injects crack',
    'injected heroin', 'injected meth', 'injected cocaine', 'injected methamphetamine', 'injected crack',
    'intravenous heroin', 'intravenous meth', 'intravenous cocaine', 'intravenous methamphetamine', 'intravenous crack',
    'speedball',
]

# Map the group label to the (note_id, sentence) pairs found in the note.
keyword_ans_dict = {
    kw: get_sentences_with_kw(note, note_id, kw_list),
}
keyword_ans_dict

{'drug names': [('xxxxx', ' pt injects iv heroin daily'),
  ('xxxxx', ' has a remote history of iv cocaine, last used 10 years ago')]}

### example of parsing rules to clean the answers

In [9]:
# Three made-up answer strings for exercising the parsing rules: the first two
# contain a negation cue ('denies', 'never') ahead of 'ivdu', the third contains
# a relative-time phrase ('days ago') after it. The trailing backslash in the
# first entry joins its two source lines into one string without a newline.
dummy_ans_ls = ["""65y/o m w cardiac procedures, or recent surgical procedures, \
admits to drinking alcohol daily for the past 10 years, denies any history of ivdu""",
               """pt smokes cannabis, never ivdu""",
               """last ivdu was 10 days ago, snorts cocaine occasionally"""
               ]

In [19]:
import re

# Negation rule: for every answer, look for each negation cue as a whole word
# and, when one of the keywords occurs at or after that cue, keep the text from
# the cue to the end of the answer. Whole-word matching prevents short cues
# such as 'no' from firing inside words like "snorts" or "notable".
# When several cues qualify, the one listed last in negation_patterns wins.
ans_dict = {}
for answer in dummy_ans_ls:
    if answer in ans_dict:
        continue  # duplicate answer string already handled
    for cue in negation_patterns:
        cue_match = re.search(rf"\b{re.escape(cue)}\b", answer)
        if cue_match is None:
            continue
        negated_span = answer[cue_match.start():]
        if any(kw.lower() in negated_span for kw in keywords_list):
            ans_dict[answer] = negated_span
ans_dict

{'65y/o m w cardiac procedures, or recent surgical procedures, admits to drinking alcohol daily for the past 10 years, denies any history of ivdu': 'denies any history of ivdu',
 'pt smokes cannabis, never ivdu': 'never ivdu'}

In [20]:
# Relative-time rule for answers that are not yet in ans_dict: cut the answer
# right after the first occurrence of a relative-time phrase and keep that
# prefix if it mentions any keyword (keyword and prefix compared in lower case).
# When several phrases qualify, the one listed last in add_temp_patterns wins.
for answer in dummy_ans_ls:
    if answer in ans_dict:
        continue
    for time_phrase in add_temp_patterns:
        phrase_start = answer.find(time_phrase)
        if phrase_start == -1:
            continue
        prefix = answer[:phrase_start + len(time_phrase)]
        prefix_lower = prefix.lower()
        if prefix and any(kw.lower() in prefix_lower for kw in keywords_list):
            ans_dict[answer] = prefix
ans_dict

{'65y/o m w cardiac procedures, or recent surgical procedures, admits to drinking alcohol daily for the past 10 years, denies any history of ivdu': 'denies any history of ivdu',
 'pt smokes cannabis, never ivdu': 'never ivdu',
 'last ivdu was 10 days ago, snorts cocaine occasionally': 'last ivdu was 10 days ago'}