# Generate PHQ9 Terms + UMLS Synonyms

In [1]:
import re
import mysql.connector


In [2]:
import string

class MetaNorm(object):
    """Normalize UMLS Metathesaurus concept strings.

    The normalizer applies a fixed sequence of regex/string heuristics that
    strip bracketed qualifiers, "NOS" markers, retirement flags and similar
    clutter, then hands the result to an optional user-supplied function.

    :param function: callable applied to the cleaned string as the last
        normalization step; defaults to the identity function.
    """

    # Matches a string that starts with a digit and otherwise consists only of
    # digits and ASCII punctuation.  This accepts the same strings as the
    # previous nested-quantifier form "^([0-9]+[punct]*)+$" but cannot
    # backtrack exponentially on long digit runs.  The punctuation is passed
    # through re.escape so that "\", "]", "^" and "-" are all literal members
    # of the character class (the unescaped form silently dropped "\").
    _DIGITS_PUNCT_RGX = re.compile(
        r"^[0-9][0-9{}]*$".format(re.escape(string.punctuation))
    )

    def __init__(self, function=lambda x: x):
        # TTY in [OF,FN] suffixes
        suffixes = ['qualifier value', 'life style', 'cell structure', 'domestic', 'bird', 'organism',
                    'context\\-dependent category', 'inactive concept',
                    'navigational concept', 'lck', 'record artifact',
                    'core metadata concept', 'substance', 'event',
                    'organism', 'person', 'attribute', 'procedure',
                    'tumor staging', 'a', 'cell', 'chloroaniline',
                    'product', 'specimen', 'observable entity',
                    'racial group', 'si', 'namespace concept',
                    'environment', 'social concept', 'ras', 'unspecified',
                    'special concept', 'staging scale', 'disorder',
                    'geographic location', 'occupation', 'ethnic group',
                    'body structure', 'situation', 'physical force',
                    'trans', 'finding', 'epoxymethano', 'linkage concept',
                    'assessment scale', 'metadata', 'link assertion',
                    'dithiocarbamates', 'foundation metadata concept',
                    'morphologic abnormality', 'physical object']
        # Longest alternatives first so that e.g. "cell structure" is tried
        # before "cell" inside the alternation.
        alternatives = "|".join(sorted(suffixes, key=len, reverse=True))
        self.of_fn_rgx = r"\(({})\)$".format(alternatives)
        self.function = function

    def normalize(self, s):
        '''
        Heuristics for stripping non-essential UMLS string clutter

        :param s: raw concept string
        :return: the cleaned string after ``self.function`` has been applied;
            this is the empty string when the input reduces to nothing
            (for example a string made only of digits and punctuation)
        '''
        s = s.replace("--", " ")
        # drop a trailing bracketed chunk: everything from the first opening
        # (, [ or < through a closing >, ) or ] at the end of the string
        s = re.sub(r"[(\[<].+[>)\]]$", "", s)
        s = re.sub(r"(\[brand name\]|[,]* NOS)+", "", s).strip()
        s = s.strip().strip("_").strip(":")
        # single-character bracketed flags such as "[X]"
        s = re.sub(r"(\[.{1}\])+", "", s).strip()
        s = re.sub(r"\-RETIRED\-$", "", s).strip()
        s = re.sub(r"BOLD[:].+$", "", s).strip()
        s = re.sub(r" @ @ ", " ", s).strip()
        # normalize TTY in [OF,FN]
        s = re.sub(self.of_fn_rgx, "", s).strip()
        # remove digits/stray punctuation
        s = self._DIGITS_PUNCT_RGX.sub("", s).strip()
        # custom normalize function
        return self.function(s)

In [3]:
# PHQ9 seed terms.
# Each bracketed group below is one row of the original hand-written list
# (the rows appear to follow the nine PHQ-9 items in order -- confirm).
# Repeated entries are kept exactly as they were written.
terms = (
    ['interest', 'pleasure']
    + ['depressed', 'hopeless', 'feeling down', 'depressed', 'hopeless']
    + ['sleep', 'asleep', 'sleeping', 'sleepy']
    + ['tired', 'energy']
    + ['appetite', 'eating', 'overeating']
    + ['bad', 'fail', 'failure', 'let down']
    + ['concentration', 'concentrating']
    + ['slow', 'slowly', 'fidgety', 'restless', 'moving slow', 'slowly', 'speaking slow']
    + ['dead', 'hurt', 'hurting', 'suicide', 'hurt self', 'better off dead']
)

In [4]:
import collections

# Concept lookup: returns (CUI, STR) rows from MRCONSO joined to MRSTY on CUI.
# The seed term is bound as-is to the LIKE placeholder (no wildcard is added
# here), and the join can yield the same CUI more than once -- the set below
# absorbs the repeats.
query = (
    "SELECT MRCONSO.CUI, STR FROM MRCONSO, MRSTY "
    "WHERE STR like %s AND MRSTY.CUI=MRCONSO.CUI"
)

# This connection and cursor stay open; the synonym-building cell reuses them.
cnx = mysql.connector.connect(user='fries', database='2014AB')
cursor = cnx.cursor()

# map PHQ9 seed terms to UMLS unique concept identifiers
concepts = collections.defaultdict(set)
for seed in terms:
    cursor.execute(query, (seed,))
    # a seed term only gets an entry once at least one row comes back
    for matched_cui, _matched_str in cursor:
        concepts[seed].add(matched_cui)
                                   

## Build Synonyms

In [20]:
# Synonym lookup: every string recorded for one CUI, together with its
# semantic type, restricted to the semantic types named in the IN clause.
# NOTE(review): "MRSTY.STY STR" aliases the semantic-type column as STR;
# this looks unintentional -- confirm before relying on column names.
query = (
    "SELECT MRCONSO.CUI, STR, MRSTY.STY STR FROM MRCONSO, MRSTY "
    "WHERE MRCONSO.CUI=%s AND MRSTY.CUI=MRCONSO.CUI AND "
    "STY IN ('Finding', 'Mental or Behavioral Dysfunction', 'Qualitative Concept', 'Sign or Symptom', 'Mental Process')"
)

metanorm = MetaNorm()

# normalized lower-case string -> (cui, normalized string, semantic type);
# when several rows normalize to the same string the last one seen wins
termset = {}

for cui_group in concepts.values():
    for seed_cui in cui_group:
        cursor.execute(query, (seed_cui,))
        for cui, raw_string, sty in cursor:
            key = metanorm.normalize(raw_string).strip()
            if not key:
                # nothing left after normalization; skip the row
                continue
            key = key.lower()
            termset[key] = (cui, key, sty)
           
     

In [21]:
# Print the synonym table as tab-separated rows: term, CUI, semantic type
for name in sorted(termset):
    cui, _, sty = termset[name]
    print("\t".join((name, cui, sty)))

agitate	C0085631	Sign or Symptom
agitated	C0085631	Sign or Symptom
agitated - symptom	C0085631	Sign or Symptom
agitated behavior	C0085631	Sign or Symptom
agitated behaviour	C0085631	Sign or Symptom
agitates	C0085631	Sign or Symptom
agitation	C0085631	Sign or Symptom
agitation, psychomotor	C0085631	Sign or Symptom
asleep	C0424522	Finding
attention concentration	C0086045	Mental Process
bad	C0205169	Qualitative Concept
badly	C0205169	Qualitative Concept
cancer patients and depression and suicide	C0812393	Mental or Behavioral Dysfunction
cancer patients and suicide and depression	C0812393	Mental or Behavioral Dysfunction
cannot see a future	C0150041	Finding
concentration	C0086045	Mental Process
concentration, function	C0086045	Mental Process
concentrations	C0086045	Mental Process
consciousness decreased	C0549249	Finding
consciousness, level depressed	C0549249	Finding
dead	C1546956	Finding
death	C1306577	Finding
death adverse event not associated with more specific ctcae term	C1306577	Findi

In [23]:
# Print the seed terms in alphabetical order, one per line
# (repeated seed terms are printed once per occurrence)
for seed in sorted(terms):
    print(seed)

appetite
asleep
bad
better off dead
concentrating
concentration
dead
depressed
depressed
eating
energy
fail
failure
feeling down
fidgety
hopeless
hopeless
hurt
hurt self
hurting
interest
let down
moving slow
overeating
pleasure
restless
sleep
sleeping
sleepy
slow
slowly
slowly
speaking slow
suicide
tired
