# Generate PHQ9 Terms + UMLS Synonyms

In [4]:
import re
import mysql.connector


In [5]:
import string

class MetaNorm(object):
    """Normalize UMLS Metathesaurus concept strings."""
    def __init__(self, function=lambda x: x):
        # TTY in [OF,FN] suffixes
        suffixes = ['qualifier value', 'life style', 'cell structure', 'domestic', 'bird', 'organism',
                    'context\\-dependent category', 'inactive concept',
                    'navigational concept', 'lck', 'record artifact',
                    'core metadata concept', 'substance', 'event',
                    'organism', 'person', 'attribute', 'procedure',
                    'tumor staging', 'a', 'cell', 'chloroaniline',
                    'product', 'specimen', 'observable entity',
                    'racial group', 'si', 'namespace concept',
                    'environment', 'social concept', 'ras', 'unspecified',
                    'special concept', 'staging scale', 'disorder',
                    'geographic location', 'occupation', 'ethnic group',
                    'body structure', 'situation', 'physical force',
                    'trans', 'finding', 'epoxymethano', 'linkage concept',
                    'assessment scale', 'metadata', 'link assertion',
                    'dithiocarbamates', 'foundation metadata concept',
                    'morphologic abnormality', 'physical object']
        self.of_fn_rgx = "\(({})\)$".format("|".join(sorted(suffixes, key=len, reverse=1)))
        self.function = function

    def normalize(self, s):
        '''
        Heuristics for stripping non-essential UMLS string clutter

        :param s:
        :return:
        '''
        s = s.replace("--", " ")
        s = re.sub("[(\[<].+[>)\]]$", "", s)        
        s = re.sub("(\[brand name\]|[,]* NOS)+", "", s).strip()  
        s = s.strip().strip("_").strip(":")
        s = re.sub("(\[.{1}\])+", "", s).strip()
        s = re.sub("\-RETIRED\-$", "", s).strip()
        s = re.sub("BOLD[:].+$", "", s).strip()
        s = re.sub(" @ @ ", " ", s).strip()
        # normalize TTY in [OF,FN]
        s = re.sub(self.of_fn_rgx, "", s).strip()
        # remove digits/stray punctuation
        s = re.sub("^([0-9]+[{}]*)+$".format(string.punctuation), "", s).strip()
        # custom normalize function
        s = self.function(s)
        
        return s

In [6]:
# PHQ9 seed terms
queries = {
    1:['interest', 'pleasure'], 
    2:['depressed', 'hopeless', 'feeling down', 'depressed', 'hopeless'],
    3:['sleep', 'asleep', 'sleeping', 'sleepy'],
    3:['tired', 'energy'],
    5:['appetite', 'eating', 'overeating'],
    6:['bad', 'fail', 'failure', 'let down'],
    7:['concentration', 'concentrating'],
    8:['slow', 'slowly', 'fidgety', 'restless', 'moving slow', 'slowly', 'speaking slow'],
    9:['dead', 'hurt', 'hurting', 'suicide', 'hurt self', 'better off dead']
}

In [32]:
import collections

query = ("SELECT MRCONSO.CUI, STR FROM MRCONSO, MRSTY "
         "WHERE STR like %s AND MRSTY.CUI=MRCONSO.CUI")

cnx = mysql.connector.connect(user='fries', database='2014AB')
cursor = cnx.cursor()

# map PHQ9 seed terms to UMLS unique concept identifiers
concepts = {phq:set() for phq in queries}
for phq in queries:
    for t in queries[phq]:
        cursor.execute(query, (t,))
        for m in cursor:
            concepts[phq].add(m[0])
                                   

In [33]:
print(concepts)

{1: {'C0543488', 'C1610547', 'C1561518', 'C0679105'}, 2: {'C0344315', 'C0150041', 'C0549249'}, 3: {'C1442080', 'C1547025', 'C0424589', 'C0557875', 'C0542479'}, 5: {'C0013470', 'C0003618', 'C0020505'}, 6: {'C1366450', 'C0293686', 'C0231174', 'C0205169', 'C0231175', 'C0680095'}, 7: {'C0086045', 'C1446561'}, 8: {'C0439834', 'C0920289', 'C0424235', 'C0424230', 'C0085631'}, 9: {'C0011065', 'C1306577', 'C1546956', 'C0812393', 'C0038661'}}


## Build Synonyms

In [58]:
query = ("SELECT MRCONSO.CUI, STR, MRSTY.STY STR FROM MRCONSO, MRSTY "
         "WHERE MRCONSO.CUI=%s AND MRSTY.CUI=MRCONSO.CUI AND "
         "STY IN ('Finding', 'Mental or Behavioral Dysfunction', 'Qualitative Concept', 'Sign or Symptom', 'Mental Process')")

metanorm = MetaNorm()

def get_termset(cuis, cursor):
    terms = []
    for c in cuis:
        cursor.execute(query, (c,))
        for m in cursor:
            cui, s, sty = m
            s = metanorm.normalize(s).strip()
            if s:
                terms.append((cui, s.lower(), sty))
    return terms

termsets = {}
for phq in sorted(concepts):
    termsets[phq] = get_termset(concepts[phq], cursor)
      

In [60]:
for phq in termsets:
    terms = set([tuple([f'{phq}'] + list(term[0:2])) for term in termsets[phq]])
    for t in sorted(terms):
        print('\t'.join(t))


1	C0543488	interest
1	C0543488	interested
1	C0543488	interesting
1	C0543488	interests
1	C0679105	enjoyment
1	C0679105	pleasurable emotion
1	C0679105	pleasure
1	C0679105	pleasures
2	C0150041	cannot see a future
2	C0150041	feeling hopeless
2	C0150041	feeling of hopelessness
2	C0150041	feels there is no future
2	C0150041	future hope
2	C0150041	hopeless
2	C0150041	hopelessness
2	C0150041	loss of hope for the future
2	C0150041	negative about the future
2	C0150041	no hope for the future
2	C0344315	depressed
2	C0344315	depressed mood
2	C0344315	depressing
2	C0344315	depression
2	C0344315	depression moods
2	C0344315	feeling down
2	C0344315	feeling low
2	C0344315	low mood
2	C0344315	melancholic
2	C0344315	melancholy
2	C0344315	miserable
2	C0344315	morose mood
2	C0344315	morosity
2	C0549249	consciousness decreased
2	C0549249	consciousness, level depressed
2	C0549249	decreased consciousness
2	C0549249	depressed
2	C0549249	depressed level of consciousness
3	C0424589	energy
3	C0424589	vitality
3	C0