# Medical Dictations: Natural Language Understanding

This script:
- applies IBM Watson Natural Language Understanding on the medical dictations that form the corpus for speech recognition.
- builds a graph of edges between attributes (from Watson NLU) and dictation instances
- identifies topics/archetypes through probabilistic topic modeling
- explores how the archetypal analysis can be used to improve the speech recognition dictation app 

## Set up: imports and hyperparameters

In [None]:
###############
## SET UP
###############

# The API key is entered interactively. Do NOT save the API key in the code.
# getpass() is used instead of input() so the key is not echoed into the cell
# output (which would be saved with the notebook). The prompt is passed
# positionally: the built-in input() rejects a `prompt=` keyword outside Jupyter.
# The key is only requested when `apikey` is not already defined in this session.
if 'apikey' not in locals():
    from getpass import getpass
    apikey = getpass('API-Key? ( https://cloud.ibm.com/catalog/services/natural-language-understanding )')

### SCRIPT INPUT HYPER-PARAMETERS
# Change as desired when running the script.
# PATH['data']    : directory holding the dictation text files
# PATH['results'] : directory where the pickled NLU results are written
PATH = {
    'data':    '../data/Documents/',
    'results': './Watson-nlu-results/',
}

# Connection settings handed to the Watson NLU client.
NLU = {
    'url':     'https://gateway.watsonplatform.net/natural-language-understanding/api',
    'version': '2019-07-12',
}

##################
### IMPORT MODULES
##################
### Processing
import numpy as np
import scipy as sp
import pandas as pd
import networkx as nx
import math

### System
import sys
from os import listdir

### I/O
import json
import pickle

### Presentation
from matplotlib import pyplot as plt

### NLU
from ibm_watson import NaturalLanguageUnderstandingV1 as NaLaUn
from ibm_watson.natural_language_understanding_v1 import Features, CategoriesOptions,ConceptsOptions,EntitiesOptions,KeywordsOptions,RelationsOptions,SyntaxOptions
# Watson NLU client used for every analyze() call in this notebook.
# NOTE(review): `iam_apikey` looks like the constructor keyword of older
# ibm_watson releases (newer ones take an `authenticator`) - confirm against
# the installed SDK version.
nlu = NaLaUn(version=NLU['version'] , iam_apikey = apikey, url = NLU['url'])  #Local Natural Language Understanding object

################
## PREPARE DATA
################
# Read every file in the data directory into `dictation_dic`, keyed by the
# file name with '.txt' removed.
# The listing is sorted so the dict's insertion order (and therefore any
# positional lookup into its values) is the same on every run and platform;
# os.listdir() itself returns entries in arbitrary order.
filenames = sorted(listdir(PATH['data']))
dictation_dic = {}            #dictionary for dictation files
for name in filenames:
    # The context manager closes each handle right after reading instead of
    # leaving it open until garbage collection.
    with open(PATH['data']+name) as f:
        dictation_dic[name.replace('.txt','')] = f.read()

def select_dictation(key):
    """Return the text of a single dictation from ``dictation_dic``.

    An ``int`` key is used as a position in the dictionary's value order;
    any other key is looked up directly as a dictionary key.
    """
    if type(key) is not int:
        return dictation_dic[key]
    texts = list(dictation_dic.values())
    return texts[key]


# Short alias for select_dictation.
dn = select_dictation

## Perform Watson NLU analysis

- Analyzes each dictation text file
- Saves each analysis as a pickled file
- Keeps all analyzed file objects in a dictionary "dictation_analysis"
- Saves the dictionary as a pickled file "all_dictations_nlu.pkl"

In [25]:
###############################
## PERFORM WATSON NLU ANALYSIS
###############################
from os import makedirs

# Maps each dictation label to the Watson NLU response for its text.
dictation_analysis = {}
dian = dictation_analysis      # short alias

# Attribute families requested from Watson NLU, with per-family result limits.
features = Features(
    categories= CategoriesOptions(limit=4),
    concepts  = ConceptsOptions(limit=20),
    entities  = EntitiesOptions(limit=20),
    keywords  = KeywordsOptions(limit=20),
    relations = RelationsOptions(),
    syntax    = SyntaxOptions()
)

# Create the output directory up front so a missing folder is not discovered
# only after the first (remote) analyze() call has already been made.
makedirs(PATH['results'], exist_ok=True)

for lbl, text in dictation_dic.items():
    dian[lbl] = nlu.analyze(text = text, features=features)
    # One pickle per dictation; `with` closes the file even if dump() raises.
    with open(PATH['results']+str(lbl)+'_nlu.pkl','wb') as f:
        pickle.dump(dian[lbl],f)

# Single pickle holding the analyses of all dictations.
with open(PATH['results']+'all_dictations_nlu.pkl','wb') as f:
    pickle.dump(dian,f)
    
    

## Build a graph of the dictations and their analyzed attributes
- builds a graph of edges between attributes (from Watson NLU) and dictation instances

In [45]:
# Empty undirected graph; no nodes or edges are added in this cell.
gr = nx.Graph()

# Sandbox. Ignore - not part of the code

In [27]:
# Pick the stored analysis under label '3' for ad-hoc inspection.
aaa = dian['3']

In [39]:
# Tabulate the 'keywords' entries of the selected analysis result.
pd.DataFrame.from_dict(aaa.result['keywords'])

Unnamed: 0,count,relevance,text
0,1,0.632321,History of present illness
1,1,0.620887,oropharyngeal dysphagia
2,3,0.584741,end-stage
3,2,0.569271,62-year-old male
4,1,0.569141,physical exam
5,1,0.567809,Family history
6,1,0.56661,ferrous sulfate
7,1,0.563624,outside hospital
8,1,0.563095,nasogastric tube feeds
9,15,0.562367,patient


In [38]:
# Tabulate the 'concepts' entries of the selected analysis result.
pd.DataFrame.from_dict(aaa.result['concepts'])

Unnamed: 0,dbpedia_resource,relevance,text
0,http://dbpedia.org/resource/Pulmonary_edema,0.976839,Pulmonary edema
1,http://dbpedia.org/resource/Gastroenterology,0.912084,Gastroenterology
2,http://dbpedia.org/resource/Percutaneous_endos...,0.879067,Percutaneous endoscopic gastrostomy
3,http://dbpedia.org/resource/Pulmonology,0.871367,Pulmonology
4,http://dbpedia.org/resource/Constipation,0.866553,Constipation
5,http://dbpedia.org/resource/Kidney,0.864334,Kidney
6,http://dbpedia.org/resource/Nephrology,0.802674,Nephrology
7,http://dbpedia.org/resource/Chronic_kidney_dis...,0.761871,Chronic kidney disease
8,http://dbpedia.org/resource/Renal_failure,0.72533,Renal failure
9,http://dbpedia.org/resource/Amputation,0.71498,Amputation


In [40]:
# Tabulate the 'categories' entries of the selected analysis result.
pd.DataFrame.from_dict(aaa.result['categories'])

Unnamed: 0,label,score
0,/health and fitness/disease,0.972914
1,/health and fitness/therapy,0.847317
2,/health and fitness/weight loss,0.841645
3,/health and fitness/disorders,0.795234


In [37]:
# Tabulate the 'entities' entries of the selected analysis result.
pd.DataFrame.from_dict(aaa.result['entities'])

Unnamed: 0,confidence,count,disambiguation,relevance,text,type
0,0.908503,3,,0.95276,stage renal disease,HealthCondition
1,0.96,2,,0.765695,62-year,Quantity
2,0.999991,3,"{'subtype': ['DiseaseOrMedicalCondition', 'Cau...",0.648505,cirrhosis,HealthCondition
3,0.276687,1,,0.495814,hypercapnic respiratory failure,HealthCondition
4,0.665347,1,"{'subtype': ['DiseaseOrMedicalCondition', 'Dis...",0.483777,pancytopenia,HealthCondition
5,0.529816,1,,0.458535,gastrointestinal malignancy,HealthCondition
6,0.8,1,,0.384852,100%,Quantity
7,0.8,1,,0.383476,3 liters,Quantity
8,0.619878,1,,0.372212,HEENT,Person
9,0.457756,1,,0.351234,JVD,Person


In [41]:
# Tabulate the 'relations' entries of the selected analysis result.
pd.DataFrame.from_dict(aaa.result['relations'])

Unnamed: 0,arguments,score,sentence,type
0,"[{'text': 'He', 'location': [163, 165], 'entit...",0.849376,He was admitted to outside hospital for worsen...,agentOf
1,"[{'text': 'Midodrine', 'location': [1378, 1387...",0.536084,"Medications, Procrit, Effexor, Lipitor, depake...",locatedAt
2,"[{'text': 'Atarax', 'location': [1389, 1395], ...",0.909557,"Medications, Procrit, Effexor, Lipitor, depake...",locatedAt
3,"[{'text': 'Patient', 'location': [1469, 1476],...",0.978107,Patient denies any fevers or chills.,agentOf
4,"[{'text': 'patient', 'location': [1594, 1601],...",0.464201,"On physical exam, patient has a blood pressure...",hasAttribute
5,"[{'text': 'hemoglobin', 'location': [2447, 245...",0.397797,"Laboratory, patient has white count of 4.5, he...",locatedAt
6,"[{'text': 'patient', 'location': [2415, 2422],...",0.542752,"Laboratory, patient has white count of 4.5, he...",hasAttribute
7,"[{'text': 'patient', 'location': [2626, 2633],...",0.456632,"Impression, recommendations, patient is a 62-y...",hasAttribute
8,"[{'text': 'patient', 'location': [3946, 3953],...",0.748083,ID is following patient {period} Regarding end...,hasAttribute
9,"[{'text': 'patient', 'location': [3289, 3296],...",0.361502,Presence of ascites would be developing contra...,partOfMany
