## Using the NLP API in the notebook

In [5]:
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession
from google.auth import compute_engine
import json

PROJECT_ID="rh20-260602"
URL="https://healthcare.googleapis.com/v1/projects/"+PROJECT_ID+"/locations/us-central1/services/nlp:analyzeEntities"
NLP_SERVICE='projects/'+PROJECT_ID+'/locations/us-central1/services/nlp'

# Define the required scopes
scopes = [
    "https://www.googleapis.com/auth/cloud-platform"
]

# Authenticate a credential with the service account
credentials = compute_engine.Credentials()

# Use the credentials object to authenticate a Requests session.
authed_session = AuthorizedSession(credentials)

payload = {
    'nlpService': NLP_SERVICE,
    'documentContent': 'Insulin regimen human 5 units IV administered.'
}
# Use the credentials object to authenticate a Requests session.
url = URL
print(url)
print(payload)
headers = {"Accept": "application/json"}
response = authed_session.post(
    url,
    json = payload,
    headers=headers
)

data = response.json()

https://healthcare.googleapis.com/v1/projects/rh20-260602/locations/us-central1/services/nlp:analyzeEntities
{'nlpService': 'projects/rh20-260602/locations/us-central1/services/nlp', 'documentContent': 'Insulin regimen human 5 units IV administered.'}


## Printing the raw data. To better see the results, take this and use a json viewer such as 
http://jsonviewer.stack.hu/

In [6]:
print(json.dumps(data, indent = 4))

{
    "entityMentions": [
        {
            "mentionId": "1",
            "type": "MEDICINE",
            "text": {
                "content": "Insulin regimen",
                "beginOffset": 0
            },
            "linkedEntities": [
                {
                    "entityId": "UMLS/C0021641"
                },
                {
                    "entityId": "UMLS/C0795635"
                },
                {
                    "entityId": "UMLS/C1533581"
                },
                {
                    "entityId": "UMLS/C3537244"
                },
                {
                    "entityId": "UMLS/C3714501"
                }
            ],
            "temporalAssessment": {
                "value": "CURRENT",
                "confidence": 0.8573660850524902
            },
            "certaintyAssessment": {
                "value": "LIKELY",
                "confidence": 0.9751282930374146
            },
            "subject": {
                "v

In [17]:
import pandas as pd
column_names = ["Entity#", "Entity type", "Content", "Entity confidence","Temporal Assessment", "Temporal Confidence", "Liklihood", "Liklihood Confidence", "Subject", "Subject Confidence"]
dfentities = pd.DataFrame(columns = column_names)
new_row = {}

for entity in data['entityMentions']:
    new_row['Entity type'] = entity['type']
    new_row['Entity confidence'] = entity['confidence']
    new_row['Content'] = entity['text']['content']
    new_row['Entity#'] = entity['mentionId']

    if 'temporalAssessment' in entity:
        new_row['Temporal Assessment'] = entity['temporalAssessment']['value']
        new_row['Temporal Confidence'] = entity['temporalAssessment']['confidence']
        
    if 'certaintyAssessment' in entity:
        new_row['Liklihood'] = entity['certaintyAssessment']['value']
        new_row['Liklihood Confidence'] = entity['certaintyAssessment']['confidence']
  
    if 'subject' in entity:
        new_row['Subject'] = entity['subject']['value']
        new_row['Subject Confidence'] = entity['subject']['confidence']
  
    dfentities = dfentities.append(new_row, ignore_index=True) 
    
dfentities

Unnamed: 0,Entity#,Entity type,Content,Entity confidence,Temporal Assessment,Temporal Confidence,Liklihood,Liklihood Confidence,Subject,Subject Confidence
0,1,MEDICINE,Insulin regimen,0.637941,CURRENT,0.857366,LIKELY,0.975128,PATIENT,0.999579
1,2,MED_DOSE,5 units,0.744378,CURRENT,0.857366,LIKELY,0.975128,PATIENT,0.999579
2,3,MED_ROUTE,IV,0.779012,CURRENT,0.857366,LIKELY,0.975128,PATIENT,0.999579


## Let's summarize all the relationships of the "MEDICINE" entity.
I use the mentionId = 1 to parse the nlp outpuT and find relationships

In [26]:
def getEntity(id):
    for entity in data['entityMentions']:
        if entity['mentionId'] == id:
            return entity


In [33]:
import pandas as pd


column_names = ["Related Entity Type", "Related Entity Name", "Relationship confidence"]
dfrels = pd.DataFrame(columns = column_names)
new_row = {}
subjectEntityId = '1'

subjectEntity = getEntity(subjectEntityId)
print("SUBJECT Entity is - \n   {}\n   {}".format(subjectEntity['type'], subjectEntity['text']['content']))

for r in data['relationships']:
    #Only pick out the items where the subjectId == sourceEntityId
    if r['subjectId'] == subjectEntityId:
        #found a relationship, so find the object entity in the relationship
        relatedEntity = getEntity(r['objectId'])
        relatedEntitytype = relatedEntity['type']
        relatedEntityname = relatedEntity['text']['content']
        new_row["Related Entity Type"] = relatedEntity['type']
        new_row["Related Entity Name"] = relatedEntity['text']['content']
        new_row['Relationship confidence'] = r['confidence']
        dfrels = dfrels.append(new_row, ignore_index=True) 

print("\n\nThese are the relationships to the SUBJECT entity \n")

dfrels 


SUBJECT Entity is - 
   MEDICINE
   Insulin regimen


These are the relationships to the SUBJECT entity 



Unnamed: 0,Related Entity Type,Related Entity Name,Relationship confidence
0,MED_DOSE,5 units,0.999647
1,MED_ROUTE,IV,0.999567
