In [None]:
import os
import spacy.displacy
import pandas as pd
import seaborn as sns
import json
import warnings
from IPython.core.display import display, HTML

In [None]:
# Directory holding the competition CSV files, relative to the working directory.
DATA_ROOT = os.path.join('..', 'input', 'nbme-score-clinical-patient-notes')

# One path constant per CSV file read below.
TRAIN_PATH, TEST_PATH, FEATURE_PATH, PATIENT_NOTES_PATH = (
    os.path.join(DATA_ROOT, csv_name)
    for csv_name in ('train.csv', 'test.csv', 'features.csv', 'patient_notes.csv')
)

# Load each file into its own DataFrame (same order as the path constants).
train, test, features, patient_notes = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_PATH, TEST_PATH, FEATURE_PATH, PATIENT_NOTES_PATH)
)


In [None]:
def annotate_sample(note_num):
    """Render one patient note with its annotated feature spans highlighted.

    Selects the rows of the module-level ``train`` frame whose ``pn_num``
    equals ``note_num``, attaches ``feature_text`` from ``features`` and shows
    the note's ``pn_history`` text from ``patient_notes`` through displaCy's
    manual entity renderer, using one colour gradient per distinct feature text.

    Args:
        note_num: Patient-note number (``pn_num``). Anything ``int()`` accepts,
            e.g. the string value of the note dropdown.

    Returns:
        None. The highlighted note is displayed as notebook output.

    Raises:
        IndexError: if ``patient_notes`` has no row for ``note_num``.
    """
    note_num = int(note_num)

    # Silence library warnings only while this function runs. The previous
    # filters are restored on exit, so the rest of the kernel session keeps
    # reporting warnings as before.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        note_text = patient_notes.loc[patient_notes["pn_num"] == note_num, "pn_history"]
        if note_text.empty:
            # Same exception type `.iloc[0]` raises on an empty selection,
            # but with a message that names the missing note.
            raise IndexError(f"no patient note with pn_num={note_num}")

        patient_df = train[train["pn_num"] == note_num].copy()
        patient_df = patient_df.merge(features[['feature_num', 'feature_text']], on='feature_num')

        # `location` holds the text of a list of "start end" strings in which
        # ';' joins several spans, e.g. "['1 2;5 6']". Swapping the quotes and
        # turning ';' into '","' makes it valid JSON for a flat list of spans:
        # ["1 2", "5 6"]. `regex=False` keeps both replacements literal
        # regardless of the pandas default.
        patient_df["location"] = (
            patient_df["location"]
            .str.replace("'", '"', regex=False)
            .str.replace(';', '","', regex=False)
            .apply(json.loads)
        )
        annotation = patient_df["feature_text"]

        ents = []
        for _, row in patient_df.iterrows():
            label = row['feature_text']
            for span in row['location']:
                bounds = span.split()
                ents.append({
                    'start': int(bounds[0]),
                    'end': int(bounds[1]),
                    'label': label
                })
        # displaCy's manual mode walks the entities left to right through the
        # text, so hand them over ordered by position rather than by feature.
        ents.sort(key=lambda ent: (ent['start'], ent['end']))

        doc = {
            'text': note_text.iloc[0],
            "ents": ents
        }

        # Two palettes of the same hues (full and half saturation) form the two
        # ends of each label's gradient.
        n_labels = annotation.nunique()
        p1 = sns.color_palette('hls', n_labels, desat=1).as_hex()
        p2 = sns.color_palette('hls', n_labels, desat=0.5).as_hex()
        colors = {k: f"linear-gradient(90deg, {c1}, {c2})" for k, c1, c2 in zip(annotation.unique(), p1, p2)}
        options = {"colors": colors}
        spacy.displacy.render(doc, style="ent", options=options, manual=True, jupyter=True)


In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [None]:
# Dropdown listing every distinct case number found in `patient_notes`;
# the options are strings and the first one is preselected.
case_numbers = [str(case_id) for case_id in patient_notes['case_num'].unique()]
case_num_selector = widgets.Dropdown(
    description='Case No:',
    options=case_numbers,
    value=case_numbers[0],
)

def update_note_selector():
    """Create the note dropdown for the case currently chosen in ``case_num_selector``.

    Returns:
        A ``widgets.Dropdown`` labelled 'Note No:' whose options are the
        distinct ``pn_num`` values (as strings) that ``train`` holds for the
        selected case. The first note is preselected; if the case has no rows
        in ``train`` the dropdown is empty and nothing is selected.
    """
    selected_case = int(case_num_selector.value)
    note_numbers = [
        str(pn_num)
        for pn_num in train.loc[train['case_num'] == selected_case, 'pn_num'].unique()
    ]

    return widgets.Dropdown(
        options=note_numbers,
        # The case list is built from `patient_notes` while the notes come from
        # `train`, so a case may have no notes here; avoid indexing an empty list.
        value=note_numbers[0] if note_numbers else None,
        description='Note No:',
    )

# Build the note dropdown for the case that `case_num_selector` shows initially.
note_num_selector = update_note_selector()

def on_case_no_change(change):
    """Refill the note dropdown after a different case has been selected.

    Args:
        change: Change notification dict handed over by the widget's
            ``observe`` mechanism; the keys ``'type'``, ``'name'`` and
            ``'new'`` are read. Anything other than a change of the ``value``
            trait is ignored.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    print("changed to %s" % change['new'])
    # Work from the event payload (the newly selected case) rather than
    # re-reading the global case dropdown.
    selected_case = int(change['new'])
    note_numbers = [
        str(pn_num)
        for pn_num in train.loc[train['case_num'] == selected_case, 'pn_num'].unique()
    ]
    note_num_selector.options = note_numbers
    # A case without notes in `train` leaves the dropdown empty instead of
    # raising IndexError on `note_numbers[0]`.
    note_num_selector.value = note_numbers[0] if note_numbers else None


        
def on_change(change):
    """Print the new value whenever a change of the ``value`` trait is reported.

    Args:
        change: Change notification dict; the keys ``'type'``, ``'name'`` and
            ``'new'`` are read. Other notifications are ignored.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return
    print("changed to %s" % change['new'])

        

        
# Subscribe the handlers to changes of the `value` trait only, so they are not
# invoked for every other trait the dropdowns update (the handlers ignore
# those notifications anyway).
case_num_selector.observe(on_case_no_change, names='value')
note_num_selector.observe(on_change, names='value')

# Show both dropdowns below this cell.
display(case_num_selector)
display(note_num_selector)

In [None]:
# Render the note currently chosen in the "Note No:" dropdown above.
# The output does not refresh on its own: change the widget value, then re-run
# this cell to explore the labels in another note.
annotate_sample(note_num_selector.value)

# Thanks for reading:)