# Import Widget Libraries

In [1]:
%load_ext autoreload   
%autoreload 2
import kyurem
from kyurem.core.idom_loader import reload_bundle
from kyurem.core.widget import WidgetModel

kyurem_client: 0.0.0


# Import Relevant Data

## Import Graph

In [2]:
from kyurem import Service
# Create the service handle that every later cell calls into.
# NOTE(review): `kh` presumably selects which graph/version to load — confirm.
service = Service(kh="indeed.v0.0.1:612e3403")

In [3]:
# Fetch the edge list from the service; it is passed to ExplorerESE as the
# schema argument when the explorer widget is created.
schema = service.get_kh_edge_list()

## Import Corpus Data

In [4]:
import pandas as pd
# Read the corpus mapping CSV and turn it into a list of per-row dicts,
# the form handed to service.load_corpus.
df = pd.read_csv("indeed_taxo_jd_indeed.v0.0.1_612e3403_map.csv")
data = df.to_dict(orient="records")

In [5]:
# Register the corpus records with the service.
# NOTE(review): the keyword values are presumably column names of the CSV
# loaded above (concept / context text / term to highlight) — confirm.
service.load_corpus(
    data,
    concept="parent_title",
    context="context",
    highlight="child_title",
)

# ESE Seed Creation

## Exploration

In [6]:
from kyurem import ExplorerESE


def init():
    """Build the data for the explorer's initial view.

    Returns:
        dict with the keys "subgraph", "children", "relations", "datatable"
        and "highlight". Only "children" and "relations" are populated (from
        the service, called without a node); the other entries start as None.
    """
    child_distribution = service.get_children_node_distributions()
    relation_distribution = service.get_relation_distribution()
    return dict(
        subgraph=None,
        children=child_distribution,
        relations=relation_distribution,
        datatable=None,
        highlight=None,
    )

def focus(node, panel):
    """Collect the view data to show when a node is focused in the explorer.

    Args:
        node: dict describing the focused node, in one of two formats:
            { "node_label": str, "node_property": str, "node_property_value": str }
            { "node_label": str, "title": str, "uuid": str }
        panel: panel name; when it equals "schema" or "children" the
            corresponding entry is left out of the result.
            NOTE(review): presumably the panel that triggered the focus, so
            that panel is not refreshed — confirm.

    Returns:
        dict with "subgraph" (always None), optionally "schema" and
        "children", plus "relations" (list of {"x", "y", "type"} bar-chart
        records), "datatable" and "highlight" (both None when the annotated
        corpus has no rows).
    """
    # The service functions use the property-based format, so a title-based
    # node is converted to it first.
    # TODO: Return nodes in a consistent format from the service
    #       functions to streamline the above
    if "title" in node:
        node = dict(
            node_label=node["node_label"],
            node_property="title",
            node_property_value=node["title"],
        )

    # Neighborhood and annotated corpus for the (normalized) node
    neighborhood = service.get_node_neighborhood(node)
    corpus = service.get_annotated_corpus(node)

    data = {"subgraph": None}
    if panel != "schema":
        data["schema"] = neighborhood["schema"]
    if panel != "children":
        data["children"] = service.get_children_node_distributions(node)

    # Flatten the per-type relation distribution into bar-chart records
    bars = []
    for rel_type, rel_entries in neighborhood["relation_dist"].items():
        for rel in rel_entries:
            bars.append({"x": rel["label"], "y": rel["count"], "type": rel_type})
    data["relations"] = bars
    # TODO: Move conversion code into service

    # Only expose the corpus table (and its highlight info) when it has rows
    has_rows = bool(corpus["rows"])
    data["datatable"] = corpus["rows"] if has_rows else None
    data["highlight"] = corpus["highlight"] if has_rows else None
    return data

In [7]:
# For debugging
reload_bundle()

# Create the explorer widget from its data callbacks and the schema
explorer = ExplorerESE(dict(init=init, focus=focus), schema)

explorer.show()

component(10574aa60, self=<kyurem.widgets.StatefulWidgetBase.StatefulWidgetBase object at 0x10bcd95b0>)

LayoutWidget(Layout(component(10574aa60, self=<kyurem.widgets.StatefulWidgetBase.StatefulWidgetBase object at …

## Export Seed

In [8]:
# Display the current selection made in the explorer widget above; the result
# depends on what was selected interactively before running this cell.
explorer.export_selection()

[{'emphasis': 'yes',
  'label': 'isSpecializationOf',
  'source': {'node_label': 'attribute',
   'node_property': 'title',
   'node_property_value': 'Hoyer Lift'},
  'target': {'node_label': 'attribute',
   'node_property': 'title',
   'node_property_value': 'Caregiving Skills'},
  'weight': 1},
 {'emphasis': 'yes',
  'label': 'isSpecializationOf',
  'source': {'node_label': 'attribute',
   'node_property': 'title',
   'node_property_value': "Alzheimer's Care"},
  'target': {'node_label': 'attribute',
   'node_property': 'title',
   'node_property_value': 'Caregiving Skills'},
  'weight': 1},
 {'emphasis': 'yes',
  'label': 'isSpecializationOf',
  'source': {'node_label': 'attribute',
   'node_property': 'title',
   'node_property_value': 'Babysitting'},
  'target': {'node_label': 'attribute',
   'node_property': 'title',
   'node_property_value': 'Caregiving Skills'},
  'weight': 1},
 {'emphasis': 'yes',
  'label': 'isSpecializationOf',
  'source': {'node_label': 'attribute',
   'node

## Explore Provenance

In [9]:
# Get the explorer's history object and display it.
# NOTE(review): presumably a widget showing the provenance of the exploration
# session — confirm against the kyurem docs.
prov = explorer.history()
prov.show()

component(10bc3c4c0, self=<kyurem.widgets.StatefulWidgetBase.StatefulWidgetBase object at 0x10bc8f730>)

LayoutWidget(Layout(component(10bc3c4c0, self=<kyurem.widgets.StatefulWidgetBase.StatefulWidgetBase object at …

# Merging Decision Making

## Load Merge Data

In [None]:
# Read the merge CSV and convert it to a list of per-row dicts
df2 = pd.read_csv("indeed.v0.0.1_612e3403_merged.csv")
merge_data = df2.to_dict(orient="records")

# Hand the raw records to the service.
# NOTE(review): the keyword values are presumably column names of the CSV — confirm.
service.load_merge_data(
    merge_data,
    entity="extraction",
    node_label="concept",
    node_uuid="extr_uuid",
    node_title="node_title",
)

# `mergedata` holds the rows returned by the service and is what the merge
# widget uses; it is distinct from `merge_data`, the raw CSV records above.
mergedata = service.get_merge_data()["rows"]

In [None]:
# Decision options passed to the merge verifier through init() as "decisions"
decision_list = ["Accept", "Reject", "Defer"]

## Verify Merging Recommendations

In [None]:
from kyurem import MergeVerifier


# NOTE: this redefines the `init` used for the explorer widget earlier in the
# notebook. Re-running the explorer's widget-creation cell after this cell
# would pick up this definition instead; re-run the explorer's definitions first.
def init():
    """Build the data for the merge verifier's initial view.

    Returns:
        dict with "subgraph" and "corpus" set to None, "mergedata" set to the
        module-level `mergedata` rows, and "decisions" set to `decision_list`.
    """
    return dict(
        subgraph=None,
        corpus=None,
        mergedata=mergedata,
        decisions=decision_list,
    )

# NOTE: this redefines the `focus` used for the explorer widget earlier in the
# notebook; see the same caveat on re-running cells out of order.
def focus(row, panel):
    """Collect the view data to show when a merge row is focused.

    Args:
        row: dict with the keys "node_label", "node" and "entity".
            NOTE(review): presumably one row of `mergedata` — confirm.
        panel: accepted for the callback signature; not used here.

    Returns:
        dict with "subgraph" (the neighborhood schema of the row's node),
        "corpus" and "highlight" (both None when the annotated corpus of the
        row's entity has no rows).
    """
    label = row["node_label"]

    def by_title(title):
        # Service functions take nodes in the form
        # { "node_label": str, "node_property": str, "node_property_value": str }
        return {
            "node_label": label,
            "node_property": "title",
            "node_property_value": title,
        }

    node = by_title(row["node"])
    entity = by_title(row["entity"])

    # Neighborhood comes from the node, corpus annotations from the entity
    neighborhood = service.get_node_neighborhood(node)
    corpus = service.get_annotated_corpus(entity)

    data = {"subgraph": neighborhood["schema"]}
    # TODO: Move conversion code into service
    if not corpus["rows"]:
        data["corpus"] = None
        data["highlight"] = None
        return data

    data["corpus"] = corpus["rows"]
    data["highlight"] = corpus["highlight"]
    return data
  
# Create the merge-verification widget from its data callbacks and the merge rows
merge_verifier = MergeVerifier(dict(init=init, focus=focus), mergedata)

merge_verifier.show()

## Todo: Export Merge Data

In [None]:
# Display the merge data currently held in the widget's state.
# NOTE(review): presumably includes the decisions made interactively — confirm.
merge_verifier.state.data.mergedata