# G2Engine Guide - Why Entities

More information:

1. [GitHub repository](https://github.com/Senzing/docker-jupyter)
1. [Senzing documentation](http://docs.senzing.com/?python#g2config)
1. [G2Engine Reference](senzing-G2Engine-reference.ipynb)

## Prepare environment 

### Initialize Senzing configuration

Run [senzing-G2ConfigMgr-reference.ipynb](senzing-G2ConfigMgr-reference.ipynb)
to install a Senzing Engine configuration in the database.

### Initialize Python environment

In [1]:
import argparse
import json
import operator
import os
import sys

# For RenderJSON

import uuid
from IPython.display import display_javascript, display_html, display

### Helper class for JSON rendering

A class for pretty-printing JSON.
Not required by Senzing, 
but helps visualize JSON.

In [2]:
class RenderJSON(object):
    """Display JSON in a notebook cell using the renderjson JavaScript library.

    Not required by Senzing; it only helps visualize JSON responses.
    """

    def __init__(self, json_data):
        """Normalize *json_data* to a JSON string and allocate a unique element id.

        Args:
            json_data: A dict, list or tuple (serialized with ``json.dumps``),
                a ``bytes``/``bytearray`` holding JSON text (decoded), or a
                string that already contains JSON (used as-is).
        """
        if isinstance(json_data, (dict, list, tuple)):
            # Containers must be serialized; their Python repr (single quotes,
            # None/True/False) is not valid JSON for the browser.
            self.json_str = json.dumps(json_data)
        elif isinstance(json_data, (bytes, bytearray)):
            self.json_str = json_data.decode()
        else:
            self.json_str = json_data
        # Unique id tying the emitted <div> to the JavaScript that fills it.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit an empty styled <div>, then JavaScript that renders the JSON into it."""
        display_html('<div id="{}" style="height:100%; width:100%; background-color: LightCyan"></div>'.format(self.uuid), raw=True)
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True)

### Utility functions

In [3]:
def calculate_score_bucket_level(bucketString = None):
    """Return the numeric level of a score-bucket name.

    'SAME' maps to 0 and each following bucket to the next integer, ending
    with 'NO_CHANCE' at 5. Any other value, including the default ``None``,
    maps to 6.
    """
    ordered_buckets = (
        'SAME',
        'CLOSE',
        'LIKELY',
        'PLAUSIBLE',
        'UNLIKELY',
        'NO_CHANCE',
    )
    level_by_bucket = {name: level for level, name in enumerate(ordered_buckets)}
    return level_by_bucket.get(bucketString, len(ordered_buckets))

### System path

Add the Senzing Python directory to the system path so that the Senzing modules can be imported.

In [4]:
# The Senzing Python modules live in the "python" directory under
# SENZING_G2_DIR (default: /opt/senzing/g2); make that directory importable.
python_path = "/".join([os.environ.get("SENZING_G2_DIR", "/opt/senzing/g2"), "python"])
sys.path.append(python_path)

### Initialize variables

Create variables used for G2Engine.

In [5]:
# Run the shared initialization notebook. Its recorded output shows that it
# stores 'senzing_config_json'; presumably it also defines module_name and
# verbose_logging, which the G2Engine initialization cell uses -- TODO confirm.
%run senzing-init-config.ipynb

Stored 'senzing_config_json' (str)
Default config already set


In [6]:
%store -r senzing_confifg_json

no stored variable or alias senzing_confifg_json


In [7]:
# Display the Senzing configuration JSON with the RenderJSON helper.
RenderJSON(senzing_config_json)

## G2Engine

In [8]:
import G2Exception
from G2Engine import G2Engine

### G2Engine initialization

Details at [G2Engine initialization](senzing-G2Engine-reference.ipynb#G2Engine-initialization).

In [9]:
# Create the engine object and initialize it. module_name,
# senzing_config_json and verbose_logging are not assigned in this notebook;
# presumably they come from senzing-init-config.ipynb -- TODO confirm.
g2_engine = G2Engine()

try:
    g2_engine.initV2(module_name, senzing_config_json, verbose_logging)
except G2Exception.G2ModuleGenericException as err:
    # Show the engine's own description of the failure.
    print(g2_engine.getLastException())

### Purge prior repository

Details at [G2Engine.purgeRepository](senzing-G2Engine-reference.ipynb#purgeRepository).

In [10]:
# Purge the repository so the records inserted below start from a clean state.
try:
    g2_engine.purgeRepository()

except G2Exception.G2ModuleGenericException as err:
    # Report the failure the same way every other cell in this notebook does.
    print(g2_engine.getLastException())

### Insert records

Details at [G2Engine.addRecord](senzing-G2Engine-reference.ipynb#addRecord).

#### Insert record 1

In [11]:
# Record 1: a primary name plus a phone number.
record_id = "1"
data = {
    "NAMES": [
        {
            "NAME_TYPE": "PRIMARY",
            "NAME_LAST": "Bob",
            "NAME_FIRST": "Max"
        }
    ],
    "PHONE_NUMBER": "123-456-7899"
}
data_as_json = json.dumps(data)
datasource_code = "TEST"
load_id = None

try:
    g2_engine.addRecord(
        datasource_code,
        record_id,
        data_as_json,
        load_id)

except G2Exception.G2ModuleGenericException as err:
    # Report the engine's description of the failure.
    print(g2_engine.getLastException())

#### Insert record 2

In [12]:
# Record 2: a primary name plus a full address, added to the "TEST" data source.
datasource_code = "TEST"
record_id = "2"
load_id = None

# NOTE(review): the other name keys use the NAME_* prefix (NAME_FIRST,
# NAME_LAST); confirm that "MIDDLE_NAME" is the attribute name Senzing expects.
data = {
    "NAMES": [
        {"NAME_TYPE": "PRIMARY", "NAME_LAST": "Robart", "NAME_FIRST": "Max", "MIDDLE_NAME": "S"},
    ],
    "ADDR_FULL": "456 Funny ST",
}
data_as_json = json.dumps(data)

try:
    g2_engine.addRecord(datasource_code, record_id, data_as_json, load_id)
except G2Exception.G2ModuleGenericException as err:
    # Show the engine's description of the failure.
    print(g2_engine.getLastException())

#### Insert record 3

In [13]:
# Record 3: a primary name with both the phone number used by record 1 and
# an address spelled like record 2's (differing only in letter case).
datasource_code = "TEST"
record_id = "3"
load_id = None

# NOTE(review): the other name keys use the NAME_* prefix (NAME_FIRST,
# NAME_LAST); confirm that "MIDDLE_NAME" is the attribute name Senzing expects.
data = {
    "NAMES": [
        {"NAME_TYPE": "PRIMARY", "NAME_LAST": "Robert", "NAME_FIRST": "Max", "MIDDLE_NAME": "Stewart"},
    ],
    "PHONE_NUMBER": "123-456-7899",
    "ADDR_FULL": "456 Funny st",
}
data_as_json = json.dumps(data)

try:
    g2_engine.addRecord(datasource_code, record_id, data_as_json, load_id)
except G2Exception.G2ModuleGenericException as err:
    # Show the engine's description of the failure.
    print(g2_engine.getLastException())

## Ask Why

Details at [G2Engine.whyEntityByRecordID](senzing-G2Engine-reference.ipynb#whyEntityByRecordID).

In [14]:
# Ask the engine why record "2" of the current data source resolved as it did.
# The response is read back from response_bytearray after the call and parsed
# into why_entity_dictionary for the cells that follow.
record_id = "2"
response_bytearray = bytearray()

try:
    g2_engine.whyEntityByRecordID(datasource_code, record_id, response_bytearray)
    why_entity_dictionary = json.loads(response_bytearray.decode())
except G2Exception.G2ModuleGenericException as err:
    print(g2_engine.getLastException())

# Kept as the final top-level expression of the cell.
RenderJSON(response_bytearray)

### Start constructing final report

In [15]:
# Seed the report with the identity of the record that was asked about.
final_result = dict(Data_source=datasource_code, Record_ID=record_id)

### Get the main document data sections

In [17]:
# Pick the WHY_RESULTS entry whose FOCUS_RECORDS contain the record named in
# final_result. The search stops at the first matching entry; if nothing
# matches (or WHY_RESULTS is empty), why_result is an empty dict so the cells
# below still run and simply report missing values.
why_result = {}
for candidate_result in why_entity_dictionary.get("WHY_RESULTS", []):
    if any(
        focus_record.get("DATA_SOURCE") == final_result.get("Data_source")
        and focus_record.get("RECORD_ID") == final_result.get("Record_ID")
        for focus_record in candidate_result.get("FOCUS_RECORDS", [])
    ):
        why_result = candidate_result
        break

final_result["Entity_ID"] = why_result.get("ENTITY_ID")
RenderJSON(why_result)

### Get the basic why results

In [None]:
# Copy the headline "why" fields from MATCH_INFO into the report.
match_info = why_result.get("MATCH_INFO", {})
final_result.update({
    "Why_key": match_info.get("WHY_KEY"),
    "Why_ER_rule_code": match_info.get("WHY_ERRULE_CODE"),
})

### Get the matching candidate keys for the record

In [None]:
# Map each candidate-key type to the list of FEAT_DESC values found under it.
# The default is an empty dict (not a list) so that a response without
# CANDIDATE_KEYS yields an empty result instead of failing on .items().
name_feature_dictionary = {
    candidate_key: [
        candidate_value.get("FEAT_DESC") for candidate_value in candidate_values
    ]
    for candidate_key, candidate_values in match_info.get("CANDIDATE_KEYS", {}).items()
}

RenderJSON(name_feature_dictionary)

### Determine the best matches for each feature type

In [None]:
# For every feature type in FEATURE_SCORES, keep the scored feature whose
# SCORE_BUCKET has the lowest level; on a tie the later entry replaces the
# earlier one.
best_behavior_level_by_types = {}
best_feature_match_by_types = {}

for feature_type, scored_features in match_info.get("FEATURE_SCORES", {}).items():

    # Start from the level returned for an unknown bucket.
    best_behavior_level_by_types[feature_type] = calculate_score_bucket_level()

    for scored_feature in scored_features:
        bucket_name = scored_feature.get("SCORE_BUCKET")
        bucket_level = calculate_score_bucket_level(bucket_name)

        # Skip anything strictly worse than the best level seen so far.
        if bucket_level > best_behavior_level_by_types[feature_type]:
            continue

        best_behavior_level_by_types[feature_type] = bucket_level
        best_feature_match_by_types[feature_type] = {
            "SCORE_BUCKET": bucket_name,
            "FEAT": scored_feature.get("INBOUND_FEAT")
        }

RenderJSON(best_feature_match_by_types)

### Print the general "Why" results

In [None]:
# Display the report assembled so far (data source, record ID, entity ID and why fields).
RenderJSON(final_result)        

### Print the matching criteria

In [None]:
# List the best match per feature type, ordered by feature type name.
print('Compared Features:')

for feature_type in sorted(best_feature_match_by_types):
    best_match = best_feature_match_by_types[feature_type]
    print("    {0}: '{1}' ({2})".format(
        feature_type,
        best_match.get("FEAT"),
        best_match.get("SCORE_BUCKET")))

### Print the candidate keys

In [None]:
# List the candidate keys by type, with both the types and their values sorted.
print('Matching record candidate keys:')

for candidate_type in sorted(name_feature_dictionary):
    descriptions = name_feature_dictionary[candidate_type]
    print("    {0}:".format(candidate_type))
    # Sorted in place, so name_feature_dictionary keeps the sorted order.
    descriptions.sort()
    for description in descriptions:
        print("        '{0}'".format(description))

### Shut down the engine

Details at [G2Engine.destroy](senzing-G2Engine-reference.ipynb#destroy).

In [None]:
# Shut down the engine now that the report is complete.
try:
    g2_engine.destroy()

except G2Exception.G2ModuleGenericException as err:
    # Call getLastException() so the message is printed, not the bound method.
    print(g2_engine.getLastException())