In [None]:
#! /usr/bin/env python3

#--python imports
# Standard-library modules.
import argparse
import json
import operator
import os
import sys

# The G2* modules are imported from this directory, so it has to be on
# sys.path before any of the imports below run.
sys.path.append('/opt/senzing/g2/python')
from G2Engine import G2Engine
from G2IniParams import G2IniParams
import G2Paths
import G2Exception
from G2Config import G2Config
from G2ConfigMgr import G2ConfigMgr

# Notebook-wide settings.
# module_name and verbose_logging are passed to every initV2() call below.
module_name = 'pyG2EngineWhy'
verbose_logging = True
# Install locations derived from SENZING_DIR (defaults to /opt/senzing).
# NOTE(review): sys.path above uses the literal /opt/senzing path rather than
# these values, so overriding SENZING_DIR does not affect the imports.
senzing_directory = os.environ.get("SENZING_DIR", "/opt/senzing")
senzing_python_directory = senzing_directory + "/g2/python"
g2module_ini_pathname = senzing_python_directory + "/G2Module.ini"

In [None]:
#helper functions
def getScoreBucketLevelForScoreBucket(bucketString):
    """Map a score-bucket label to a numeric rank (lower is a stronger match).

    Args:
        bucketString: One of 'SAME', 'CLOSE', 'LIKELY', 'PLAUSIBLE',
            'UNLIKELY' or 'NO_CHANCE'. Any other value is accepted.

    Returns:
        int: 0 for 'SAME' up to 5 for 'NO_CHANCE', and 6 for any value that
        is not one of the known labels.
    """
    # Ordered from best to worst; a label's position is its rank.
    rankedBuckets = ('SAME', 'CLOSE', 'LIKELY', 'PLAUSIBLE', 'UNLIKELY', 'NO_CHANCE')
    for level, knownBucket in enumerate(rankedBuckets):
        if bucketString == knownBucket:
            return level
    # Unknown labels rank after every known bucket.
    return len(rankedBuckets)


In [None]:
# Initialize the engine
# Settings handed to every Senzing component, as a JSON string: the support
# data path, the SQLite connection and the config resource path.
iniParams = (
    '{"PIPELINE": {"SUPPORTPATH": "/opt/senzing/g2/data"},'
    '"SQL": {"CONNECTION": "sqlite3://na:na@/opt/senzing/g2/sqldb/G2C.db",'
    '"RESOURCEPATH": "/opt/senzing/g2/python/g2config.json"}}'
)

# Start the configuration manager and the configuration builder.
g2ConfigMgr = G2ConfigMgr()
g2ConfigMgr.initV2(module_name, iniParams, verbose_logging)
g2config = G2Config()
g2config.initV2(module_name, iniParams, verbose_logging)

# Create a configuration and serialize it; save() fills the bytearray.
config = g2config.create()
config_bytearray = bytearray()
g2config.save(config, config_bytearray)
configJsonToUse = config_bytearray.decode()

# Register that configuration and make it the default. addConfig() receives
# new_config_id as an output buffer, which is then passed on unchanged.
# NOTE(review): return_code is never checked - confirm how the SDK reports failures.
config_comment = "Configuration added from G2SetupConfig."
new_config_id = bytearray()
return_code = g2ConfigMgr.addConfig(configJsonToUse, config_comment, new_config_id)
g2ConfigMgr.setDefaultConfigID(new_config_id)

# Start the engine itself.
# NOTE(review): purgeRepository() presumably wipes every record already in the
# repository - confirm before pointing this notebook at a shared database.
g2_engine = G2Engine()
g2_engine.initV2(module_name, iniParams, verbose_logging)
g2_engine.purgeRepository()

In [None]:
# Load the sample records that the why-analysis below will examine.
# All records share one data source and no load id.
datasource_code = "TEST"
load_id = None

# (record id, record attributes) pairs, added in this order.
# NOTE(review): records 2 and 3 use the key "MIDDLE_NAME" while the other name
# parts follow a NAME_* pattern (NAME_FIRST, NAME_LAST). It may have been meant
# as "NAME_MIDDLE" - confirm against the Senzing attribute specification before
# changing it, since that would alter the why results.
sample_records = [
    ("1", {"NAMES": [{"NAME_TYPE": "PRIMARY","NAME_LAST": "Bob","NAME_FIRST": "Max"}],"PHONE_NUMBER": "123-456-7899"}),
    ("2", {"NAMES": [{"NAME_TYPE": "PRIMARY","NAME_LAST": "Robart","NAME_FIRST": "Max", "MIDDLE_NAME": "S"}],"ADDR_FULL": "456 Funny ST"}),
    ("3", {"NAMES": [{"NAME_TYPE": "PRIMARY","NAME_LAST": "Robert","NAME_FIRST": "Max", "MIDDLE_NAME":"Stewart"}],"PHONE_NUMBER": "123-456-7899", "ADDR_FULL": "456 Funny st"}),
]

# One addRecord() call per record; the loop variables keep the original
# notebook-level names, so after the loop they hold the values for record "3".
for record_id, data in sample_records:
    data_string = json.dumps(data)
    result = g2_engine.addRecord(datasource_code, record_id, data_string, load_id)
    print(result)

In [None]:
# Run the why function for record "2" of the data source set in the
# record-loading cell (datasource_code is reused from there).
record_id="2"
# Output buffer: the engine call below fills it, and it is then decoded and
# parsed as JSON.
response = bytearray()
# NOTE(review): ret_code is stored but never inspected - confirm how the SDK
# reports failures for this call.
ret_code = g2_engine.whyEntityByRecordID(datasource_code,record_id,response)
whyResultData = json.loads(response.decode())

In [None]:
# Get the main document data section: the WHY_RESULTS entry whose FOCUS_RECORDS
# list contains the record that was queried.
whyResultSection = None
for candidateSection in whyResultData['WHY_RESULTS']:
    for focusRecord in candidateSection['FOCUS_RECORDS']:
        if (focusRecord['DATA_SOURCE'] == datasource_code
                and focusRecord['RECORD_ID'] == record_id):
            # No break on purpose: if several sections match, the last one wins.
            whyResultSection = candidateSection

# Fail with a clear message instead of a TypeError on None further down.
if whyResultSection is None:
    raise LookupError(
        "No WHY_RESULTS section lists data source '{0}', record ID '{1}' "
        "among its FOCUS_RECORDS".format(datasource_code, record_id)
    )
entityID = whyResultSection['ENTITY_ID']

In [None]:
# Get the basic why results: both values come from the MATCH_INFO part of the
# selected section and are printed in the summary further down.
matchInfo = whyResultSection['MATCH_INFO']
whyKey = matchInfo['WHY_KEY']
whyERRuleCode = matchInfo['WHY_ERRULE_CODE']

In [None]:
# Get the matching candidate keys for the record, grouped by feature name:
# {feature name: [FEAT_DESC, ...]} in the order the engine returned them.
recordCandidateKeyFeaturesByType = {}
candidateKeysByFeature = whyResultSection['MATCH_INFO']['CANDIDATE_KEYS']
for featureName, candidateKeys in candidateKeysByFeature.items():
    for candidateKey in candidateKeys:
        featureDesc = candidateKey['FEAT_DESC']
        # The list is created on the first description only, so a feature with
        # no candidate keys never appears in the result.
        recordCandidateKeyFeaturesByType.setdefault(featureName, []).append(featureDesc)

In [None]:
# Determine the best match for each feature type.
# bestFeatureMatchByType:  feature name -> {'SCORE_BUCKET', 'FEAT'} of the winner
# bestBehaviorLevelByType: feature name -> numeric bucket level of that winner
bestFeatureMatchByType = {}
bestBehaviorLevelByType = {}
featureScoresByType = whyResultSection['MATCH_INFO']['FEATURE_SCORES']
for featureName, featureScores in featureScoresByType.items():
    for featureScore in featureScores:
        bucketString = featureScore['SCORE_BUCKET']
        scoreBucketLevelForFeatureScore = getScoreBucketLevelForScoreBucket(bucketString)
        # Keep the current winner unless this score is strictly better (a lower
        # level); on a tie the earlier entry stays.
        if (featureName in bestBehaviorLevelByType
                and scoreBucketLevelForFeatureScore >= bestBehaviorLevelByType[featureName]):
            continue
        bestFeatureMatchByType[featureName] = {
            'SCORE_BUCKET': bucketString,
            'FEAT': featureScore['INBOUND_FEAT'],
        }
        bestBehaviorLevelByType[featureName] = scoreBucketLevelForFeatureScore

In [None]:
# Print the general why-results: which record was queried, the entity it
# belongs to, and the why key / rule code read from MATCH_INFO.
summaryLines = [
    '',
    'Displaying record results:',
    '',
    "Data source: '" + datasource_code + "'",
    "Record ID: '" + record_id + "'",
    'Entity ID: ' + str(entityID),
    "Why key: '" + whyKey + "'",
    "Why ER rule code: '" + whyERRuleCode + "'",
    '',
]
for summaryLine in summaryLines:
    print(summaryLine)

In [None]:
# Print the matching criteria: one line per feature type, in alphabetical
# order, with the best-scoring feature value and its score bucket.
print('Compared Features:')
for featureName in sorted(bestFeatureMatchByType):
    bestMatch = bestFeatureMatchByType[featureName]
    featureDesc = bestMatch['FEAT']
    scoreBucket = bestMatch['SCORE_BUCKET']
    print('    ' + featureName + ": '" + featureDesc + "' (" + scoreBucket + ')')
print('')

In [None]:
# Print the candidate keys: feature types in alphabetical order, each followed
# by its key descriptions, also sorted.
print('Matching record candidate keys:')
for featureName in sorted(recordCandidateKeyFeaturesByType):
    print('    ' + featureName + ':')
    # sorted() returns a new list, so the collected dictionary is left untouched.
    featList = sorted(recordCandidateKeyFeaturesByType[featureName])
    for featDesc in featList:
        print("        '" + featDesc + "'")
print('')

# Shut down the engine now that all results have been printed.
# NOTE(review): g2config and g2ConfigMgr were also initialized earlier but are
# never shut down - confirm whether the SDK expects that.
g2_engine.destroy()