In [1]:
# Libraries here
import pandas as pd
import numpy as np
import requests
import json

!pip install edist
import edist.sed as sed

Defaulting to user installation because normal site-packages is not writeable


In [2]:
insertion_cost = 1.
deletion_cost = 1.
leave_change = 1. 
default_cost = 100

In [3]:
intents = {}
intents["DEBUGGING"] = ["Is this the same outcome for similar instances?", "Is this instance a common occurrence?"]
intents["TRANSPARENCY"] = ["What is the impact of feature X on the outcome?","How does feature X impact the outcome?","What are the necessary features that guarantee this outcome?","Why does the AI system have given outcome A?","Which feature contributed to the current outcome?","How does the AI system respond to feature X?","What is the goal of the AI system?","What is the scope of the AI system capabilities?","What features does the AI system consider?","What are the important features for the AI system?", "What is the impact of feature X on the AI system?","How much evidence has been considered to build the AI system?", "How much evidence has been considered in the current outcome?","What are the possible outcomes of the AI system?","What features are used by the AI system?"] 
intents["PERFORMANCE"] = ["How confident is the AI system with the outcome?","Which instances get a similar outcome?","Which instances get outcome A?","What are the results when others use the AI System?","How accurate is the AI system?","How reliable is the AI system?","In what situations does the AI system make errors?","What are the limitations of the AI system?","In what situations is the AI system likely to be correct?"] 
intents["COMPLIANCY"] = ["How well does the AI system capture the real-world?","Why are instances A and B given different outcomes?"]
intents["COMPREHENSIBILITY"] = ["How to improve the AI system performance?","What does term X mean?","What is the overall logic of the AI system?","What kind of algorithm is used in the AI system?"]
intents["EFFECTIVENESS"] = ["What would be the outcome if features X is changed to value V?","What other instances would get the same outcome?","How does the AI system react if feature X is changed?","What is the impact of the current outcome?"] 
intents["ACTIONABILITY"] = ["What are the alternative scenarios available?","What type of instances would get a different outcome?","How can I change feature X to get the same outcome?","How to get a different outcome?","How to change the instance to get a different outcome?","How to change the instance to get outcome {outcome}?","Why does the AI system have given outcome A not B?","Which features need changed to get a different outcome?"] 

In [4]:
# API call to iSeeOntoAPI to get the most similar cases
def getCasesJson(treeId_paremeter,usecaseId_parameter,topK_paremeter):
    """
        Function to get the solutions for that case in json format
    """
    url = "https://api-dev.isee4xai.com/api/trees/cbr_retrieve"

    payload = json.dumps({
      "treeId": treeId_paremeter,
      "usecaseId": usecaseId_parameter,
      "topk": topK_paremeter
    })
    headers = {
      'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    #print(response.text)

    return json.loads(response.text)

### Translator Json-Graph

In [5]:
def print_node_instances(node_id, nodes_dict, node_list, id_list): 
    node = nodes_dict[node_id]
    node_instance = node['Instance']
    if node_instance is None:
        return None
    node_list.append(node_instance)
    id_list.append(node_id)

    if 'firstChild' in node:
        first_child_id = node['firstChild']['Id']
        print_node_instances(first_child_id, nodes_dict, node_list, id_list)
        next_child = node['firstChild'].get('Next')

        while next_child is not None:
            next_child_id = next_child['Id']
            print_node_instances(next_child_id, nodes_dict, node_list, id_list)
            next_child = next_child.get('Next')

    return node_list, id_list

In [6]:
def get_index(node_id, nodes_dict, id_list):
    node = nodes_dict[node_id]
    node_instance = node.get('Instance')
    node_index = id_list.index(node_id)
    node_index = node_index + 1

    return node_index, node_instance

In [7]:
def find_parent(node_id, node, parent_child_dict, id_list, nodes_dict):
    parent_index, parent_instance = get_index(node_id, nodes_dict, id_list)
    
    if 'firstChild' in node:
        first_child_id = node['firstChild']['Id']
        child_index, child_instance = get_index(first_child_id, nodes_dict, id_list)

        if parent_index not in parent_child_dict:
            parent_child_dict[parent_index] = []
        if child_index not in parent_child_dict[parent_index]:
            parent_child_dict[parent_index].append(child_index)
        
        next_child = node['firstChild'].get('Next')
        while next_child is not None:
            next_child_id = next_child['Id']
            child_index, child_instance = get_index(next_child_id, nodes_dict, id_list)
            if child_index not in parent_child_dict[parent_index]:
                parent_child_dict[parent_index].append(child_index)  # Add child index to the parent's list
            next_child = next_child.get('Next')

        return parent_instance

In [8]:
def create_parent_child_dict(nodes_dict, node_list, id_list): 
    parent_child_dict = {}   
    # root = node_list[0] #r 
    parent_child_dict[0] = [1]  # Add root node with index 0

    for i, (instance, node_id) in enumerate(zip(node_list[1:], id_list), start=1):
        node_index = i
        node_id =id_list[node_index-1]
        node = nodes_dict[node_id]
        find_parent(node_id, node, parent_child_dict, id_list, nodes_dict)
    
    return parent_child_dict

In [9]:
def build_adjacency_list(node_list, parent_child_dict): 
    adjacency_list = [[] for _ in range(len(node_list))]

    for node_index, node_instance in enumerate(node_list):
        if node_index in parent_child_dict:
            children = parent_child_dict[node_index]
            adjacency_list[node_index] = children

    return adjacency_list

In [10]:
# function to translate the case solution to graph structure 
# This function must work for all the cases and the query 
# TODO
def translateCasesFromJSONtoGraph(case):
    tree_dict, nodes_dict, parent_child_dict = {},{},{}
    node_list = ['r'] # Added 'r' as the default root node in the node list
    id_list =[] #List of node id's 


    for idx, obj in enumerate(case, start=1):
        trees = obj['data']['trees']
        # Get the 'nodes' from 'trees'
        for tree in trees:
            nodes = tree.get('nodes', {})
            nodes_dict.update(nodes)
            # Get the root node
            root_node_id = tree.get('root')    

        # Call the recursive function to print node instances
        node_list, id_list= print_node_instances(root_node_id, nodes_dict, node_list = ['r'], id_list =[])
        # Call the function to create the parent_child dictionary
        parent_child_dict = create_parent_child_dict(nodes_dict, node_list, id_list)
        # Build the adjacency list from the behavior tree
        adjacency_list = build_adjacency_list(node_list, parent_child_dict)

        tree_key = f'tree_{idx}'
        #   tree_dict[tree_key] = trees
        tree_dict[tree_key] = {
              'tree_json': trees,
              'tree_graph': {
                  'nodes': node_list,
                  'adj': adjacency_list
              }
        }

    return tree_dict


In [11]:
#json_text = json.loads(response.text)
#tree_dict = translateCasesFromJSONtoGraph(json_text)
#print(tree_dict)

### Getting the most similar BT

In [12]:
def tmp_explainers(x):
    if x in ["/Images/Anchors", "/Images/Counterfactuals", "/Tabular/ALE", "/Tabular/DisCERN"]:
        return True
    else:
        return False

In [13]:
def typeQuestion(question):
    question_type = [key for key in intents.keys() if question in intents[key]]
    print(question_type)
    if question_type == []: 
        print("That question is not in our catalog")
    else:
        return question_type[0]

In [14]:
# delta: custom node distance function
def semantic_delta(x, y):
    #df = getSimilarityTable()
    #print(df["/Images/Anchors"]["/Images/Counterfactuals"])

    if(x==y):
        ret = 0.
    elif(x!=None and y==None): #inserting
        #print("inserting")
        ret = insertion_cost
    elif(x==None and y!=None): #deleting
        #print("deleting")
        ret = deletion_cost
    elif(x=='r'or y=='r'):  #we assign an infinite cost when comparing a root node
        #print("root")
        ret = np.inf
    elif(x in ['f','t'] and y in ['f','t']): #if both nodes are either failer or succeeder, assign null cost
        #print("failer and succeeder")
        ret = 0.
    elif(x in ['s','p'] and y in['s','p']): #if both nodes are either sequence or priority, assign null cost
        #print("sequence and priority")
        ret = 0.
    elif(x in ['s','p'] or y in ['s','p']): #if one of the nodes is a sequence or priority, the other won't because of the previous rule
        #print("sequence or priority")
        ret = np.inf
    elif(x in ['f','t'] and y[0]=='/'):
        #print("cambiando explainer por failure or succeeder")
        ret = leave_change
    elif(x[0]=='/' and y in ['f','t']):
        #print("cambiando explainer por failure or succeeder")
        ret = leave_change
    # elif x in df.columns and y in df.columns: #Both explainers are in similarity table, DF MUST BE LOADED BEFOREHAND
        # ret = 1-df.loc[x][y]
    elif tmp_explainers(x) == False: # TODO
        ret = 1
    else: # here we have a question leave
        # if they are the same type
        if typeQuestion(x) == typeQuestion(y):
            ret = 0.75
        else: # if they are not the same type
            ret = 0.5
       
    #print('sem_delta: ',str(x)," , "+str(y)+ " = "+ str(ret) )   
    return ret

In [15]:
def bt_sequence(tree,node,adj_node,seq):
    seq.append(node)
    if adj_node: 
        for child in adj_node:
            bt_sequence(tree, tree["nodes"][child],tree["adj"][child],seq)

In [16]:
# Function to calculate the edit distance between two BTs, both of them have to have graph structure
# in this case is Levenshtein edit distance
def editDistFunc(q,c,delta):
    s1=[]
    bt_sequence(q,q["nodes"][0],q["adj"][0],s1)
    s2=[]
    bt_sequence(c,c["nodes"][0],c["adj"][0],s2)
    dist = sed.sed(s1,s2,delta)
    return dist

In [17]:
# Select the subtree from the query that we have to consider, given a specific condition node
# TODO

In [18]:
# Translate each BT (json format) to graph structure (list of nodes and adjacency list)
# Also, we should use another structure to save the correspondence between the json format
# and the graph format, so later, when we have to return the json format for the most similar BT
# we ca access that structure and not doing the translation again
# call to translateCases(case)

#treeId_parameter = "64b676baa737e466ce27f166" # query
#usecaseId_parameter = "6426a68da3402ba28c44a7c0" # case of the query



# json_text = getCasesJson(treeId_parameter,usecaseId_parameter,topK_parameter)

# tree_dict = translateCasesFromJSONtoGraph(json_text) # here we also have the query

# print(tree_dict)

In [19]:
# Adapt the similarity metric function between explainers
# Think of how to do it
# TODO
# FINISH EXPLAINER COMPARISON


In [29]:
# MAIN
def reuseFunctionality(queryJson, queryTree, queryCase, k_cases=5):
    
    # getting the cases to compare
    json_text = getCasesJson(queryTree,queryCase,k_cases)
    
    # saving only the cases that are not the query
    my_cases = []
    for tree in json_text:
        if tree != queryJson:
            my_cases.append(tree)

    # getting the graph format of the solutions (trees)
    tree_dict = translateCasesFromJSONtoGraph(my_cases)
    
    # this might change when we know how we are getting the query
    tree_query = translateCasesFromJSONtoGraph(queryJson)['tree_1']['tree_graph']
    
    # for every BT in the case base:
    #   compare the query with that BT (taking into account that the query is not the same to the case)
    solution = {}
    for bt in tree_dict:
        solution[bt] = editDistFunc(tree_query,tree_dict[bt]['tree_graph'],semantic_delta)    
    
    # Sort solution to get the BT with the lowest edit distance
    sorted_BTs = sorted(solution.items(), key=lambda x:x[1])
    
    # getting the most similar one and the graph format of that BT
    solution_graph_format = sorted_BTs[0][0]
    # From the structure above, we have to get the json format for that solution (if there is root, we have to remove the root)
    our_solution_json = tree_dict[solution_graph_format]['tree_json']
    
    return our_solution_json

In [30]:
## THIS CODE IS ONLY FOR TESTING, REMOVE LATER
# Load case base from json file
with open("apioutput.json", "r") as f:
    queryJson = json.load(f)

In [31]:
treeId_parameter = "64c28fa0905203cf45444936"
usecaseId_parameter = "64c28f69905203cf4544491c"
topK_parameter = 5 # number of similar cases we need for the query

# TODO
reuseFunctionality(queryJson, treeId_parameter, usecaseId_parameter, topK_parameter)
# I HAVE TO DEFINE queryjson

mi query
{'nodes': ['r', 'Sequence', 'Priority', 'User Question', '/Tabular/DeepSHAPGlobal'], 'adj': [[1], [2], [3, 4], [], []]}
{'nodes': ['r', 'Priority', 'Sequence', 'User Question', '/Tabular/LIME'], 'adj': [[1], [2], [3, 4], [], []]}
{'nodes': ['r', 'Priority', 'Sequence', 'User Question', '/Tabular/DisCERN'], 'adj': [[1], [2], [3, 4], [], []]}
{'nodes': ['r', 'Priority', 'Sequence', 'User Question', '/Tabular/DeepSHAPLocal'], 'adj': [[1], [2], [3, 4], [], []]}
{'nodes': ['r', 'Priority', 'Sequence', 'User Question', '/Tabular/DeepSHAPGlobal'], 'adj': [[1], [2], [3, 4], [], []]}
[('tree_4', 2.0), ('tree_1', 3.0), ('tree_2', 3.0), ('tree_3', 3.0)]
tree_4
[{'version': '0.1.0', 'scope': 'tree', 'id': '33def3ec-31a8-47c1-856c-7fd724718df2', 'Instance': 'Explanation Experience', 'description': '', 'root': '546f5cee-68b0-4b90-85be-786b9957d03a', 'properties': {}, 'nodes': {'5112868d-f790-4665-ab3e-18a36a857363': {'id': '5112868d-f790-4665-ab3e-18a36a857363', 'Concept': 'Sequence', 'Inst

In [32]:
# TODO
# DEFINE EXPLAINER SIMILARITIES
# GET THE SOLUTION without root 