<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
import math
import pandas as pd
import json

In [2]:
def get_children_in_depth(df, depth, key, children_in_depth_dict):
    """
    Expand the tree below ``key`` level by level, starting at ``depth``.

    For every level that contains nodes, the names of all rows whose ``Parent_Name``
    is one of those nodes are stored under the next level in
    ``children_in_depth_dict[key]`` (mutated in place). Expansion stops at the first
    level that is empty; that empty list stays in the dictionary.

    :param df: frame with ``Name`` and ``Parent_Name`` columns
    :param depth: level to start expanding from (must already exist in the dictionary)
    :param key: entry of ``children_in_depth_dict`` to expand
    :param children_in_depth_dict: mapping ``key -> {level: [node names]}``
    :return: the deepest level that still contains nodes (``depth - 1`` if the
             starting level is already empty)
    """
    levels = children_in_depth_dict[key]
    level = depth

    while levels[level]:
        next_level_names = []
        for parent_name in levels[level]:
            child_rows = df[df['Parent_Name'] == parent_name]
            next_level_names += list(child_rows['Name'].values)

        levels[level + 1] = next_level_names
        level += 1

    # ``level`` now points at the first empty level, so the last populated one is one above it
    return level - 1


def get_feature_difference_count(df, base_action_name, action_lists, rel_tol=0.001,
                                 state_col='Game_Features', exclude_features=None):
    """
    Count, per feature, how many nodes have a higher / same / lower value than a base node.

    Each node's features are read from the JSON string stored in ``state_col`` and
    flattened with ``pd.json_normalize``.

    :param df: frame with a ``Name`` column and a JSON feature column
    :param base_action_name: name of the node every other node is compared against
    :param action_lists: iterable of node names to compare with the base node
    :param rel_tol: relative tolerance passed to ``math.isclose`` when deciding "same"
    :param state_col: name of the column holding the JSON feature string
    :param exclude_features: optional collection of (flattened) feature names to leave
                             out of the summary
    :return: ``{feature: {"higher": int, "same": int, "lower": int}}``
    """
    excluded = set(exclude_features) if exclude_features else set()

    def load_features(node_name):
        # The same column must be used for the base node and for every compared node
        raw_features = df[df['Name'] == node_name][state_col].values[0]
        return pd.json_normalize(json.loads(raw_features))

    base_game_feature = load_features(base_action_name)
    feature_names = [col_name for col_name in base_game_feature.columns if col_name not in excluded]

    game_feature_summary = {col_name: {"higher": 0, "same": 0, "lower": 0} for col_name in feature_names}

    for action in action_lists:
        compare_game_feature = load_features(action)

        for col_name in feature_names:
            base_value = base_game_feature[col_name].values[0]
            compare_value = compare_game_feature[col_name].values[0]
            if math.isclose(base_value, compare_value, rel_tol=rel_tol):
                game_feature_summary[col_name]['same'] += 1
            elif compare_value > base_value:
                game_feature_summary[col_name]['higher'] += 1
            else:
                game_feature_summary[col_name]['lower'] += 1

    return game_feature_summary


def game_feature_difference_explanation(df):
    """
    Summarise how game features change relative to the root for every root child.

    The root is the first row whose ``Parent_Name`` is the string ``'None'``; if no
    such row exists, the first row with a missing ``Parent_Name`` is used instead.

    :param df: frame with ``Name``, ``Parent_Name`` and ``Game_Features`` columns
    :return: tuple ``(immediate_difference, game_feature_difference_in_depth_dict)`` where
             ``immediate_difference[child]`` is the feature summary of the child itself and
             ``game_feature_difference_in_depth_dict[child][depth]`` is the summary over the
             child's descendants at that depth (depth starts at 1)
    :raises ValueError: if no root row can be found
    """
    parent_names = df['Parent_Name']
    root_rows = df[parent_names == 'None']
    if root_rows.empty:
        root_rows = df[parent_names.isna()]
    if root_rows.empty:
        raise ValueError("no root node found: expected a row whose Parent_Name is 'None' or missing")

    # Positional access: the filtered frame keeps the original index labels,
    # so a label lookup with [0] only works when the root happens to be row 0
    root_name = root_rows['Name'].iloc[0]
    root_children = df[parent_names == root_name]['Name'].values

    children_in_depth_dict = {name: {0: [name]} for name in root_children}
    immediate_difference = {}
    game_feature_difference_in_depth_dict = {name: {} for name in root_children}

    for child in root_children:
        children_depth = get_children_in_depth(df, 0, child, children_in_depth_dict)

        immediate_difference[child] = get_feature_difference_count(df, root_name, [child])

        # Depth 0 is the child itself (covered by the immediate summary above)
        for depth in range(1, children_depth + 1):
            counts = get_feature_difference_count(df, root_name, children_in_depth_dict[child][depth])
            game_feature_difference_in_depth_dict[child][depth] = counts

    return immediate_difference, game_feature_difference_in_depth_dict


def generate_immediate_dataframe(immediate_difference):
    """
    Turn the per-node immediate feature summary into a long-format DataFrame.

    Every node contributes three rows (``Higher``, ``Same``, ``Lower``). A feature is
    listed under ``Higher`` when its ``higher`` count is non-zero, otherwise under
    ``Same`` when its ``same`` count is non-zero, otherwise under ``Lower``.

    :param immediate_difference: ``{node: {feature: {"higher": int, "same": int, "lower": int}}}``
    :return: DataFrame with columns ``Name``, ``Change``, ``Feature_List`` and ``Count``
    """
    change_labels = ['Higher', 'Same', 'Lower']
    columns = {"Name": [], "Change": [], "Feature_List": [], "Count": []}

    for node, summary in immediate_difference.items():
        features_by_change = {label: [] for label in change_labels}

        for feature, change_dict in summary.items():
            if change_dict['higher']:
                bucket = 'Higher'
            elif change_dict['same']:
                bucket = 'Same'
            else:
                bucket = 'Lower'
            features_by_change[bucket].append(feature)

        for label in change_labels:
            columns['Name'].append(node)
            columns['Change'].append(label)
            columns['Feature_List'].append(features_by_change[label])
            columns['Count'].append(len(features_by_change[label]))

    return pd.DataFrame.from_dict(columns)


def get_root_children_list(df):
    """
    Return the names of all direct children of the root node.

    The root is the first row whose ``Parent_Name`` is the string ``'None'``; if no
    such row exists, the first row with a missing ``Parent_Name`` is used instead.

    :param df: frame with ``Name`` and ``Parent_Name`` columns
    :return: list of node names whose ``Parent_Name`` equals the root's name
    :raises ValueError: if no root row can be found
    """
    parent_names = df['Parent_Name']
    root_rows = df[parent_names == 'None']
    if root_rows.empty:
        root_rows = df[parent_names.isna()]
    if root_rows.empty:
        raise ValueError("no root node found: expected a row whose Parent_Name is 'None' or missing")

    # Positional access: the filtered frame keeps the original index labels,
    # so a label lookup with [0] only works when the root happens to be row 0
    root_name = root_rows['Name'].iloc[0]
    return list(df[parent_names == root_name]['Name'].values)


def generate_game_feature_explanation_df(df):
    """
    Build a table of feature-change percentages for every root child.

    Columns are ``(node, "Higher" | "Same" | "Lower")`` pairs. Rows are
    ``(feature, "Immediate" | depth)`` pairs; each cell is the share of nodes at that
    depth whose feature value is higher / same / lower than the root, formatted as a
    percentage string with two decimals. Rows are ordered by the feature order of the
    first immediate summary and, within a feature, by depth with "Immediate" first.

    Note that the index levels are named ``Name`` and ``Depth`` although the first
    level holds feature names.

    :param df: frame accepted by ``game_feature_difference_explanation``
    :return: tuple ``(table, maximum_depth)`` where ``maximum_depth`` is the depth label of
             the last row belonging to the first feature
    """
    immediate, by_depth = game_feature_difference_explanation(df)

    # (column label, key in the per-feature count dictionary)
    outcomes = (("Higher", "higher"), ("Same", "same"), ("Lower", "lower"))
    table_df = {}

    def add_cells(node, row_label, summary, use_total):
        """Write one percentage per feature and outcome for ``node`` under ``row_label``."""
        targets = [(table_df.setdefault((node, title), {}), count_key) for title, count_key in outcomes]
        for feature, changes in summary.items():
            # The immediate summary always covers exactly one node
            denominator = sum(changes.values()) if use_total else 1
            for cells, count_key in targets:
                cells[(feature, row_label)] = f"{changes[count_key] / denominator * 100:.2f}%"

    feature_order = []
    for node, summary in immediate.items():
        if not feature_order:
            feature_order = list(summary.keys())
        add_cells(node, 'Immediate', summary, use_total=False)

    for node, depth_summary in by_depth.items():
        for level, summary in depth_summary.items():
            add_cells(node, level, summary, use_total=True)

    feature_rank = {name: idx for idx, name in enumerate(feature_order)}

    def depth_sort_key(level_values):
        # Non-integer labels ("Immediate") sort as depth 0
        return [value if type(value) == int else 0 for value in level_values]

    def feature_sort_key(level_values):
        return [feature_rank[value] for value in level_values]

    table_df = pd.DataFrame.from_dict(table_df)
    table_df.sort_index(inplace=True, key=depth_sort_key, level=1)
    table_df.sort_index(inplace=True, key=feature_sort_key, level=0, sort_remaining=False)
    table_df.index.names = ['Name', 'Depth']

    first_feature_rows = table_df[table_df.index.get_level_values('Name') == feature_order[0]]
    maximum_depth = first_feature_rows.index[-1][1]

    return table_df, maximum_depth

In [3]:
# Load one recorded tree (tab-separated file) and preview its first rows.
df = pd.read_csv("test_01/record_2.csv", sep='\t')

df.head()

Unnamed: 0,Depth,Name,Value,Visits,Parent_Name,Game_State,Game_Features,Game_State_Heuristic,Action_Name,Best_Action,Is_Terminal,Is_Win,Is_Lose
0,0,Node_1,1.01,849,,"{""Grid_0_0"":""."",""Grid_1_0"":""."",""Grid_2_0"":""."",...","{""SCORE"":0.0,""SCORE_ADV"":0.0,""ORDINAL"":0.5,""OU...",0.0,,"SetGridValueAction{gridBoard=5, x=0, y=5, valu...",0,0,0
1,1,Node_2,0.0,18,Node_1,"{""Grid_0_0"":""."",""Grid_1_0"":""."",""Grid_2_0"":""."",...","{""SCORE"":0.0,""SCORE_ADV"":0.0,""ORDINAL"":0.5,""OU...",0.0,"SetGridValueAction{gridBoard=5, x=5, y=7, valu...","SetGridValueAction{gridBoard=5, x=1, y=7, valu...",0,0,0
2,1,Node_3,-0.0294,17,Node_1,"{""Grid_0_0"":""."",""Grid_1_0"":""."",""Grid_2_0"":""."",...","{""SCORE"":0.0,""SCORE_ADV"":0.0,""ORDINAL"":0.5,""OU...",0.0,"SetGridValueAction{gridBoard=5, x=6, y=7, valu...","SetGridValueAction{gridBoard=5, x=4, y=7, valu...",0,0,0
3,1,Node_4,-0.0294,17,Node_1,"{""Grid_0_0"":""."",""Grid_1_0"":""."",""Grid_2_0"":""."",...","{""SCORE"":0.0,""SCORE_ADV"":0.0,""ORDINAL"":0.5,""OU...",0.0,"SetGridValueAction{gridBoard=5, x=7, y=7, valu...","SetGridValueAction{gridBoard=5, x=1, y=7, valu...",0,0,0
4,1,Node_5,-0.0294,17,Node_1,"{""Grid_0_0"":""."",""Grid_1_0"":""."",""Grid_2_0"":""."",...","{""SCORE"":0.0,""SCORE_ADV"":0.0,""ORDINAL"":0.5,""OU...",0.0,"SetGridValueAction{gridBoard=5, x=1, y=7, valu...","SetGridValueAction{gridBoard=5, x=0, y=4, valu...",0,0,0


In [4]:
# Build the feature-change table for the loaded tree and show it as a plain dict.
# NOTE(review): `maximun_depth` looks like a typo for `maximum_depth`; left as is in case other cells use it.
table_df, maximun_depth = generate_game_feature_explanation_df(df)
table_df.to_dict()

{('Node_2', 'Higher'): {('SCORE', 'Immediate'): '0.00%',
  ('SCORE', 1): '0.00%',
  ('SCORE', 2): '0.00%',
  ('SCORE', 3): nan,
  ('SCORE_ADV', 'Immediate'): '0.00%',
  ('SCORE_ADV', 1): '0.00%',
  ('SCORE_ADV', 2): '0.00%',
  ('SCORE_ADV', 3): nan,
  ('ORDINAL', 'Immediate'): '0.00%',
  ('ORDINAL', 1): '0.00%',
  ('ORDINAL', 2): '0.00%',
  ('ORDINAL', 3): nan,
  ('OUR_TURN', 'Immediate'): '0.00%',
  ('OUR_TURN', 1): '0.00%',
  ('OUR_TURN', 2): '0.00%',
  ('OUR_TURN', 3): nan,
  ('HAS_WON', 'Immediate'): '0.00%',
  ('HAS_WON', 1): '0.00%',
  ('HAS_WON', 2): '0.00%',
  ('HAS_WON', 3): nan,
  ('FINAL_ORD', 'Immediate'): '0.00%',
  ('FINAL_ORD', 1): '0.00%',
  ('FINAL_ORD', 2): '0.00%',
  ('FINAL_ORD', 3): nan,
  ('ROUND', 'Immediate'): '100.00%',
  ('ROUND', 1): '100.00%',
  ('ROUND', 2): '100.00%',
  ('ROUND', 3): nan,
  ('One_Token', 'Immediate'): '100.00%',
  ('One_Token', 1): '78.57%',
  ('One_Token', 2): '66.67%',
  ('One_Token', 3): nan,
  ('Opponent_One_Token', 'Immediate'): '0.00

In [5]:
import sys
 
# setting path
sys.path.append('..')
 
# importing
import data_preprocessing

In [6]:
def get_children_in_depth(df, exclude_action_names=None):
    """
    Group the nodes below every root action by their distance from that action
    :param df: MCTS data file
    :param exclude_action_names: the list of root action names that will be ignored
    :return: dictionary ``{root action: {depth: [node names]}}`` (depth 0 holds the root action itself),
             the smallest and the largest "deepest level" reached over all root actions
    """
    def expand(root_action):
        """
        Fill in the levels below one root action, one depth at a time, until a
        level produces no further nodes
        :param root_action: the name of root action
        """
        levels = root_actions_depth_dict[root_action]
        level = 0
        while level in levels:
            # Collect every node reachable from the nodes of the current level
            next_level_nodes = []
            for node_name in levels[level]:
                next_level_nodes += data_preprocessing.get_node_available_actions(df, node_name)

            # Empty levels are never stored, which is what ends the loop
            if len(next_level_nodes) != 0:
                levels[level + 1] = next_level_nodes

            level += 1

    root_actions = data_preprocessing.get_root_available_actions(df, exclude_action_names)
    root_actions_depth_dict = {action: {0: [action]} for action in root_actions}

    min_depth, max_depth = float('inf'), float('-inf')

    # Expand each root action and track the shallowest / deepest tree seen so far
    for action in root_actions:
        expand(action)
        deepest_level = max(root_actions_depth_dict[action])
        max_depth = max(max_depth, deepest_level)
        min_depth = min(min_depth, deepest_level)

    return root_actions_depth_dict, min_depth, max_depth

In [7]:
# Smoke test: run get_children_in_depth on several recorded files.
# The return values are discarded, so this only checks that the call does not raise.
tests = ["test_01/record_0.csv", "test_01/record_3.csv", "../DotsAndBoxes/record_10.csv", 
         "../Connect4/record_10.csv", "../test_data/MCTS_test_12.csv"]

for test in tests:
    test_df = pd.read_csv(test, sep="\t")
    get_children_in_depth(test_df)

In [8]:
# Compute the per-root-action depth dictionary for the loaded tree and display it.
root_actions_depth_dict, min_depth, max_depth = get_children_in_depth(df)

root_actions_depth_dict

{'Node_2': {0: ['Node_2'],
  1: ['Node_10',
   'Node_11',
   'Node_12',
   'Node_13',
   'Node_14',
   'Node_15',
   'Node_16',
   'Node_17',
   'Node_18',
   'Node_19',
   'Node_20',
   'Node_21',
   'Node_22',
   'Node_23'],
  2: ['Node_117', 'Node_118', 'Node_119']},
 'Node_3': {0: ['Node_3'],
  1: ['Node_24',
   'Node_25',
   'Node_26',
   'Node_27',
   'Node_28',
   'Node_29',
   'Node_30',
   'Node_31',
   'Node_32',
   'Node_33',
   'Node_34',
   'Node_35',
   'Node_36'],
  2: ['Node_120', 'Node_121', 'Node_122']},
 'Node_4': {0: ['Node_4'],
  1: ['Node_37',
   'Node_38',
   'Node_39',
   'Node_40',
   'Node_41',
   'Node_42',
   'Node_43',
   'Node_44',
   'Node_45',
   'Node_46',
   'Node_47',
   'Node_48',
   'Node_49'],
  2: ['Node_123', 'Node_124', 'Node_125']},
 'Node_5': {0: ['Node_5'],
  1: ['Node_50',
   'Node_51',
   'Node_52',
   'Node_53',
   'Node_54',
   'Node_55',
   'Node_56',
   'Node_57',
   'Node_58',
   'Node_59',
   'Node_60',
   'Node_61',
   'Node_62',
   

In [9]:
# Labels of the three comparison outcomes; used as counter keys and as the second column level.
DIFFERENCE_TYPE = ['Higher', 'Same', 'Lower']


def get_feature_counts(df, root_features, node_list, exclude_features=None, feature_col='Game_Features', rel_tol=0.001):
    """
    Count, per root feature, how many nodes are higher / same / lower than the root
    :param df: MCTS data file
    :param root_features: mapping of feature name to the root node's value
    :param node_list: names of the nodes to compare with the root
    :param exclude_features: forwarded to data_preprocessing.get_features
    :param feature_col: forwarded to data_preprocessing.get_features
    :param rel_tol: relative tolerance used by math.isclose to decide "Same"
    :return: dictionary ``{feature: {difference type: count}}`` with one counter per entry of DIFFERENCE_TYPE
    """
    game_feature_summary = {}
    for feature in root_features:
        game_feature_summary[feature] = dict.fromkeys(DIFFERENCE_TYPE, 0)

    for node_name in node_list:
        node_features = data_preprocessing.get_features(df, node_name, exclude_features, feature_col)

        for feature in root_features:
            root_val, node_val = root_features[feature], node_features[feature]

            # Closeness is checked first so tiny differences do not count as a change
            if math.isclose(root_val, node_val, rel_tol=rel_tol):
                outcome = 'Same'
            elif node_val > root_val:
                outcome = 'Higher'
            else:
                outcome = 'Lower'

            game_feature_summary[feature][outcome] += 1

    return game_feature_summary


def game_feature_difference_explanation(df, depth_type='max', exclude_action_nodes=None, exclude_features=None,
                                        feature_col='Game_Features', rel_tol=0.001, exclude_action_names=None):
    """
    Build a table describing how game features differ from the root for every root action
    :param df: MCTS data file
    :param depth_type: 'max' uses the deepest root action, 'min' the shallowest one, any other
                       value the integer midpoint of the two, as the deepest level to report
    :param exclude_action_nodes: the list of root action node names that will be ignored
    :param exclude_features: feature names forwarded to data_preprocessing.get_features
    :param feature_col: name of the column holding the features
    :param rel_tol: relative tolerance used when deciding that two feature values are the same
    :param exclude_action_names: alias of ``exclude_action_nodes`` (same name as the parameter of
                                 get_children_in_depth); when both are given the lists are combined
    :return: DataFrame indexed by (Feature, Depth) with one (action name, difference type) column per
             root action and entry of DIFFERENCE_TYPE; cells look like ``"92.3%(12)"``.
             An empty DataFrame with the same index levels is returned when there is no root action
    """
    if exclude_action_names is not None:
        exclude_action_nodes = list(exclude_action_nodes or []) + list(exclude_action_names)

    root_actions_depth_dict, min_depth, max_depth = get_children_in_depth(df, exclude_action_nodes)

    if not root_actions_depth_dict:
        # Nothing to compare: return an empty table instead of failing on an unbound result
        return pd.DataFrame(index=pd.MultiIndex.from_arrays([[], []], names=['Feature', 'Depth']))

    if depth_type == 'max':
        maximum_depth = max_depth
    elif depth_type == 'min':
        maximum_depth = min_depth
    else:
        maximum_depth = (max_depth + min_depth) // 2

    root_name = data_preprocessing.get_root_node_name(df)
    root_features = data_preprocessing.get_features(df, root_name, exclude_features, feature_col)

    features_differences = {}

    for root_action, action_depth_dict in root_actions_depth_dict.items():
        root_action_name = data_preprocessing.node_name_to_action_name(df, root_action)
        # Create dictionaries for each different type
        for difference_type in DIFFERENCE_TYPE:
            features_differences[(root_action_name, difference_type)] = dict()

        # Loop through the different depth and summary the feature difference
        for depth, node_list in action_depth_dict.items():
            if depth > maximum_depth:
                break

            # Depth 0 is the root action itself and is reported as "Immediate"
            depth_label = "Immediate" if depth == 0 else depth

            game_feature_summary = get_feature_counts(df, root_features, node_list, exclude_features, feature_col, rel_tol)

            for feature, summary in game_feature_summary.items():
                for difference_type, count in summary.items():
                    features_differences[(root_action_name, difference_type)][
                        (feature, depth_label)] = f"{count / len(node_list) * 100:.1f}%({count})"

    # Build and sort the table once, after every root action has been summarised
    feature_order_dict = {name: idx for idx, name in enumerate(list(root_features))}

    feature_df = pd.DataFrame.from_dict(features_differences)
    # "Immediate" (non-integer label) sorts as depth 0
    feature_df.sort_index(inplace=True, key=lambda x: [i if type(i) == int else 0 for i in x], level=1)
    feature_df.sort_index(inplace=True, key=lambda x: [feature_order_dict[i] for i in x], level=0,
                          sort_remaining=False)
    feature_df.index.names = ['Feature', 'Depth']

    return feature_df

In [10]:
# Report up to the average depth, leaving out two root actions and two features.
# The keyword for excluded root actions is `exclude_action_nodes` in this function's signature.
game_feature_difference_explanation(df, depth_type="average", exclude_action_nodes=["Node_3", "Node_5"], exclude_features=["OUR_TURN", "ORDINAL"])

Unnamed: 0_level_0,Unnamed: 1_level_0,"SetGridValueAction{gridBoard=5, x=5, y=7, value=x}","SetGridValueAction{gridBoard=5, x=5, y=7, value=x}","SetGridValueAction{gridBoard=5, x=5, y=7, value=x}","SetGridValueAction{gridBoard=5, x=7, y=7, value=x}","SetGridValueAction{gridBoard=5, x=7, y=7, value=x}","SetGridValueAction{gridBoard=5, x=7, y=7, value=x}","SetGridValueAction{gridBoard=5, x=0, y=5, value=x}","SetGridValueAction{gridBoard=5, x=0, y=5, value=x}","SetGridValueAction{gridBoard=5, x=0, y=5, value=x}","SetGridValueAction{gridBoard=5, x=2, y=6, value=x}","SetGridValueAction{gridBoard=5, x=2, y=6, value=x}","SetGridValueAction{gridBoard=5, x=2, y=6, value=x}","SetGridValueAction{gridBoard=5, x=3, y=6, value=x}","SetGridValueAction{gridBoard=5, x=3, y=6, value=x}","SetGridValueAction{gridBoard=5, x=3, y=6, value=x}","SetGridValueAction{gridBoard=5, x=4, y=7, value=x}","SetGridValueAction{gridBoard=5, x=4, y=7, value=x}","SetGridValueAction{gridBoard=5, x=4, y=7, value=x}"
Unnamed: 0_level_1,Unnamed: 1_level_1,Higher,Same,Lower,Higher,Same,Lower,Higher,Same,Lower,Higher,Same,Lower,Higher,Same,Lower,Higher,Same,Lower
Feature,Depth,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
SCORE,Immediate,0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0)
SCORE,1,0.0%(0),100.0%(14),0.0%(0),0.0%(0),92.3%(12),7.7%(1),6.2%(1),81.2%(13),12.5%(2),0.0%(0),91.7%(11),8.3%(1),0.0%(0),100.0%(12),0.0%(0),0.0%(0),100.0%(13),0.0%(0)
SCORE,2,0.0%(0),100.0%(3),0.0%(0),0.0%(0),100.0%(3),0.0%(0),6.0%(8),91.0%(122),3.0%(4),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(2),0.0%(0),0.0%(0),100.0%(4),0.0%(0)
SCORE_ADV,Immediate,0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0)
SCORE_ADV,1,0.0%(0),100.0%(14),0.0%(0),0.0%(0),92.3%(12),7.7%(1),6.2%(1),81.2%(13),12.5%(2),0.0%(0),91.7%(11),8.3%(1),0.0%(0),100.0%(12),0.0%(0),0.0%(0),100.0%(13),0.0%(0)
SCORE_ADV,2,0.0%(0),100.0%(3),0.0%(0),0.0%(0),100.0%(3),0.0%(0),6.0%(8),91.0%(122),3.0%(4),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(2),0.0%(0),0.0%(0),100.0%(4),0.0%(0)
HAS_WON,Immediate,0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0)
HAS_WON,1,0.0%(0),100.0%(14),0.0%(0),0.0%(0),100.0%(13),0.0%(0),6.2%(1),93.8%(15),0.0%(0),0.0%(0),100.0%(12),0.0%(0),0.0%(0),100.0%(12),0.0%(0),0.0%(0),100.0%(13),0.0%(0)
HAS_WON,2,0.0%(0),100.0%(3),0.0%(0),0.0%(0),100.0%(3),0.0%(0),6.0%(8),94.0%(126),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(2),0.0%(0),0.0%(0),100.0%(4),0.0%(0)
FINAL_ORD,Immediate,0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0),0.0%(0),100.0%(1),0.0%(0)


In [11]:
# Smoke test: run game_feature_difference_explanation with default arguments on several recorded files.
# The return values are discarded, so this only checks that the call does not raise.
tests = ["test_01/record_0.csv", "test_01/record_3.csv", "../DotsAndBoxes/record_10.csv", 
         "../Connect4/record_10.csv", "../test_data/MCTS_test_12.csv"]

for test in tests:
    test_df = pd.read_csv(test, sep="\t")
    game_feature_difference_explanation(test_df)