In [51]:
import os
import numpy as np
from collections import defaultdict
import copy

In [100]:
# Configuration: dataset directory name, result sub-directories (one per entry),
# and the metric names used to build the metric file names.
dataset_name = 'FB15k-237'
data_dirs = ['cpg_minerva', 'plain_minerva', 'cpg_conve', 'plain_conve']
metrics = ['hits_at_1', 'hits_at_3', 'hits_at_5', 'hits_at_10', 'mrr']

In [266]:
# generate the dataset paths
def generate_data_paths(dataset_name, data_dirs, metrics, data_type='test'):
    """Build, for every data directory, the list of its metric file paths.

    Each path has the form
    ``<cwd>/<dataset_name>/<data_dir>/<data_type>_relation_<metric>.txt``.

    Args:
        dataset_name: Directory name joined onto the current working directory.
        data_dirs: Iterable of sub-directory names; each one becomes a key of
            the result (only if `metrics` is non-empty).
        metrics: Iterable of metric names; one path per metric, in this order.
        data_type: File-name prefix placed before ``_relation_``.

    Returns:
        A defaultdict mapping each data directory to its list of paths.
    """
    dataset_root = os.path.join(os.getcwd(), dataset_name)
    dir_paths = defaultdict(lambda: [])
    for result_dir in data_dirs:
        for metric_name in metrics:
            file_name = '{}_relation_{}.txt'.format(data_type, metric_name)
            dir_paths[result_dir].append(os.path.join(dataset_root, result_dir, file_name))
    return dir_paths


def _write_data_to_file(file_path, data):
    """Write ``str(data)`` plus a trailing newline to `file_path`.

    A file that already exists is opened in append mode ('a'); otherwise the
    file is opened with mode 'w+'.
    """
    open_mode = 'a' if os.path.exists(file_path) else 'w+'
    with open(file_path, open_mode) as out_file:
        out_file.write(str(data) + "\n")
        

def correct_data_paths(data_dirs):
    """Rewrite metric files in place so each record sits on one tab-separated line.

    Every file is read as consecutive pairs of lines. Each pair is stripped and
    joined as ``'<first>\\t<second>'``, and the file is then overwritten with the
    joined records, one per line.

    Args:
        data_dirs: Iterable of file paths (each entry is opened as a file).

    Raises:
        IndexError: If a file has an odd number of lines. This happens while
            pairing the lines, before the file is overwritten.

    Notes:
        * Files that yield no records (empty files) are skipped. Previously the
          trailing write referenced the loop variable after the loop, which
          raised NameError for the first file or silently reused the last
          record of the previous file.
        * Running this on an already corrected file pairs its records up again,
          so it must only be run on files in the two-lines-per-record layout.
        * NOTE(review): only the LAST joined record is appended to
          ``<path without its last 4 characters>_new.txt``. That behaviour is
          preserved as-is; confirm whether every record was meant to be written.
    """
    for data_dir in data_dirs:
        with open(data_dir, 'r') as handle:
            lines = handle.readlines()

        data_corrected = []
        for idx in range(0, len(lines), 2):
            relation = lines[idx].strip()
            value = lines[idx + 1].strip()
            data_corrected.append('{}\t{}'.format(relation, value))

        if not data_corrected:
            # Nothing to rewrite and no "last record" to append.
            continue

        new_path = data_dir[:-4] + '_new.txt'
        with open(data_dir, 'w') as handle:
            for data in data_corrected:
                handle.write(data + '\n')
        _write_data_to_file(new_path, data_corrected[-1])
                

def extract_relation_metrics(paths_dict):
    """Load per-relation metrics from tab-separated metric files.

    Each non-blank line of a file must hold three tab-separated fields:
    relation, metric value, and total number of examples. The metric name is
    taken from the file name: the part after ``_relation_`` with the extension
    removed (e.g. ``test_relation_hits_at_1.txt`` -> ``hits_at_1``). If the
    marker is absent, the whole file stem is used as the metric name.

    Args:
        paths_dict: Mapping from model name to an iterable of metric file paths.

    Returns:
        Nested defaultdict ``model -> relation -> metric name -> [value, total]``
        where `value` is a float and `total` is kept as the string read from the
        file. A relation's metrics that were never read keep the default
        ``[0, 0]``.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields or
            its metric value is not a valid float.
    """
    marker = '_relation_'

    def _empty_relation_entry():
        # A fresh dict per relation so entries never share their lists.
        return {'hits_at_1': [0, 0],
                'hits_at_3': [0, 0],
                'hits_at_5': [0, 0],
                'hits_at_10': [0, 0],
                'mrr': [0, 0]}

    model_metrics = defaultdict(lambda: defaultdict(_empty_relation_entry))

    for model_type, paths in paths_dict.items():
        for path in paths:
            # basename/splitext work with the platform's separators, unlike
            # split('/') combined with fixed character offsets.
            file_stem = os.path.splitext(os.path.basename(path))[0]
            if marker in file_stem:
                path_type = file_stem.split(marker, 1)[1]
            else:
                path_type = file_stem
            with open(path, 'r') as handle:
                for line in handle:
                    record = line.strip()
                    if not record:
                        continue  # tolerate blank / trailing empty lines
                    relation, metric, total_examples = record.split('\t')
                    model_metrics[model_type][relation][path_type] = [float(metric), total_examples]
    return model_metrics


def cleanly_print_dict(current_object, padding='', precision=3):
    """Print a (possibly nested) dict as tab-indented lines.

    Whether a level is nested is decided from its FIRST value only: if that
    value is a dict, every key is printed on its own line and its value is
    printed recursively with one more tab of padding. Otherwise each entry is
    printed as ``<padding><key>\\t<value>``.

    Value formatting at a leaf level:
        * non-empty list: first element as a float with `precision` decimals,
          remaining elements appended as-is, tab-separated;
        * anything else: ``float(value[0])`` formatted the same way when that
          works, otherwise ``str(value)`` (so plain ints/floats print as-is).

    Args:
        current_object: Dict to print. An empty dict prints nothing.
        padding: Prefix put in front of every printed line.
        precision: Number of decimals for formatted floats.
    """
    if not current_object:
        # Previously this raised IndexError when peeking at the first value.
        return

    first_value = next(iter(current_object.values()))
    if isinstance(first_value, dict):  # defaultdict is a dict subclass
        for key, value in current_object.items():
            print(padding + str(key))
            # Recurse into this function itself; the original called
            # `cleanly_print_dict_`, which is only defined in a later cell.
            cleanly_print_dict(value, padding=padding + '\t', precision=precision)
        return

    for key, value in current_object.items():
        if isinstance(value, list) and value:
            str_value = '{0:.{1}f}'.format(float(value[0]), precision)
            for val_ in value[1:]:
                str_value += '\t{0}'.format(val_)
        else:
            try:
                str_value = '{0:.{1}f}'.format(float(value[0]), precision)
            except Exception:
                # Not indexable / not numeric: fall back to the plain repr.
                str_value = str(value)
        print(padding + str(key) + '\t' + str_value)
        
        
def sample_from_dict(dictionary, k=10):
    """Return a random sub-dictionary with ``min(k, len(dictionary))`` entries.

    Keys are drawn WITHOUT replacement using NumPy's global random state, so
    the result always has exactly the requested number of distinct entries.
    (Sampling with replacement let duplicate draws collapse into fewer
    entries.) Indices are sampled rather than the keys themselves, so keys keep
    their original type and non-string keys such as tuples are supported.

    Args:
        dictionary: Mapping to sample from.
        k: Maximum number of entries to return.

    Returns:
        A new dict holding the sampled ``key: value`` pairs.
    """
    keys = list(dictionary.keys())
    k = min(k, len(keys))
    if k == 0:
        return {}
    chosen_positions = np.random.choice(len(keys), k, replace=False)
    return {keys[pos]: dictionary[keys[pos]] for pos in chosen_positions}


def compute_ratio(source, target):
    """Return ``(target - source)`` divided by ``max(source, 1.)``.

    For any `source` that is at most 1 the divisor is 1, so the result is then
    simply the difference ``target - source``.
    """
    change = target - source
    divisor = max(source, 1.)
    return change / divisor


def compute_metric_differences(model_metrics, source_model, target_model):
    """Compare two models' per-relation metrics.

    For every relation of the source model and every metric of that relation,
    the result stores ``[compute_ratio(source_value, target_value), target_total]``
    where the values are element 0 and the total is element 1 of the stored
    metric lists.

    Args:
        model_metrics: Mapping ``model -> relation -> metric name -> [value, total]``.
        source_model: Key of the baseline model.
        target_model: Key of the model compared against the baseline.

    Returns:
        Dict ``relation -> metric name -> [ratio, target total]``.
    """
    source_by_relation = model_metrics[source_model]
    target_by_relation = model_metrics[target_model]
    comparison_metrics = {}
    for relation in source_by_relation.keys():
        source_entry = source_by_relation[relation]
        target_entry = target_by_relation[relation]
        per_metric = comparison_metrics[relation] = {}
        for metric_name in source_entry.keys():
            source_value = source_entry[metric_name][0]
            target_value = target_entry[metric_name][0]
            ratio = compute_ratio(source_value, target_value)
            per_metric[metric_name] = [ratio, target_entry[metric_name][1]]
    return comparison_metrics
            

def reorder_metric_storage(metric_storage):
    """Swap the two nesting levels of a ``relation -> metric -> value`` dict.

    Returns:
        A new dict ``metric -> relation -> value``; the value objects themselves
        are reused, not copied.
    """
    by_metric = {}
    for relation, metrics in metric_storage.items():
        for metric_name, metric_value in metrics.items():
            by_metric.setdefault(metric_name, {})[relation] = metric_value
    return by_metric


def sort_dict_by_value(dictionary, decreasing=True, is_list=True):
    """Return the dict's ``(key, value)`` pairs sorted by value.

    Args:
        dictionary: Mapping whose items are sorted.
        decreasing: Sort from largest to smallest when True.
        is_list: When True, values are sequences and their first element is the
            sort key; when False, the value itself is the sort key.

    Returns:
        A list of ``(key, value)`` tuples.
    """
    if is_list:
        def ranking_key(item):
            return item[1][0]
    else:
        def ranking_key(item):
            return item[1]
    return sorted(dictionary.items(), key=ranking_key, reverse=decreasing)


def get_metric_tails(metric2relation2value, tail_len=10):
    """Pick the highest- and lowest-valued relations for every metric.

    Relations are ranked with `sort_dict_by_value` (largest first, using the
    first element of each value list).

    Args:
        metric2relation2value: Mapping ``metric -> relation -> [value, ...]``.
        tail_len: Number of relations kept at each end of the ranking.

    Returns:
        Dict ``metric -> {'top_<tail_len>': {...}, 'bottom_<tail_len>': {...}}``.
        Both groups keep the descending order of the ranking, and they overlap
        when a metric has fewer than ``2 * tail_len`` relations.
    """
    metric_tails_dict = {}
    for metric, relation_metric_values in metric2relation2value.items():
        ranked = sort_dict_by_value(relation_metric_values, decreasing=True)
        top_k = dict(ranked[:tail_len])
        # `ranked[-0:]` is the WHOLE list, so tail_len == 0 needs an explicit
        # empty result instead of a negative slice.
        bottom_k = dict(ranked[-tail_len:]) if tail_len else {}
        metric_tails_dict[metric] = {
            'top_{}'.format(tail_len): top_k,
            'bottom_{}'.format(tail_len): bottom_k,
        }
    return metric_tails_dict


def read_entities(dataset_name):
    """Read ``<cwd>/<dataset_name>/entities.txt`` into a list.

    Returns:
        One whitespace-stripped string per line of the file, in file order.
    """
    entities_path = os.path.join(os.getcwd(), dataset_name, 'entities.txt')
    with open(entities_path, 'r') as entity_file:
        return [raw_line.strip() for raw_line in entity_file]

def append_data_to_cpg_conve(cpg_conve_path, plain_conve_path, new_path):
    """Append the third column of one metric file to the rows of another.

    The file at `plain_conve_path` must hold three tab-separated fields per
    line; its third field is looked up by the first field (the relation). The
    file at `cpg_conve_path` must hold two tab-separated fields per line. Each
    of its rows is written to `new_path` with the matching third field added as
    an extra tab-separated column.

    Both inputs are read completely before `new_path` is opened for writing.

    Raises:
        ValueError: If a line does not have the expected number of fields.
        KeyError: If a relation of the first file is missing from the second.
    """
    with open(plain_conve_path, 'r') as plain_file:
        example_counts = {
            rel: num_exp
            for rel, _, num_exp in (row.strip().split('\t') for row in plain_file)
        }

    extended_rows = {}
    with open(cpg_conve_path, 'r') as cpg_file:
        for row in cpg_file:
            record = row.strip()
            rel, _ = record.split('\t')
            extended_rows[rel] = record + '\t' + example_counts[rel] + '\n'

    with open(new_path, 'w') as out_file:
        out_file.writelines(extended_rows.values())
        

In [126]:
# Add the third column of the 'plain_conve' MRR file (index 4 = 'mrr') to the
# 'cpg_conve' MRR file and write the result back over the 'cpg_conve' file.
# The output path reuses the generated path instead of a hard-coded absolute
# user directory, so the cell works from any checkout location.
# NOTE: `metric_dir_paths` is created in a cell further down (In [102]).
# NOTE: not re-runnable: after one run the 'cpg_conve' file has three columns,
# while append_data_to_cpg_conve expects two in its first input.
append_data_to_cpg_conve(metric_dir_paths['cpg_conve'][4], metric_dir_paths['plain_conve'][4],
                         metric_dir_paths['cpg_conve'][4])

In [125]:
# Show the fifth path (index 4, the 'mrr' file) of the two conve path lists.
print(metric_dir_paths['cpg_conve'][4])
print(metric_dir_paths['plain_conve'][4])

/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_conve/test_relation_mrr.txt
/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/plain_conve/test_relation_mrr.txt


In [102]:
# Build the metric file paths (file prefix 'test') for every data directory
# and display them.
metric_dir_paths = generate_data_paths(dataset_name, data_dirs, metrics, data_type='test')
metric_dir_paths

defaultdict(<function __main__.generate_data_paths.<locals>.<lambda>>,
            {'cpg_conve': ['/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_conve/test_relation_hits_at_1.txt',
              '/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_conve/test_relation_hits_at_3.txt',
              '/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_conve/test_relation_hits_at_5.txt',
              '/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_conve/test_relation_hits_at_10.txt',
              '/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_conve/test_relation_mrr.txt'],
             'cpg_minerva': ['/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_minerva/test_relation_hits_at_1.txt',
              '/Users/georgestoica/Desktop/Research/QA/qa_types/src/qa_cpg/temp/FB15k-237/cpg_minerva/test_relation_hits_at

In [103]:
# if metrics are messed up run this
# correct_data_paths(metric_dir_paths['cpg_conve'])

In [127]:
# Parse every metric file into model -> relation -> metric -> [value, total examples].
model_metrics = extract_relation_metrics(metric_dir_paths)

In [60]:
# Randomly sample top-level entries of model_metrics (default k=10).
sampled_dictionary = sample_from_dict(model_metrics)

In [128]:
# Print the sampled metrics as an indented listing.
cleanly_print_dict(sampled_dictionary)

plain_minerva
	/tv/tv_producer/programs_produced./tv/tv_producer_term/producer_type
		hits_at_5	0.955 	 22.0
		hits_at_3	0.955 	 22.0
		hits_at_10	0.955 	 22.0
		hits_at_1	0.864 	 22.0
		mrr	0.902 	 22.0
	/education/educational_degree/people_with_this_degree./education/education/student
		hits_at_5	0.000 	 27.0
		hits_at_3	0.000 	 27.0
		hits_at_10	0.000 	 27.0
		hits_at_1	0.000 	 27.0
		mrr	0.000 	 27.0
	/base/x2010fifaworldcupsouthafrica/world_cup_squad/current_world_cup_squad./base/x2010fifaworldcupsouthafrica/current_world_cup_squad/current_club
		hits_at_5	0.034 	 29.0
		hits_at_3	0.034 	 29.0
		hits_at_10	0.069 	 29.0
		hits_at_1	0.000 	 29.0
		mrr	0.022 	 29.0
	/film/film/dubbing_performances./film/dubbing_performance/actor
		hits_at_5	0.000 	 8.0
		hits_at_3	0.000 	 8.0
		hits_at_10	0.250 	 8.0
		hits_at_1	0.000 	 8.0
		mrr	0.034 	 8.0
	/award/award_winning_work/awards_won./award/award_honor/award
		hits_at_5	0.127 	 118.0
		hits_at_3	0.110 	 118.0
		hits_at_10	0.203 	 118.0
		

		hits_at_5	0.448 	 29.0
		hits_at_3	0.414 	 29.0
		hits_at_10	0.448 	 29.0
		hits_at_1	0.207 	 29.0
		mrr	0.322 	 29.0
	/people/person/gender
		hits_at_5	0.961 	 436.0
		hits_at_3	0.959 	 436.0
		hits_at_10	0.966 	 436.0
		hits_at_1	0.883 	 436.0
		mrr	0.922 	 436.0
	/location/country/form_of_government
		hits_at_5	0.977 	 43.0
		hits_at_3	0.884 	 43.0
		hits_at_10	0.977 	 43.0
		hits_at_1	0.651 	 43.0
		mrr	0.778 	 43.0
	/food/food/nutrients./food/nutrition_fact/nutrient
		hits_at_5	0.667 	 105.0
		hits_at_3	0.610 	 105.0
		hits_at_10	0.838 	 105.0
		hits_at_1	0.248 	 105.0
		mrr	0.453 	 105.0
	/user/jg/default_domain/olympic_games/sports
		hits_at_5	0.727 	 11.0
		hits_at_3	0.455 	 11.0
		hits_at_10	0.818 	 11.0
		hits_at_1	0.182 	 11.0
		mrr	0.351 	 11.0
	/sports/sports_team/roster./basketball/basketball_roster_position/position
		hits_at_5	1.000 	 2.0
		hits_at_3	1.000 	 2.0
		hits_at_10	1.000 	 2.0
		hits_at_1	0.500 	 2.0
		mrr	0.667 	 2.0
	/location/country/official_language
		h

In [63]:
# Regroup the 'cpg_minerva' metrics as metric -> relation -> value.
reoredered_model_metrics = reorder_metric_storage(model_metrics['cpg_minerva'])

In [64]:
# Keep the 10 highest- and 10 lowest-valued relations for each metric.
tail_model_metrics = get_metric_tails(reoredered_model_metrics, tail_len=10)

In [65]:
# Print the top/bottom relations of every metric.
cleanly_print_dict(tail_model_metrics)

hits_at_1
	top_10
		/base/biblioness/bibs_location/state	1.000 | 7.0
		/location/statistical_region/rent50_2./measurement_unit/dated_money_value/currency	1.000 | 46.0
		/tv/tv_producer/programs_produced./tv/tv_producer_term/producer_type	1.000 | 22.0
		/sports/sports_team/roster./baseball/baseball_roster_position/position	1.000 | 3.0
		/organization/non_profit_organization/registered_with./organization/non_profit_registration/registering_agency	1.000 | 22.0
		/business/business_operation/revenue./measurement_unit/dated_money_value/currency	1.000 | 1.0
		/location/hud_foreclosure_area/estimated_number_of_mortgages./measurement_unit/dated_integer/source	1.000 | 84.0
		/organization/endowed_organization/endowment./measurement_unit/dated_money_value/currency	1.000 | 3.0
		/education/university/local_tuition./measurement_unit/dated_money_value/currency	1.000 | 1.0
		/film/film/distributors./film/film_film_distributor_relationship/region	1.000 | 10.0
	bottom_10
		/base/popstra/location/vacat

In [66]:
# Print the top/bottom relations for 'hits_at_1' only.
cleanly_print_dict({'hits_at_1': tail_model_metrics['hits_at_1']})

hits_at_1
	top_10
		/base/biblioness/bibs_location/state	1.000 | 7.0
		/location/statistical_region/rent50_2./measurement_unit/dated_money_value/currency	1.000 | 46.0
		/tv/tv_producer/programs_produced./tv/tv_producer_term/producer_type	1.000 | 22.0
		/sports/sports_team/roster./baseball/baseball_roster_position/position	1.000 | 3.0
		/organization/non_profit_organization/registered_with./organization/non_profit_registration/registering_agency	1.000 | 22.0
		/business/business_operation/revenue./measurement_unit/dated_money_value/currency	1.000 | 1.0
		/location/hud_foreclosure_area/estimated_number_of_mortgages./measurement_unit/dated_integer/source	1.000 | 84.0
		/organization/endowed_organization/endowment./measurement_unit/dated_money_value/currency	1.000 | 3.0
		/education/university/local_tuition./measurement_unit/dated_money_value/currency	1.000 | 1.0
		/film/film/distributors./film/film_film_distributor_relationship/region	1.000 | 10.0
	bottom_10
		/base/popstra/location/vacat

In [67]:
# List the relations recorded for 'cpg_minerva'.
list(model_metrics['cpg_minerva'].keys())

['/tv/tv_producer/programs_produced./tv/tv_producer_term/producer_type',
 '/education/educational_degree/people_with_this_degree./education/education/student',
 '/base/x2010fifaworldcupsouthafrica/world_cup_squad/current_world_cup_squad./base/x2010fifaworldcupsouthafrica/current_world_cup_squad/current_club',
 '/film/film/dubbing_performances./film/dubbing_performance/actor',
 '/award/award_winning_work/awards_won./award/award_honor/award',
 '/influence/influence_node/influenced_by',
 '/business/job_title/people_with_this_title./business/employment_tenure/company',
 '/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/service_language',
 '/people/person/place_of_birth',
 '/award/award_category/category_of',
 '/base/popstra/celebrity/canoodled./base/popstra/canoodled/participant',
 '/film/film/personal_appearances./film/personal_film_appearance/person',
 '/user/tsegaran/random/taxonomy_subject/entry./user/tsegaran/random/taxonomy_entry/taxonomy',
 '/fil

In [129]:
# Per-relation metric change from 'plain_minerva' (source) to 'cpg_minerva' (target).
minerva_metric_diffs = compute_metric_differences(model_metrics, 'plain_minerva', 'cpg_minerva')

In [130]:
# Regroup the minerva differences as metric -> relation -> value.
minerva_metrics_reordered = reorder_metric_storage(minerva_metric_diffs)

In [131]:
# Keep the 10 largest and 10 smallest minerva differences for each metric.
minerva_tail_model_metrics = get_metric_tails(minerva_metrics_reordered, tail_len=10)

In [132]:
# Print the top/bottom minerva differences for 'hits_at_10'.
cleanly_print_dict(minerva_tail_model_metrics['hits_at_10'])

top_10
	/government/politician/government_positions_held./government/government_position_held/legislative_sessions	0.500 	 30.0
	/film/film/film_art_direction_by	0.667 	 3.0
	/film/film/film_festivals	0.556 	 18.0
	/base/eating/practicer_of_diet/diet	1.000 	 11.0
	/film/person_or_entity_appearing_in_film/films./film/personal_film_appearance/type_of_appearance	1.000 	 3.0
	/tv/tv_producer/programs_produced./tv/tv_producer_term/program	0.500 	 4.0
	/travel/travel_destination/climate./travel/travel_destination_monthly_climate/month	0.533 	 60.0
	/sports/sports_team/roster./american_football/football_historical_roster_position/position_s	1.000 	 1.0
	/location/us_county/county_seat	0.500 	 2.0
	/award/award_winning_work/awards_won./award/award_honor/honored_for	0.700 	 10.0
bottom_10
	/influence/influence_node/peers./influence/peer_relationship/peers	-0.500 	 2.0
	/people/ethnicity/geographic_distribution	-0.118 	 17.0
	/film/film_set_designer/film_sets_designed	-0.125 	 8.0
	/tv/tv_person

In [133]:
# Per-relation metric change from 'plain_conve' (source) to 'cpg_conve' (target).
conve_metric_diffs = compute_metric_differences(model_metrics, 'plain_conve', 'cpg_conve')

In [134]:
# Regroup the conve differences as metric -> relation -> value.
conve_metrics_reordered = reorder_metric_storage(conve_metric_diffs)

In [135]:
# Keep the 10 largest and 10 smallest conve differences for each metric.
conve_tail_model_metrics = get_metric_tails(conve_metrics_reordered, tail_len=10)

In [141]:
# Print the top/bottom conve differences for 'mrr'.
cleanly_print_dict(conve_tail_model_metrics['mrr'])

top_10
	/user/jg/default_domain/olympic_games/sports	0.404 	 11
	/award/award_nominated_work/award_nominations./award/award_nomination/nominated_for	0.244 	 16
	/education/educational_institution/campuses	0.573 	 13
	/sports/sports_team/roster./american_football/football_roster_position/position	0.418 	 3
	/education/educational_institution_campus/educational_institution	0.537 	 13
	/location/hud_county_place/place	0.710 	 48
	/music/instrument/family	0.304 	 6
	/tv/tv_producer/programs_produced./tv/tv_producer_term/program	0.328 	 4
	/location/us_county/county_seat	0.466 	 2
	/dataworld/gardening_hint/split_to	0.340 	 10
bottom_10
	/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/contact_category	-0.075 	 20
	/base/marchmadness/ncaa_basketball_tournament/seeds./base/marchmadness/ncaa_tournament_seed/team	-0.118 	 41
	/location/statistical_region/gdp_nominal_per_capita./measurement_unit/dated_money_value/currency	-0.167 	 3
	/sports/sports_league_dr

# Data Parsing

In [90]:
# Paths of the raw data files to parse.
# Note: data_dir is joined twice, so each path is <cwd>/FB15k-237/FB15k-237/<file>.
data_dir = 'FB15k-237'
data_files = ['train.txt']
file_paths = [os.path.join(os.getcwd(), data_dir, data_dir, data_file) for data_file in data_files]

In [245]:
def num_data_per_relation(file_paths):
    """Count how many lines mention each relation across the given files.

    Every line must consist of exactly three tab-separated fields; the middle
    field is the relation that gets counted.

    Args:
        file_paths: Iterable of paths to the files to scan.

    Returns:
        A defaultdict (default 0) mapping relation -> number of lines.
    """
    counts = defaultdict(lambda: 0)
    for path in file_paths:
        with open(path, 'r') as triples_file:
            for row in triples_file:
                _, rel, _ = row.strip().split('\t')
                counts[rel] = counts[rel] + 1
    return counts

def combine_dict_info(dict1, dict2):
    """Append a per-relation value from `dict2` to every leaf list of `dict1`.

    Args:
        dict1: Three-level mapping ``metric -> group -> relation -> list``.
        dict2: Mapping ``relation -> value``; indexed with every relation found
            in `dict1`.

    Returns:
        A new dict with the same three levels whose leaves are
        ``dict1 leaf list + [dict2[relation]]``. The lists of `dict1` are not
        modified.
    """
    return {
        metric_name: {
            group_name: {
                relation: values + [dict2[relation]]
                for relation, values in relations.items()
            }
            for group_name, relations in groups.items()
        }
        for metric_name, groups in dict1.items()
    }
    

In [204]:
# Count the number of lines per relation in the data files listed in file_paths.
rel_props_in_data = num_data_per_relation(file_paths)

In [262]:
# Print each relation with its line count.
cleanly_print_dict(rel_props_in_data)

/tv/tv_producer/programs_produced./tv/tv_producer_term/producer_type	225
/business/business_operation/assets./measurement_unit/dated_money_value/currency	181
/education/educational_degree/people_with_this_degree./education/education/student	260
/base/x2010fifaworldcupsouthafrica/world_cup_squad/current_world_cup_squad./base/x2010fifaworldcupsouthafrica/current_world_cup_squad/current_club	246
/film/film/dubbing_performances./film/dubbing_performance/actor	114
/music/record_label/artist	2226
/influence/influence_node/influenced_by	1908
/business/job_title/people_with_this_title./business/employment_tenure/company	734
/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/service_language	208
/people/person/place_of_birth	2465
/award/award_category/category_of	148
/base/popstra/celebrity/canoodled./base/popstra/canoodled/participant	449
/film/film/personal_appearances./film/personal_film_appearance/person	217
/user/tsegaran/random/taxonomy_subject/entry./us

In [246]:
# Append each relation's line count to its entry in the minerva top/bottom lists.
minerva_tail_metrics_all = combine_dict_info(minerva_tail_model_metrics, rel_props_in_data)

In [254]:
# Append each relation's line count to its entry in the conve top/bottom lists.
conve_tail_metrics_all = combine_dict_info(conve_tail_model_metrics, rel_props_in_data)

In [255]:
def cleanly_print_dict_(current_object, padding='', precision=3):
    """Print a (possibly nested) dict as tab-indented lines.

    This has the same behaviour as `cleanly_print_dict`, defined earlier in the
    notebook, which calls this function for its nested levels.

    Whether a level is nested is decided from its FIRST value only: if that
    value is a dict, every key is printed on its own line and its value is
    printed recursively with one more tab of padding. Otherwise each entry is
    printed as ``<padding><key>\\t<value>``.

    Value formatting at a leaf level:
        * non-empty list: first element as a float with `precision` decimals,
          remaining elements appended as-is, tab-separated;
        * anything else: ``float(value[0])`` formatted the same way when that
          works, otherwise ``str(value)`` (so plain ints/floats print as-is).

    Args:
        current_object: Dict to print. An empty dict prints nothing.
        padding: Prefix put in front of every printed line.
        precision: Number of decimals for formatted floats.
    """
    if not current_object:
        # Previously this raised IndexError when peeking at the first value.
        return

    first_value = next(iter(current_object.values()))
    if isinstance(first_value, dict):  # defaultdict is a dict subclass
        for key, value in current_object.items():
            print(padding + str(key))
            cleanly_print_dict_(value, padding=padding + '\t', precision=precision)
        return

    for key, value in current_object.items():
        if isinstance(value, list) and value:
            str_value = '{0:.{1}f}'.format(float(value[0]), precision)
            for val_ in value[1:]:
                str_value += '\t{0}'.format(val_)
        else:
            try:
                str_value = '{0:.{1}f}'.format(float(value[0]), precision)
            except Exception:
                # Not indexable / not numeric: fall back to the plain repr.
                str_value = str(value)
        print(padding + str(key) + '\t' + str_value)


In [261]:
# Print the conve top/bottom 'hits_at_1' entries together with the appended line counts.
cleanly_print_dict_(conve_tail_metrics_all['hits_at_1'])

top_10
	/user/jg/default_domain/olympic_games/sports	0.455	11	581
	/education/educational_institution/campuses	0.692	13	533
	/sports/sports_team/roster./american_football/football_roster_position/position	0.667	3	607
	/education/educational_institution_campus/educational_institution	0.769	13	526
	/education/university/domestic_tuition./measurement_unit/dated_money_value/currency	0.400	5	476
	/location/hud_county_place/place	0.896	48	459
	/music/instrument/family	0.333	6	100
	/government/politician/government_positions_held./government/government_position_held/legislative_sessions	0.333	30	215
	/soccer/football_team/current_roster./soccer/football_roster_position/position	0.300	10	2265
	/organization/endowed_organization/endowment./measurement_unit/dated_money_value/currency	0.333	3	456
bottom_10
	/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/contact_category	-0.150	20	142
	/base/marchmadness/ncaa_basketball_tournament/seeds./base/marchmadness/nca

In [264]:
# Sort relations by their line count, largest first (values are plain numbers, not lists).
rel_props_in_data_sorted = sort_dict_by_value(rel_props_in_data, is_list=False)

In [269]:
# Display the sorted (relation, count) pairs.
rel_props_in_data_sorted

[('/award/award_nominee/award_nominations./award/award_nomination/award_nominee',
  15989),
 ('/film/film/release_date_s./film/film_regional_release_date/film_release_region',
  12893),
 ('/award/award_nominee/award_nominations./award/award_nomination/award',
  12157),
 ('/people/person/profession', 10945),
 ('/film/actor/film./film/performance/film', 9494),
 ('/award/award_category/nominees./award/award_nomination/nominated_for',
  9465),
 ('/award/award_winner/awards_won./award/award_honor/award_winner', 8423),
 ('/film/film/genre', 7268),
 ('/award/award_nominee/award_nominations./award/award_nomination/nominated_for',
  6277),
 ('/music/genre/artists', 5880),
 ('/award/award_category/winners./award/award_honor/award_winner', 5673),
 ('/film/film/other_crew./film/film_crew_gig/film_crew_role', 5305),
 ('/location/location/contains', 5201),
 ('/people/person/nationality', 4197),
 ('/music/performance_role/track_performances./music/track_contribution/role',
  3795),
 ('/people/person/

## Visualize Relation Hierarchy

In [286]:
def create_depth_hierarchy(dict_level, depth_map):
    """Insert one path of names into a nested dict, in place.

    All names except the last two become nested dict levels (created when
    missing). The second-to-last name maps to a set, and the last name is added
    to that set.

    Args:
        dict_level: Root dict to update.
        depth_map: Sequence of at least two names describing the path.

    Returns:
        The same `dict_level` object that was passed in.
    """
    node = dict_level
    for name in depth_map[:-2]:
        if name not in node:
            node[name] = {}
        node = node[name]

    parent_name, leaf_name = depth_map[-2], depth_map[-1]
    if parent_name in node:
        leaves = node[parent_name]
    else:
        leaves = node[parent_name] = set()
    leaves.add(leaf_name)

    return dict_level

def generate_relation_hierarchy(relation_set):
    """Build a nested hierarchy from '/'-separated relation names.

    Every relation is split on '.' into one or more type paths, and each type
    path is split on '/' and inserted with `create_depth_hierarchy`. Relations
    with any number of '.' separators are supported; previously more than one
    '.' raised ValueError when unpacking into exactly two parts.

    Args:
        relation_set: Iterable of relation strings.

    Returns:
        The nested dict filled in by `create_depth_hierarchy` (empty if
        `relation_set` is empty).
    """
    hierarchy = {}
    for relation in relation_set:
        for type_path in relation.strip().split('.'):
            create_depth_hierarchy(hierarchy, type_path.split('/'))
    return hierarchy

# def traverse_tree_depth(root, path_dict):
#     if isinstance(list(path_dict.values())[0], set):
#         for key, values in path_dict.items():
#             for value in values:
#                 tree.create_node(value, key)
#     else:
#         for key, value in path_dict.items():
            

# def create_tree_from_hierarchy(hierarchy):
#     from treelib import Node, Tree
#     tree = Tree()
#     for 


In [287]:
# Build the hierarchy from the relation names counted in rel_props_in_data.
relation_set = set(list(rel_props_in_data.keys()))
relation_hierarchy = generate_relation_hierarchy(relation_set)

In [288]:
# Display the nested relation hierarchy.
relation_hierarchy

{'': {'american_football': {'football_historical_roster_position': {'position_s'},
   'football_roster_position': {'position'},
   'football_team': {'current_roster'}},
  'award': {'award_category': {'category_of',
    'disciplines_or_subjects',
    'nominees',
    'winners'},
   'award_ceremony': {'awards_presented'},
   'award_honor': {'award', 'award_winner', 'ceremony', 'honored_for'},
   'award_nominated_work': {'award_nominations'},
   'award_nomination': {'award', 'award_nominee', 'nominated_for'},
   'award_nominee': {'award_nominations'},
   'award_winner': {'awards_won'},
   'award_winning_work': {'awards_won'},
   'hall_of_fame': {'inductees'},
   'hall_of_fame_induction': {'inductee'},
   'ranked_item': {'appears_in_ranked_lists'},
   'ranking': {'list'}},
  'base': {'aareas': {'schema': {'administrative_area': {'administrative_area_type',
      'administrative_parent',
      'capital'}}},
   'americancomedy': {'celebrity_impressionist': {'celebrities_impersonated'}},
   'b