In [2]:
import networkx as nx
import pickle
import json

# Load the cleaned graph from disk.
# SECURITY: read_gpickle unpickles the file, which can execute arbitrary code;
# only load pickles produced by this project / a trusted source.
# NOTE(review): read_gpickle is not available in newer NetworkX releases (3.x) —
# confirm the pinned networkx version before re-running.
G = nx.read_gpickle("../data/graph_cleaned.pickle")

In [3]:
type(G)

networkx.classes.graph.Graph

In [4]:
len(G.nodes)

2185

In [5]:
len(G.edges)

2464

In [6]:
G['active']

AtlasView({'passive': {'weight': -1}, 'involved': {'weight': 1}, 'inactive': {'weight': -1}, 'engaged in or ready for military or naval operations': {'relation': 'defined as'}, "(used of verbs (e.g. `to run') and participial adjectives (e.g. `running' in `running water')) expressing action rather than a state of being": {'relation': 'defined as'}, 'taking part in an activity': {'relation': 'defined as'}, 'in operation': {'relation': 'defined as'}, 'characterized by energetic activity': {'relation': 'defined as'}, 'disposed to take action or effectuate change': {'relation': 'defined as'}, 'engaged in full-time work': {'relation': 'defined as'}, 'exerting influence or producing a change or effect': {'relation': 'defined as'}, 'expressing that the subject of the sentence has the semantic function of actor:': {'relation': 'defined as'}, 'full of activity or engaged in continuous activity': {'relation': 'defined as'}, '(of e.g. volcanos) capable of erupting': {'relation': 'defined as'}, '(o

In [41]:
from copy import deepcopy

# Build G2: a copy of G from which every node whose incident edges ALL carry a
# 'relation' attribute (i.e. 'defined as' edges) has been dropped. The intent is
# that G2 keeps only trait nodes and no definition nodes.
#
# Known limitation: a trait that has no edge to another trait (only definition
# edges, or no edges at all) also satisfies the test below and is removed too.
# No workaround for that yet.
G2 = deepcopy(G)
for n in G.nodes():
    incident_edges = G.edges(nbunch=n, data=True)
    # all() over zero edges is True, so isolated nodes are removed as well.
    only_definition_edges = all('relation' in attrs for _, _, attrs in incident_edges)
    if only_definition_edges:
        G2.remove_node(n)
display(len(G2), len(G2.edges))

436

592

In [40]:
G2.nodes

NodeView(('active', 'considerate', 'convivial', 'diplomatic', 'gregarious', 'optimistic', 'mature', 'motivated', 'sensible', 'brave', 'chatty', 'clever', 'smart', 'cowardly', 'friendly', 'funny', 'generous', 'grumpy', 'honest', 'loud', 'lucky', 'nasty', 'neat', 'nice', 'polite', 'popular', 'quiet', 'rude', 'selfish', 'serious', 'shy', 'silly', 'stupid', 'tidy', 'unlucky', 'untidy', 'wise', 'adaptable', 'adventurous', 'affectionate', 'ambitious', 'amiable', 'compassionate', 'courageous', 'courteous', 'empathetic', 'exuberant', 'impartial', 'inventive', 'philosophical', 'practical', 'rational', 'reliable', 'sincere', 'sympathetic', 'witty', 'inconsiderate', 'passive', 'resentful', 'curious', 'responsible', 'affable', 'eager', 'agreeable', 'encouraging', 'positive', 'enthusiastic', 'helpful', 'trustworthy', 'precise', 'aggressive', 'inconsistent', 'jealous', 'superficial', 'thoughtless', 'hostile', 'problematic', 'unmotivated', 'callous', 'immature', 'unreliable', 'weak', 'sarcastic', 'ch

In [78]:
# Tag every remaining node with type='trait' so trait nodes can be told apart
# from definition nodes later on. A non-dict value is applied to all nodes.
nx.set_node_attributes(G2, values='trait', name='type')

In [85]:
# Ask where to save the traits-only graph. Surrounding whitespace is stripped so a
# stray space does not end up in the filename, and an empty answer falls back to
# the project's usual location instead of failing on an empty path.
DEFAULT_TRAITS_GRAPH_PATH = "../data/traits_only_graph_cleaned.pickle"
path = input("enter path to save pickled graph:\n").strip() or DEFAULT_TRAITS_GRAPH_PATH
nx.write_gpickle(G2, path)

enter path to save pickled graph:
 ../data/traits_only_graph_cleaned.pickle


In [42]:
# from copy import deepcopy
# G2 = deepcopy(G)
# for (u, v, a) in G.edges.data('relation', default=None):
#     print(a)
#     if a == "defined as":
#         G2.remove_edge(u, v)
# len(G2.edges)

In [5]:
# Load the trait definitions (trait name -> list of definition strings).
# The encoding is given explicitly so the file is read the same way on every
# platform instead of relying on the locale's default encoding.
with open("../data/yihao_traits_defs.json", 'r', encoding='utf-8') as f:
    defs_dict = json.load(f)
print(defs_dict.keys())
print(f"num of traits: {len(defs_dict)}")

dict_keys(['tidy', 'infirm', 'disorderly', 'doltish', 'inactive', 'affirmatory', 'intimate', 'large', 'discrepant', 'gracious', 'courteous', 'dashing', 'egotistic', 'undaunted', 'thoughtless', 'vocal', 'unstinting', 'charitable', 'watery', 'narcissistic', 'dirty', 'underdeveloped', 'dull', 'grumpy', 'enthusiastic', 'morose', 'bounteous', 'erudite', 'slack', 'unsafe', 'positive', 'downright', 'childish', 'subdued', 'glib', 'consonant', 'imprecise', 'curious', 'solemn', 'agreeable', 'impartial', 'dense', 'dear', 'witless', 'zealous', 'canny', 'exuberant', 'pubescent', 'microscopic', 'schoolboyish', 'funny', 'ripe', 'mussy', 'instrumental', 'fearless', 'unaged', 'forthright', 'challenging', 'braw', 'respectable', 'crabby', 'untidy', 'sympathetic', 'easy-going', 'creditworthy', 'slovenly', 'formative', 'irrational', 'argumentative', 'dynamic', 'serious', 'deafening', 'prosperous', 'prime', 'fledgling', 'smart', 'marriageable', 'stouthearted', 'peculiar', 'unfriendly', 'real', 'incongruous'

In [6]:
defs_dict['adventurous']

['willing to undertake or seeking out new and daring enterprises']

In [7]:
def is_entailed(A, B):
    """
    Placeholder for an entailment check between two strings.

    Not implemented yet: the function has no logic and always returns None.

    :param A: String.
    :param B: String.
    """

def infer(trait_dict, trait_graph, s1, s2):
    """
    Score every trait definition against two input strings (work in progress).

    :param trait_dict: Dict of trait definitions (trait -> iterable of definition
    strings); the traits in this dict are a subset of the traits in trait_graph.
    :param trait_graph: Graph of antonym/synonym relations between traits. Not used yet.
    :param s1: String. for comparison w s2.
    :param s2: String. for comparison w s1.
    :return: None for now; picking the best traits and inferring their relation
    are still TODO (see the comments at the end of the body).
    """
    # NOTE(review): `compare` is not defined in the visible part of this notebook;
    # it must exist in the namespace before this function is called.

    # Find likely trait1 and trait2.
    trait1_scores = {}
    trait2_scores = {}
    # .items() is required: iterating the dict directly yields only the keys.
    for trait, defs in trait_dict.items():
        trait1_scores[trait] = []
        trait2_scores[trait] = []
        for desc in defs:
            trait1_scores[trait].append(compare(desc, s1))
            # The second score must be computed against s2, not s1.
            trait2_scores[trait].append(compare(desc, s2))

    # TODO: Obtain highest scoring trait1 and trait2 from trait scores dicts.

    # TODO: Given trait1 and trait2, find edges between them and infer if antonym or synonym.


In [116]:
import random

def get_paths_of_weights(G, paths_list):
    """
    Translate node paths into the sequences of edge weights along them.

    :param G: source graph; each consecutive node pair of a path is looked up
    with G.get_edge_data and must carry a 'weight' attribute.
    :param paths_list: all shortest paths (each path is a list of nodes)
    :return: list with one list of weights per path, in the same order; a path
    of k nodes yields k-1 weights.
    """
    return [
        [G.get_edge_data(u, v)['weight'] for u, v in zip(path, path[1:])]
        for path in paths_list
    ]

def vis_weighted_paths(paths_list, weight_paths_list):
    """
    Print each path as ``node-(weight)-node-...-node`` followed by a blank line.

    :param paths_list: list of node paths.
    :param weight_paths_list: list of weight sequences, aligned with paths_list
    (see get_paths_of_weights).
    """
    for idx, nodes in enumerate(paths_list):
        edge_weights = weight_paths_list[idx]
        # One "node-(weight)-" segment per hop; the final node is appended bare.
        hops = "".join(
            f"{nodes[k]}-({edge_weights[k]})-" for k in range(len(nodes) - 1)
        )
        print(hops + f"{nodes[-1]}" + '\n')
 
# Pick two example traits at random and show every shortest path between them.
t1 = random.choice(list(G2.nodes))  # example trait
print(t1)
t2 = random.choice(list(G2.nodes))  # example trait
print(t2)

# Reset the results first: if no path exists, later cells must see empty lists
# rather than undefined names or stale paths left over from a previous run.
paths_list = []
paths_of_weights = []
try:
    # weight=None: shortest means fewest hops; edge weights (+1/-1) are not distances.
    paths = nx.all_shortest_paths(G2, source=t1, target=t2, weight=None, method='dijkstra')
    # The generator raises NetworkXNoPath lazily, so it is consumed inside the try.
    paths_list = list(paths)
except nx.NetworkXNoPath as e:
    print(e)
else:
    paths_of_weights = get_paths_of_weights(G2, paths_list)
    vis_weighted_paths(paths_list, paths_of_weights)

civilized
little
civilized-(1)-refined-(-1)-crude-(1)-early-(1)-immature-(1)-little

civilized-(1)-refined-(-1)-crude-(1)-early-(1)-young-(1)-little



In [117]:
import numpy as np

In [125]:
from statistics import mode
from statistics import StatisticsError

inference_map = {-1: "antonym", 1: "synonym"}

def infer_trait_relations(paths_of_weights):
    """
    simple algorithm for computing relation between start and end node of path.

    The relation of a path is the product of its edge weights, so with +1/-1
    weights an odd number of -1 edges gives -1 and an even number gives 1.

    :param paths_of_weights: see get_paths_of_weights(G, paths_list).
    :return: list with one product per weight path, in the same order. An empty
    weight path (start node == end node) yields 1.
    """
    inferred_relations = []
    for weight_path in paths_of_weights:
        # Start from the multiplicative identity so an empty path does not raise.
        product = 1
        for weight in weight_path:
            product = product * weight
        inferred_relations.append(product)
    return inferred_relations
        
def print_inference(inferred_relations):
    """
    Print the most common inferred relation and the count of each relation.

    Ties (and empty input) are reported as "no mode found". They are detected by
    counting explicitly, because statistics.mode only raises on ties in older
    Python versions and otherwise returns the first of the tied values.

    :param inferred_relations: list of relation values (keys of inference_map),
    see infer_trait_relations(paths_of_weights).
    """
    from collections import Counter

    counts = Counter(inferred_relations)
    top_count = max(counts.values(), default=0)
    winners = [relation for relation, count in counts.items() if count == top_count]

    print("infered relation from shortest paths:")
    if len(winners) == 1:
        print(inference_map[winners[0]] + '\n')
    else:
        print('no mode found. synonym and antonym equally likely.')

    print("<relation: count>:")
    # Sorted so the listing order is stable (antonym = -1 before synonym = 1).
    for relation in sorted(counts):
        print(f"{inference_map[relation]}: {counts[relation]}")
        
inferred_relations = infer_trait_relations(paths_of_weights)
print_inference(inferred_relations)

infered relation from shortest paths:
antonym

<relation: count>:
antonym: 2
