# Construct NLI-based knowledge graph of traits.
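Each edge of the knowledge graph is a tuple `<trait1, trait2, entailment_or_contradiction_score>`: the score is positive (an entailment probability) for synonym-like pairs and negative (a negated contradiction probability) for antonym-like pairs.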

In [17]:
import networkx as nx

# Load the cleaned traits-only graph from a pickle file on disk.
# NOTE(review): unpickling executes arbitrary code, so only load files you trust.
# NOTE(review): nx.read_gpickle appears to be unavailable in NetworkX >= 3.0 — confirm the
# pinned NetworkX version before re-running this notebook.
G1 = nx.read_gpickle("../data/traits_only_graph_cleaned2.pickle")

In [18]:
# Display every edge of G1 together with its attribute dict (data=True).
G1.edges(data=True)

EdgeDataView([('active', 'passive', {'weight': -1}), ('active', 'involved', {'weight': 1}), ('active', 'inactive', {'weight': -1}), ('considerate', 'inconsiderate', {'weight': -1}), ('considerate', 'thoughtful', {'weight': 1}), ('convivial', 'sociable', {'weight': 1}), ('diplomatic', 'tactful', {'weight': 1}), ('diplomatic', 'undiplomatic', {'weight': -1}), ('gregarious', 'ungregarious', {'weight': -1}), ('optimistic', 'positive', {'weight': 1}), ('optimistic', 'sanguine', {'weight': 1}), ('optimistic', 'cheerful', {'weight': 1}), ('optimistic', 'pessimistic', {'weight': -1}), ('mature', 'immature', {'weight': -1}), ('mature', 'adult', {'weight': 1}), ('mature', 'aged', {'weight': 1}), ('mature', 'big', {'weight': 1}), ('motivated', 'unmotivated', {'weight': -1}), ('sensible', 'wise', {'weight': 1}), ('sensible', 'thoughtful', {'weight': 1}), ('sensible', 'prudent', {'weight': 1}), ('sensible', 'reasonable', {'weight': 1}), ('sensible', 'logical', {'weight': 1}), ('sensible', 'intellig

In [11]:
import torch
from fairseq.data.data_utils import collate_tokens
from math import exp
import numpy as np

class RobertaMNLI:
    """Thin wrapper around the fairseq ``roberta.large.mnli`` torch.hub model.

    The wrapper loads the model once and exposes :meth:`predict_one`, which
    classifies a single sentence pair as contradiction / neutral / entailment.
    """
    # todo: create similar wrapper classes for other NLI engines.
    # todo: create a wrapper superclass and subclass from there.

    def __init__(self, rel_path):
        """
        Load the MNLI model through torch.hub.

        :param rel_path: directory handed to ``torch.hub.set_dir``; torch.hub
            uses it as its cache/download location.
        """
        # Maps the class index returned by predict_one to a readable label.
        self.output_map = {
            0: 'contradiction',
            1: 'neutral',
            2: 'entailment'
        }

        torch.hub.set_dir(rel_path)
        self.roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')  # works
        # Move to the GPU only when one is present: an unconditional .cuda()
        # raises on CPU-only machines, making the notebook unusable there.
        if torch.cuda.is_available():
            self.roberta.cuda()
        self.roberta.eval()

    def predict_one(self, S1, S2, return_probs=False):
        """
        Run the MNLI head on a single (S1, S2) sentence pair.

        :param S1: String. First sentence passed to the encoder.
        :param S2: String. Second sentence passed to the encoder.
        :param return_probs: when truthy, return the per-class probabilities
            instead of the predicted class index.
        :return: a list of three floats ordered as in ``self.output_map``
            (contradiction, neutral, entailment) if ``return_probs`` is truthy;
            otherwise the int index of the highest-scoring class.
        """
        batch = collate_tokens(
            [self.roberta.encode(S1, S2)], pad_idx=1
        )
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            logprobs = self.roberta.predict('mnli', batch)

        if return_probs:
            # The model output is treated as log-probabilities; exponentiate
            # the single row of the batch to recover probabilities.
            return [exp(logprob) for logprob in logprobs[0].tolist()]

        # 0 is contradiction, 1 is neutral, 2 is entailment.
        return logprobs.argmax(dim=1).tolist()[0]

In [12]:
def entailment_score(predictor, S1, S2):
    """
    Symmetric entailment score for a pair of strings.

    The predictor is queried in both directions (S1 -> S2, then S2 -> S1) and
    the larger of the two entailment probabilities is returned, so the result
    does not depend on argument order.

    :param predictor: wrapper class for NLI engine exposing
        ``predict_one(a, b, return_probs=True)``. See RobertaMNLI above.
    :param S1: String.
    :param S2: String.
    :return: the higher of the two entailment probabilities.
    """
    entail_idx = 2  # position of the entailment class in the probability list
    both_directions = [
        predictor.predict_one(premise, hypothesis, return_probs=True)
        for premise, hypothesis in ((S1, S2), (S2, S1))
    ]
    return max(probs[entail_idx] for probs in both_directions)

def contradiction_score(predictor, S1, S2):
    """
    Symmetric contradiction score for a pair of strings.

    The predictor is queried in both directions (S1 -> S2, then S2 -> S1) and
    the larger of the two contradiction probabilities is returned, so the
    result does not depend on argument order.

    :param predictor: wrapper class for NLI engine exposing
        ``predict_one(a, b, return_probs=True)``. See RobertaMNLI above.
    :param S1: String.
    :param S2: String.
    :return: the higher of the two contradiction probabilities.
    """
    CONTRADICTION = 0  # index 0 is for contradiction.
    forward_probs = predictor.predict_one(S1, S2, return_probs=True)
    reverse_probs = predictor.predict_one(S2, S1, return_probs=True)
    return max(forward_probs[CONTRADICTION], reverse_probs[CONTRADICTION])

In [13]:
# Build the NLI wrapper once; rel_path is the directory RobertaMNLI hands to torch.hub.set_dir.
predictor = RobertaMNLI(rel_path="../../roberta/hub")

Using cache found in ../../roberta/hub\pytorch_fairseq_master


In [14]:
# Sanity check: entailment score for one pair of trait words.
s1 = "brave"
s2 = "courageous"
entailment_score(predictor, s1, s2)

0.9599203192182878

In [15]:
# Sanity check: contradiction score for one pair of trait words.
s1 = "brave"
s2 = "cowardly"
contradiction_score(predictor, s1, s2)

0.9913602944411873

In [21]:
G2 = nx.Graph()

# Rebuild the trait graph edge by edge, replacing the discrete +1 / -1 weight
# with a signed NLI score stored under the 'score' edge attribute.
for trait_a, trait_b, attrs in G1.edges(data=True):
    polarity = attrs['weight']
    if polarity == 1:
        # +1 edges are scored with the entailment probability.
        score = entailment_score(predictor, trait_a, trait_b)
        G2.add_edge(trait_a, trait_b, score=score)
    elif polarity == -1:
        # -1 edges are scored with the contradiction probability, negated
        # to indicate opposite polarity (antonymous relation).
        score = contradiction_score(predictor, trait_a, trait_b)
        G2.add_edge(trait_a, trait_b, score=-1 * score)
    else:
        raise Exception("invalid weight value detected.")
G2.edges(data=True)

EdgeDataView([('active', 'passive', {'score': -0.9962928040560369}), ('active', 'involved', {'score': 0.8323226615075182}), ('active', 'inactive', {'score': -0.9951286145356494}), ('passive', 'inactive', {'score': 0.9475867187093776}), ('involved', 'interested', {'score': 0.8728021251452028}), ('involved', 'participating', {'score': 0.9517589468412195}), ('inactive', 'quiescent', {'score': 0.8819435041473466}), ('inactive', 'dull', {'score': 0.7789697707268533}), ('inactive', 'slow', {'score': 0.6159981150560486}), ('inactive', 'supine', {'score': 0.7104365261292587}), ('considerate', 'inconsiderate', {'score': -0.9963289099869019}), ('considerate', 'thoughtful', {'score': 0.9640939834721406}), ('inconsiderate', 'thoughtless', {'score': 0.82419437526209}), ('inconsiderate', 'careless', {'score': 0.8225489297020572}), ('thoughtful', 'sensible', {'score': 0.8281778560611142}), ('convivial', 'sociable', {'score': 0.9021289763443758}), ('sociable', 'agreeable', {'score': 0.8358482712908396

In [22]:
# Edge count of the NLI-scored graph; it should equal the edge count of G1.
len(G2.edges)

585

In [23]:
# Edge count of the source graph, for comparison with G2.
len(G1.edges)

585

In [24]:
# Prompt for the output location, then pickle the NLI-scored graph there.
# NOTE(review): input() blocks a non-interactive "Run All"; a path constant would make the
# notebook reproducible.
# NOTE(review): nx.write_gpickle appears to be unavailable in NetworkX >= 3.0 — confirm the
# pinned NetworkX version before re-running this notebook.
path = input("enter path to save pickled graph:\n")
nx.write_gpickle(G2, path)

enter path to save pickled graph:
 ../data/traits_only_graph_nli.pickle
