In [9]:
import torch
from fairseq.data.data_utils import collate_tokens
from math import exp
import numpy as np

class RobertaMNLI:
    """Wrapper around the fairseq ``roberta.large.mnli`` torch-hub model.

    Loads the model once and exposes :meth:`predict_one`, which classifies a
    single sentence pair as contradiction / neutral / entailment.
    """
    # todo: create similar wrapper classes for other NLI engines.
    # todo: create a wrapper superclass and subclass from there.

    # Padding index handed to ``collate_tokens`` when building the batch.
    # NOTE(review): presumably the pad id of the RoBERTa dictionary -- confirm.
    PAD_IDX = 1

    def __init__(self, rel_path):
        """
        Load the MNLI model from the torch hub cache.

        :param rel_path: relative path to pytorch hub directory.
        """
        # Class index -> human readable label.
        self.output_map = {
            0: 'contradiction',
            1: 'neutral',
            2: 'entailment'
        }

        torch.hub.set_dir(rel_path)
        self.roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')  # works
        # Only move the model to the GPU when one is present, so the wrapper
        # still works (slowly) on CPU-only machines instead of raising.
        if torch.cuda.is_available():
            self.roberta.cuda()
        self.roberta.eval()

    def predict_one(self, S1, S2, return_probs=False):
        """
        Classify one sentence pair.

        :param S1: first sentence of the pair.
        :param S2: second sentence of the pair.
        :param return_probs: when true, return the three class probabilities
            instead of the predicted class index.
        :return: if ``return_probs`` is true, a list of three floats ordered
            [contradiction, neutral, entailment]; otherwise the predicted
            class index (0 is contradiction, 1 is neutral, 2 is entailment).
        """
        batch = collate_tokens(
            [self.roberta.encode(S1, S2)], pad_idx=self.PAD_IDX
        )
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            logprobs = self.roberta.predict('mnli', batch)

        if return_probs:
            # The model output is treated as log-probabilities; exponentiate
            # the single row of the batch to get plain probabilities.
            return [exp(logprob) for logprob in logprobs[0].tolist()]

        # Batch holds exactly one pair, so take the first predicted class.
        return logprobs.argmax(dim=1).tolist()[0]

In [10]:
# Build the NLI predictor once; the hub cache directory is given as a relative path.
hub_dir = "../../roberta/hub"
predictor = RobertaMNLI(rel_path=hub_dir)

Using cache found in ../../roberta/hub\pytorch_fairseq_master


In [11]:
import pandas as pd
# Read the sentence-pair table; each row has a 'first' and a 'second' text column.
dataset_csv = "../data/roberta/astro0_decomposed_clean.csv"
with open(dataset_csv) as csv_file:
    dataset = pd.read_csv(csv_file, sep=',')
# Preview the first rows.
dataset.head()

Unnamed: 0.1,Unnamed: 0,first,second
0,0,"People with Virgo rising tend to be practical,...",You have an ingenious mind.
1,1,"People with Virgo rising tend to be practical,...",You have an active mind.
2,2,"People with Virgo rising tend to be practical,...",You have an alert mind.
3,3,"People with Virgo rising tend to be practical,...",Gaining knowledge and putting it to good use a...
4,4,"People with Virgo rising tend to be practical,...",You strive for perfection.


In [12]:
# Number of sentence pairs to score.
dataset.shape[0]

3828

In [18]:
# Score every sentence pair in both directions and save the class probabilities.
# The output path is requested first so that a missing path fails before the
# long inference loop rather than after it.
path = input('enter path to save csv of results:\n').strip()
if not path:
    raise ValueError('no output path given for the results csv')

results = list()
# Walk the two text columns in lockstep instead of doing two positional
# row lookups per iteration.
for s1, s2 in zip(dataset['first'], dataset['second']):
    # [contradiction, neutral, entailment] probabilities for (s1, s2) ...
    CNE1 = predictor.predict_one(s1, s2, return_probs=True)
    # ... and for the reversed order (s2, s1).
    CNE2 = predictor.predict_one(s2, s1, return_probs=True)
    row = CNE1 + CNE2
    results.append(row)

results_df = pd.DataFrame(data=results, columns=['C1', 'N1', 'E1', 'C2', 'N2', 'E2'])

# NOTE: the row index is written as an unnamed first column (pandas default).
results_df.to_csv(path)

# Last expression of the cell, so the preview is actually displayed.
results_df.head()

enter path to save csv of results:
 ../data/astro0_decomposed_clean_roberta.csv
