# Gender Ambiguity Detector

Codebase: https://github.com/sattree/gap

In [1]:
import pandas as pd
import torch
import os
from pathlib import Path
import re

In [2]:
# --- Experiment configuration -------------------------------------------------
# Which model is used and the directory its artefacts are read from / written to.
model_version = 'probert'
exp_dir = Path('results/probert')

# Coreference-related switches; both lists of external coref models are empty.
annotate_coref_mentions = False
pretrained_proref = False
coref_models = list()
coref_models_ = list()

# Pre-processing flags.
sanitize_labels = True
preprocess_eval = True

# Runtime settings.
n_gpu = torch.cuda.device_count()
n_samples = 0

In [3]:
# Prepare model
from models.gap.probert import ProBERT
from models.gap.probert_config import ProBertConfig
from models.gap.features import convert_examples_to_features

from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
from pytorch_pretrained_bert.modeling import WEIGHTS_NAME, CONFIG_NAME
from transformers import BertTokenizer, AutoTokenizer

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

print('Preparing Model.')
cache_dir = str(PYTORCH_PRETRAINED_BERT_CACHE)

# Location of the fine-tuned model weights and config.
model_path = os.path.abspath("results/probert/ensembled_lms/bert_large_uncased/ensembled_seeds/42/train_evaluate_cv/1/probert/1/model")
output_model_file = os.path.join(model_path, WEIGHTS_NAME)
output_config_file = os.path.join(model_path, CONFIG_NAME)
config = ProBertConfig()

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True,
                                          never_split=["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"])

model = ProBERT(config,num_labels=2)

# Deserialize the checkpoint straight onto the device chosen above.
map_location = device

print('Loading Model.')
state_dict = torch.load(output_model_file, map_location=map_location)
# strict=False tolerates parameter-name mismatches, so report them instead of
# silently running with partially initialised weights.
load_result = model.load_state_dict(state_dict, strict=False)
missing_keys = list(getattr(load_result, 'missing_keys', []))
unexpected_keys = list(getattr(load_result, 'unexpected_keys', []))
if missing_keys or unexpected_keys:
    print(f'Warning: checkpoint does not fully match the model. '
          f'Missing keys: {missing_keys}. Unexpected keys: {unexpected_keys}.')
model.to(device)


Preparing Model.
Loading Model.


ProBERT(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )


In [4]:
from tqdm import tqdm
import numpy as np
import contextlib
from torch.utils.data import DataLoader, SequentialSampler,TensorDataset
from models.utils import init_data

def evaluate(model,eval_features,device,eval_mode=True):
    """Run ``model`` over ``eval_features`` one example at a time and gather its outputs.

    Args:
        model: object with an ``eval()`` method that is callable with the feature
            tensors as shown in the loop below. With ``eval_mode=True`` it must return
            five values (logits, probabilities and three attention outputs); otherwise
            two (logits, probabilities).
        eval_features: sequence of feature objects exposing every attribute named in
            ``tensor_specs``.
        device: target handed to ``Tensor.to`` for each batch.
        eval_mode: when true, a tqdm progress bar is shown and attention outputs are
            collected as well.

    Returns:
        ``(preds, attn_wts)``: ``preds`` holds the per-example probabilities stacked
        along axis 0; ``attn_wts`` is a two-element list with the first two attention
        outputs accumulated along axis 0 (the third is discarded), or an empty list
        when ``eval_mode`` is false.

    Raises:
        IndexError: if ``eval_features`` yields no batches.
    """
    model.eval()

    # (feature attribute, tensor dtype) in the positional order the batch is unpacked below.
    tensor_specs = (
        ('input_ids', torch.long),
        ('input_mask', torch.long),
        ('segment_ids', torch.long),
        ('gpr_tags_mask', torch.uint8),
        ('mention_p_ids', torch.long),
        ('mention_a_ids', torch.long),
        ('mention_p_mask', torch.uint8),
        ('mention_a_mask', torch.uint8),
        ('cluster_ids_a', torch.long),
        ('cluster_mask_a', torch.uint8),
        ('cluster_ids_p', torch.long),
        ('cluster_mask_p', torch.uint8),
        ('pretrained', torch.float),
        ('label_id', torch.long),
    )
    feature_tensors = [
        torch.tensor([getattr(feature, attr) for feature in eval_features], dtype=dtype)
        for attr, dtype in tensor_specs
    ]

    # Sequential, one-example batches so predictions stay aligned with the input order.
    dataset = TensorDataset(*feature_tensors)
    loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=1)

    collected_probs = []
    attn_wts = []
    # contextlib.suppress() with no arguments only serves as a do-nothing context manager.
    progress = tqdm(desc="Evaluating", total=len(loader)) if eval_mode else contextlib.suppress()
    with progress:
        for batch in loader:
            (input_ids, input_mask, segment_ids,
             gpr_tags_mask,
             mention_p_ids, mention_a_ids,
             mention_p_mask, mention_a_mask,
             cluster_ids_a, cluster_mask_a,
             cluster_ids_p, cluster_mask_p,
             pretrained, label_ids) = tuple(tensor.to(device) for tensor in batch)

            with torch.no_grad():
                outputs = model(input_ids,
                                segment_ids,
                                input_mask,
                                gpr_tags_mask=gpr_tags_mask,
                                mention_p_ids=mention_p_ids,
                                mention_a_ids=mention_a_ids,
                                mention_p_mask=mention_p_mask,
                                mention_a_mask=mention_a_mask,
                                cluster_ids_a=cluster_ids_a,
                                cluster_mask_a=cluster_mask_a,
                                cluster_ids_p=cluster_ids_p,
                                cluster_mask_p=cluster_mask_p,
                                pretrained=pretrained,
                                labels=None,
                                training=False,
                                eval_mode=eval_mode)

                if eval_mode:
                    logits, batch_probs, attn_m, attn_c, attn_co = outputs
                else:
                    logits, batch_probs = outputs

            batch_probs = batch_probs.detach().cpu().numpy()
            if collected_probs:
                collected_probs[0] = np.append(collected_probs[0], batch_probs, axis=0)
            else:
                collected_probs.append(batch_probs)

            if not eval_mode:
                continue

            progress.update()
            if attn_wts:
                attn_wts[0] = np.append(attn_wts[0], attn_m, axis=0)
                attn_wts[1] = np.append(attn_wts[1], attn_c, axis=0)
            else:
                attn_wts = [attn_m, attn_c]

    return collected_probs[0], attn_wts

  return f(*args, **kwds)


## Testing Automatic Pipeline for Using the Detector

In [5]:
# Raw input text
# One raw passage per list entry; later cells process the entries one by one.
original_text = [
    "Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace.",
]
# original_text = [
#                     "Kathleen Nott was born in Camberwell, London. Her father, Philip, was a lithographic printer, and her mother, Ellen, ran a boarding house in Brixton; Kathleen was their third daughter. \
#                         She was educated at Mary Datchelor Girls' School (now closed), London, before attending King's College, London. This is where she met and married them.",
#                     "Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace."
#                 ] 
# # A list of raw input text
# original_text = ["Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace.",
#                  "For the U.S. Under Secretary of State, see Lucy W. Benson. Lucy Benson is a fictional character from the long-running Channel 4 soap opera Hollyoaks, \
#                     played by Kerrie Taylor between 1995--2000 and appeared in the shows first ever episode airing on the 23 October 1995. The character left five years later when she went travelling.",
#                 "Kathleen Nott was born in Camberwell, London. Her father, Philip, was a lithographic printer, and her mother, Ellen, ran a boarding house in Brixton; Kathleen was their third daughter. \
#                     She was educated at Mary Datchelor Girls' School (now closed), London, before attending King's College, London.",
#                 "This is most notable in two almost unique settings of the Symbolum Apostolorum, a Credo according to the Apostle's Creed, not according to the ordinary of the mass. \
#                     Infantas left no conventional mass setting. Michael Noone suggests that, although it possible that Infantas may have been aware of a setting by the French composer Le Brung printed in 1540, it is equally likely that Infantas believed his settings to be unique.",
#                 "Tilton first made contact with Dozenberg when he was still in Chicago in 1927,\
#                     liquidating the print shop. Tilton wrote Dozenberg a letter in Latvian signed simply ``Alfred,'' asking Dozenberg to meet him in New York as soon as he relocated there. \
#                         Tilton met with Dozenberg in a restaurant in New York City about a month later and asked Dozenberg to enter his service, without specifying the exact service in question."
#                 ]

In [6]:
import json
from pycorenlp import StanfordCoreNLP

# Remote CoreNLP server used for annotation: every passage that is annotated is
# sent to this URL.
# NOTE(review): this looks like a shared public instance - confirm before
# submitting sensitive text.
_corenlp_url = 'https://corenlp.run/'
corenlp = StanfordCoreNLP(_corenlp_url)

In [7]:
from typing import List

def is_pronoun(sentence, entity):
    """Report whether ``entity`` is tagged as a pronoun in a CoreNLP sentence.

    Args:
        sentence: sentence dict; only ``sentence['tokens']`` is read, and every token
            must provide the ``originalText`` and ``pos`` keys.
        entity: surface text to look up (compared to ``originalText`` exactly,
            case-sensitively).

    Returns:
        ``True`` when the first token whose text equals ``entity`` is tagged ``PRP``
        or ``PRP$``; ``False`` otherwise, including when no token matches at all
        (for example a multi-word mention).
    """
    for token_dict in sentence['tokens']:
        if token_dict['originalText'] == entity:
            return token_dict['pos'] in ('PRP', 'PRP$')
    # No token matched: answer explicitly instead of falling through to None.
    return False

# need to get combinations of every pronoun + offset with every entity + offset
def get_combinations(entity_dict, pronoun_dict):
    """Pair every pronoun occurrence with every entity occurrence.

    Args:
        entity_dict: mapping of entity text -> list of character offsets.
        pronoun_dict: mapping of pronoun text -> list of character offsets.

    Returns:
        A list of ``[pronoun, pronoun_offset, entity, entity_offset]`` lists, ordered
        by pronoun, then pronoun offset, then entity, then entity offset, following
        the iteration order of the two mappings. Empty when either mapping has no
        offsets to pair.
    """
    # Mapping keys are already unique, so no extra de-duplication of entities is
    # needed while enumerating them.
    return [
        [pronoun, pronoun_offset, entity, entity_offset]
        for pronoun in pronoun_dict
        for pronoun_offset in pronoun_dict[pronoun]
        for entity in entity_dict
        for entity_offset in entity_dict[entity]
    ]

def _find_mention_offsets(mention, passage):
    """Return the start offsets of whole-word, literal occurrences of ``mention`` in ``passage``."""
    # re.escape keeps characters such as "." or "(" in a name from acting as regex
    # syntax; the lookarounds stop short mentions from matching inside longer words
    # (e.g. "he" inside "the", "her" inside "mother").
    pattern = r'(?<!\w)' + re.escape(mention) + r'(?!\w)'
    return [match.start() for match in re.finditer(pattern, passage)]


def get_entities_and_pronouns(original_text: List[str]):
    """Find PERSON entities and pronouns, with character offsets, in each passage.

    Each passage is annotated through the module-level ``corenlp`` client. PERSON
    entity mentions (excluding those ``is_pronoun`` identifies as pronouns) and tokens
    tagged ``PRP``/``PRP$`` are collected, then located in the ORIGINAL passage,
    because the annotated text may differ from it in spacing or other characters.

    Args:
        original_text: list of raw passages.

    Returns:
        ``(entity_list, pronoun_list)``: two lists parallel to ``original_text``. Each
        element is a dict mapping the mention text to the list of character offsets at
        which it occurs as a whole word in that passage (possibly empty when the text
        is not found verbatim).

    Raises:
        TypeError: if ``original_text`` is not a list.
    """
    if not isinstance(original_text, list):
        raise TypeError("Input must be a list of strings.")

    entity_list, pronoun_list = [], []
    for passage in original_text:
        entity_dict, pronoun_dict = {}, {}
        root = json.loads(corenlp.annotate(passage, properties={'annotators': 'parse,coref,openie,ner', "timeout": "50000"}))

        for sentence in root['sentences']:
            for mention in sentence['entitymentions']:
                text = mention['text']
                if mention['ner'] == 'PERSON' and not is_pronoun(sentence, text):
                    entity_dict[text] = []
            for token_dict in sentence['tokens']:
                if token_dict['pos'] in ('PRP', 'PRP$'):
                    pronoun_dict[token_dict['originalText']] = []

        # Offsets are measured against the original passage, not the annotated one.
        for name in entity_dict:
            entity_dict[name] = _find_mention_offsets(name, passage)
        for name in pronoun_dict:
            pronoun_dict[name] = _find_mention_offsets(name, passage)

        entity_list.append(entity_dict)
        pronoun_list.append(pronoun_dict)
    return entity_list, pronoun_list

# Extract entities and pronouns for the raw passages and print the per-passage
# {text: [character offsets]} dictionaries as a quick sanity check.
entity_list, pronoun_list = get_entities_and_pronouns(original_text)
print(entity_list, pronoun_list)

[{'Pat': [0]}] [{'they': [5, 39], 'them': [10], 'their': [15]}]


In [8]:
# initialize results data frame
from pickle import FALSE, TRUE

cols = ['id', 'text', 'pronoun', 'pronoun_offset', 'a', 'a_offset', 'url']

output_cols = ['id', 'text', 'pronoun', 'pronoun_offset', 'a', 'a_offset', 'a_coref', 'url', 'probabilities', 'output']

# Loop invariants. Position 0 of a probability row is read as "coreferent" and
# position 1 as "not coreferent".
labels = [True,False]
tmp_write_path = f'{exp_dir}/new_df.csv'

# One single-row frame per (pronoun occurrence, entity occurrence) pair. They are
# concatenated once after the loop instead of growing df_output row by row.
output_frames = []

for i in range(len(original_text)):
    combinations = get_combinations(entity_list[i], pronoun_list[i])
    for combination in combinations:
        # each combination is [pronoun, pronoun_offset, entity1, entity1_offset]
        pronoun, pronoun_offset, entity1, entity1_offset = combination

        new_df = pd.DataFrame([['na',original_text[i], pronoun,pronoun_offset,entity1,entity1_offset,'na']], columns=cols)
        display(new_df)

        # init_data is given a file path, so the candidate row is written to disk first
        # (tab-separated, despite the .csv suffix).
        new_df.to_csv(tmp_write_path, sep='\t', index=False)

        # Initialise the data transformation.
        # NOTE: possibly unnecessary - it appears to only rename some columns and add two new ones.
        X_inference = init_data(exp_dir=exp_dir,test_path=tmp_write_path, verbose=0, mode='inference')

        # Tokenisation of the text happens here
        inference_features = convert_examples_to_features(X_inference,tokenizer,512,n_coref_models=0,verbose=0)

        # evaluate sample using model
        predicted_probs, _ = evaluate(model,inference_features,device,eval_mode=True)

        # index of max value from predictions so we get the predicted label
        for prob in predicted_probs:
            print(prob)
            max_idx = list(prob).index(max(prob))
            print(f"Known pronoun '{pronoun}' resolves '{labels[max_idx]}' to '{entity1}' with a probability of '{prob[max_idx]}'")

        # The recorded decision comes from the final probability row, selected
        # explicitly rather than through variables leaked by the loop above.
        prob = predicted_probs[-1]
        max_idx = list(prob).index(max(prob))
        entity_coref = labels[max_idx]
        if entity_coref:
            output = f"Known pronoun '{pronoun}' resolves to '{entity1}' with a probability of '{prob[max_idx]}'"
        else:
            output = f"Known pronoun '{pronoun}' does not resolve to '{entity1}' with a probability of '{prob[max_idx]}'"

        output_frames.append(
            pd.DataFrame([['na',original_text[i], pronoun,pronoun_offset,entity1,entity1_offset,entity_coref,'na', predicted_probs, output]], columns=output_cols)
        )

# pd.concat rejects an empty list, so fall back to an empty frame with the same columns.
if output_frames:
    df_output = pd.concat(output_frames, ignore_index=True)
else:
    df_output = pd.DataFrame([], columns=output_cols)


Unnamed: 0,id,text,pronoun,pronoun_offset,a,a_offset,url
0,na,Pat (they/them/their) was not sure how they sh...,they,5,Pat,0,na


Initializing Step InputReader
Step InputReader initialized
Initializing Step LabelSanitizer
Step LabelSanitizer initialized
Initializing Step MentionsAnnotator
Step MentionsAnnotator initialized
Initializing Step gather_step
Step gather_step initialized
Step gather_step, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step LabelSanitizer, working in "inference" mode
Step LabelSanitizer, adapting inputs
Step LabelSanitizer, transforming...
Step LabelSanitizer, transforming completed
Step LabelSanitizer, transform completed
Step MentionsAnnotator, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step MentionsAnnotator, unpacking inputs
Step Mentions

Transforming data to features.
------BEFORE DATA PIPELINE------- results/probert
Empty DataFrame
Columns: [id, label]
Index: []


Extracting coref clusters...: 100%|██████████| 1/1 [00:00<00:00, 472.81it/s]
Step MentionsAnnotator, transforming completed
Step MentionsAnnotator, transform completed
Step gather_step, adapting inputs
Step gather_step, transforming...
Step gather_step, transforming completed
Step gather_step, transform completed


Transforming data to features done.
 Log a couple of examples for sanity check.



Convert Examples to features: 1it [00:00, 264.98it/s]
  sequence_output = sequence_output[~gpr_tags_mask].view(batch_size, -1, self.config.hidden_size)
Evaluating: 100%|██████████| 1/1 [00:04<00:00,  4.15s/it]

[0.97176987 0.02823015]
Known pronoun 'they' resolves 'True' to 'Pat' with a probability of '0.9717698693275452'





Unnamed: 0,id,text,pronoun,pronoun_offset,a,a_offset,url
0,na,Pat (they/them/their) was not sure how they sh...,they,39,Pat,0,na


Initializing Step InputReader
Step InputReader initialized
Initializing Step LabelSanitizer
Step LabelSanitizer initialized
Initializing Step MentionsAnnotator
Step MentionsAnnotator initialized
Initializing Step gather_step
Step gather_step initialized
Step gather_step, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step LabelSanitizer, working in "inference" mode
Step LabelSanitizer, adapting inputs
Step LabelSanitizer, transforming...
Step LabelSanitizer, transforming completed
Step LabelSanitizer, transform completed
Step MentionsAnnotator, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step MentionsAnnotator, unpacking inputs
Step Mentions

Transforming data to features.
------BEFORE DATA PIPELINE------- results/probert
Empty DataFrame
Columns: [id, label]
Index: []


Extracting coref clusters...: 100%|██████████| 1/1 [00:00<00:00, 811.12it/s]
Step MentionsAnnotator, transforming completed
Step MentionsAnnotator, transform completed
Step gather_step, adapting inputs
Step gather_step, transforming...
Step gather_step, transforming completed
Step gather_step, transform completed


Transforming data to features done.
 Log a couple of examples for sanity check.



Convert Examples to features: 1it [00:00, 482.27it/s]
  sequence_output = sequence_output[~gpr_tags_mask].view(batch_size, -1, self.config.hidden_size)
Evaluating: 100%|██████████| 1/1 [00:04<00:00,  4.13s/it]

[0.6899466  0.31005344]
Known pronoun 'they' resolves 'True' to 'Pat' with a probability of '0.6899465918540955'





Unnamed: 0,id,text,pronoun,pronoun_offset,a,a_offset,url
0,na,Pat (they/them/their) was not sure how they sh...,them,10,Pat,0,na


Initializing Step InputReader
Step InputReader initialized
Initializing Step LabelSanitizer
Step LabelSanitizer initialized
Initializing Step MentionsAnnotator
Step MentionsAnnotator initialized
Initializing Step gather_step
Step gather_step initialized
Step gather_step, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step LabelSanitizer, working in "inference" mode
Step LabelSanitizer, adapting inputs
Step LabelSanitizer, transforming...
Step LabelSanitizer, transforming completed
Step LabelSanitizer, transform completed
Step MentionsAnnotator, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step MentionsAnnotator, unpacking inputs
Step Mentions

Transforming data to features.
------BEFORE DATA PIPELINE------- results/probert
Empty DataFrame
Columns: [id, label]
Index: []


Extracting coref clusters...: 100%|██████████| 1/1 [00:00<00:00, 786.04it/s]
Step MentionsAnnotator, transforming completed
Step MentionsAnnotator, transform completed
Step gather_step, adapting inputs
Step gather_step, transforming...
Step gather_step, transforming completed
Step gather_step, transform completed


Transforming data to features done.
 Log a couple of examples for sanity check.



Convert Examples to features: 1it [00:00, 572.21it/s]
  sequence_output = sequence_output[~gpr_tags_mask].view(batch_size, -1, self.config.hidden_size)
Evaluating: 100%|██████████| 1/1 [00:04<00:00,  4.08s/it]

[0.8984461  0.10155394]
Known pronoun 'them' resolves 'True' to 'Pat' with a probability of '0.8984460830688477'





Unnamed: 0,id,text,pronoun,pronoun_offset,a,a_offset,url
0,na,Pat (they/them/their) was not sure how they sh...,their,15,Pat,0,na


Initializing Step InputReader
Step InputReader initialized
Initializing Step LabelSanitizer
Step LabelSanitizer initialized
Initializing Step MentionsAnnotator
Step MentionsAnnotator initialized
Initializing Step gather_step
Step gather_step initialized
Step gather_step, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step LabelSanitizer, working in "inference" mode
Step LabelSanitizer, adapting inputs
Step LabelSanitizer, transforming...
Step LabelSanitizer, transforming completed
Step LabelSanitizer, transform completed
Step MentionsAnnotator, working in "inference" mode
Step InputReader, working in "inference" mode
Step InputReader, adapting inputs
Step InputReader, transforming...
Step InputReader, transforming completed
Step InputReader, transform completed
Step MentionsAnnotator, unpacking inputs
Step Mentions

Transforming data to features.
------BEFORE DATA PIPELINE------- results/probert
Empty DataFrame
Columns: [id, label]
Index: []


Extracting coref clusters...: 100%|██████████| 1/1 [00:00<00:00, 864.27it/s]
Step MentionsAnnotator, transforming completed
Step MentionsAnnotator, transform completed
Step gather_step, adapting inputs
Step gather_step, transforming...
Step gather_step, transforming completed
Step gather_step, transform completed


Transforming data to features done.
 Log a couple of examples for sanity check.



Convert Examples to features: 1it [00:00, 599.27it/s]
  sequence_output = sequence_output[~gpr_tags_mask].view(batch_size, -1, self.config.hidden_size)
Evaluating: 100%|██████████| 1/1 [00:04<00:00,  4.13s/it]

[0.6122219 0.3877781]
Known pronoun 'their' resolves 'True' to 'Pat' with a probability of '0.612221896648407'





In [9]:
# CSV file, inside the experiment directory, that the results table is written to
# and later read back from.
filename_wr = f'{exp_dir}/df_output_seed_42_fold1.csv' 

In [10]:
# Persist the results table; the DataFrame index is not written.
df_output.to_csv(filename_wr, index=False)

In [11]:
# Reload the saved results from disk and show them, capping the display at 10 rows.
eval_data_plot = pd.read_csv(filename_wr)
with pd.option_context('display.max_rows', 10):
    display(eval_data_plot)

Unnamed: 0,id,text,pronoun,pronoun_offset,a,a_offset,a_coref,url,probabilities,output
0,na,Pat (they/them/their) was not sure how they sh...,they,5,Pat,0,True,na,[[0.97176987 0.02823015]],Known pronoun 'they' resolves to 'Pat' with a ...
1,na,Pat (they/them/their) was not sure how they sh...,they,39,Pat,0,True,na,[[0.6899466 0.31005344]],Known pronoun 'they' resolves to 'Pat' with a ...
2,na,Pat (they/them/their) was not sure how they sh...,them,10,Pat,0,True,na,[[0.8984461 0.10155394]],Known pronoun 'them' resolves to 'Pat' with a ...
3,na,Pat (they/them/their) was not sure how they sh...,their,15,Pat,0,True,na,[[0.6122219 0.3877781]],Known pronoun 'their' resolves to 'Pat' with a...


## Visualisations

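Note: The `gpr_pub` repository only needs to be cloned once. Uncomment and run the next cell the first time you use this notebook, then leave it commented out.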

In [12]:
# !git clone https://github.com/sattree/gpr_pub.git

In [13]:
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
from gpr_pub import visualization

# Add css styles and js events to DOM, so that they are available to rendered html
for asset_path in ('gpr_pub/visualization/highlight.css', 'gpr_pub/visualization/highlight.js'):
    # The context manager closes each file handle as soon as it has been read.
    with open(asset_path) as asset_file:
        display(HTML(asset_file.read()))

In [14]:
def labelled_pronoun(row):
    """Turn one result row into whitespace tokens plus highlight clusters.

    Args:
        row: object exposing ``text``, ``a``, ``a_offset``, ``pronoun``,
            ``pronoun_offset`` and ``a_coref``.

    Returns:
        ``(tokens, clusters)``. ``tokens`` is ``row.text`` split on single spaces.
        ``clusters`` is a list of clusters, each a list of inclusive
        ``[first_token, last_token]`` spans: the entity and pronoun share one cluster
        when ``row.a_coref`` is truthy, otherwise each gets its own.
    """
    text = row.text
    tokens = text.split(' ')

    def token_span(mention, char_offset):
        # A character offset maps to the token index equal to the number of single
        # spaces that precede it; a mention covers one extra token per space it contains.
        first = text[:char_offset].count(' ')
        return [first, first + mention.count(' ')]

    entity_span = token_span(row.a, row.a_offset)
    pronoun_span = token_span(row.pronoun, row.pronoun_offset)

    if row.a_coref:
        clusters = [[entity_span, pronoun_span]]
    else:
        clusters = [[entity_span], [pronoun_span]]

    return tokens, clusters

def to_html(tokens, clusters):
    """Render tokens and their clusters as an HTML snippet wrapped in a padded ``<div>``.

    Args:
        tokens: token list passed to ``visualization.html_template.transform_to_tree``.
        clusters: cluster spans passed alongside ``tokens``.

    Returns:
        The strings produced by ``span_wrapper`` (called with the tree and ``0``)
        joined together and placed inside a ``<div>`` with 16px padding.
    """
    template = visualization.html_template
    span_tree = template.transform_to_tree(tokens, clusters)
    fragments = template.span_wrapper(span_tree, 0)
    return '<div style="padding: 16px;">{}</div>'.format(''.join(fragments))

In [15]:
# Build one annotated HTML snippet per result row; the 'a_coref' column decides
# whether the pronoun is highlighted in the same cluster as the entity.
rows = []
for idx, row in eval_data_plot.iterrows():
    tokens, clusters = labelled_pronoun(row)
    rows.append({'sample_idx': idx,
                 'text': row.text,
                 'annotation': to_html(tokens, clusters)})

# sample_idx is unique per row, so indexing by it gives the same table as grouping
# on it, and also works when there are no rows at all.
df = pd.DataFrame(rows, columns=['sample_idx', 'text', 'annotation']).set_index('sample_idx')
s = df.style.set_properties(**{'text-align': 'left'})

# Styler.render was removed in pandas 2.0; use Styler.to_html where it exists and
# fall back to render on older pandas versions.
render_html = s.to_html if hasattr(s, 'to_html') else s.render
display(HTML(render_html(justify='left')))

Unnamed: 0_level_0,text,annotation
sample_idx,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace.,0 Pat 0 (they/them/their) was not sure how they should bring up inclusivity in the workplace.
1,Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace.,0 Pat (they/them/their) was not sure how 0 they should bring up inclusivity in the workplace.
2,Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace.,0 Pat 0 (they/them/their) was not sure how they should bring up inclusivity in the workplace.
3,Pat (they/them/their) was not sure how they should bring up inclusivity in the workplace.,0 Pat 0 (they/them/their) was not sure how they should bring up inclusivity in the workplace.
