# Experiment Setup 

## Install dependencies and prepare modules

In [1]:
# Use the %pip magic instead of !pip: %pip installs into the environment of the
# running kernel, whereas !pip runs whichever `pip` the shell finds first, which
# may belong to a different Python installation.
# TODO: pin package versions (pkg==x.y.z) so the experiment is reproducible.
%pip install --upgrade pip
%pip install flair
%pip install coloredlogs
%pip install unidecode
%pip install fcache

import coloredlogs, logging
import application.logformatter as lf
from fcache.cache import FileCache

# Verbosity shared by the handler and the 'muheqa' logger.
log_level = logging.DEBUG

# Name given to this notebook's handler so it can be recognised on later runs.
NOTEBOOK_HANDLER_NAME = 'muheqa-notebook'

fh = logging.StreamHandler()
fh.set_name(NOTEBOOK_HANDLER_NAME)
fh.setFormatter(lf.CustomFormatter())
fh.setLevel(log_level)

logger = logging.getLogger('muheqa')
# logging.getLogger returns the same logger object on every call, so re-running
# this cell would otherwise stack one more handler and print every record once
# more per run. Drop the handler installed by a previous run before adding ours.
for stale_handler in [h for h in logger.handlers if h.get_name() == NOTEBOOK_HANDLER_NAME]:
    logger.removeHandler(stale_handler)
logger.addHandler(fh)
logger.setLevel(log_level)

import application.summary.entity as ent
import application.summary.concept as cp
import application.summary.keyword as key
import application.cache as cache

# Instantiate the three MuHeQA summary components once; later cells reuse these
# objects (keyword_discovery in evaluate_data, all three in the basic test).
# NOTE(review): the recorded log output shows Entity and Concept being
# initialised a second time right after Keyword, so Keyword() presumably builds
# its own instances -- confirm in application/summary/keyword.py.
entity_discovery  = ent.Entity()
concept_discovery = cp.Concept()
keyword_discovery = key.Keyword()





[38;20m2022-08-29 17:04:12,745 - muheqa - DEBUG - initializing Entity class instance... (entity.py:11)[0m
[38;20m2022-08-29 17:04:18,780 - muheqa - DEBUG - initializing Concept class instance... (concept.py:12)[0m
[38;20m2022-08-29 17:04:19,462 - muheqa - DEBUG - initializing Keyword class instance... (keyword.py:11)[0m
[38;20m2022-08-29 17:04:19,463 - muheqa - DEBUG - initializing Entity class instance... (entity.py:11)[0m
[38;20m2022-08-29 17:04:25,125 - muheqa - DEBUG - initializing Concept class instance... (concept.py:12)[0m


## Define Metrics

In [2]:
import unidecode

def normalize(label):
  """Return a canonical form of `label` used to compare labels.

  The label is stripped of surrounding whitespace, transliterated with
  `unidecode.unidecode`, lower-cased, and its spaces are replaced by
  underscores.
  """
  trimmed = label.strip()
  transliterated = unidecode.unidecode(trimmed)
  lowered = transliterated.lower()
  return lowered.replace(" ", "_")

def precision(tp,fp):
  """Return tp / (tp + fp), or 0.0 when there are no predicted positives."""
  predicted_positives = fp + tp
  return 0.0 if predicted_positives == 0 else tp / predicted_positives

def recall(tp,fn):
  """Return tp / (tp + fn), or 0.0 when there are no actual positives."""
  actual_positives = fn + tp
  return 0.0 if actual_positives == 0 else tp / actual_positives

def f1(tp,fp,fn):
  """Return the harmonic mean of precision and recall.

  Precision and recall come from the `precision` and `recall` helpers; when
  both are zero the result is 0.0 instead of a division by zero.
  """
  prec, rec = precision(tp,fp), recall(tp,fn)
  denominator = prec + rec
  if denominator == 0:
    return 0.0
  return 2 * ((prec * rec) / denominator)

def average(values):
  """Return the arithmetic mean of `values`.

  An empty sequence yields 0.0 rather than raising ZeroDivisionError, which
  matches how `precision`, `recall` and `f1` treat a zero denominator.
  """
  if len(values) == 0:
    return 0.0
  return sum(values) / len(values)

# lists of entity lists
def evaluate_labels(true_list,pred_list):
  """Score predicted labels against reference labels, sample by sample.

  Args:
    true_list: one collection of reference labels per sample; empty-string
      labels are ignored.
    pred_list: one collection of predicted labels per sample, aligned with
      `true_list` by position.

  Returns:
    A dict with the number of samples ('total'), the number of samples
    without any reference label ('empty'), the pooled counts 'tp', 'tn',
    'fp', 'fn', the micro scores computed from the pooled counts, and the
    macro scores averaged over the per-sample scores. 'tn' is always 0
    because no true negatives are counted.

  Labels are compared after `normalize`. A predicted label that occurs
  several times is counted once per occurrence.
  """
  tp, tn, fp, fn = 0, 0, 0, 0
  precision_list, recall_list, f1_list = [], [], []
  empty_values = 0
  # Number of samples; the loop variable must not be used for this because it
  # ends at len - 1 and is undefined when there are no samples at all.
  total = len(true_list)
  for index in range(total):
    # normalize entities
    valid_entities = [normalize(e) for e in true_list[index] if e != '']
    predicted_entities = [normalize(e) for e in pred_list[index]]
    if not valid_entities:
      empty_values += 1
    # per-sample counts
    pfn = sum(1 for entity in valid_entities if entity not in predicted_entities)
    ptp = sum(1 for entity in predicted_entities if entity in valid_entities)
    pfp = len(predicted_entities) - ptp
    precision_list.append(precision(ptp,pfp))
    recall_list.append(recall(ptp,pfn))
    f1_list.append(f1(ptp,pfp,pfn))
    tp += ptp
    fp += pfp
    fn += pfn
  return  {
      'total': total,
      'empty': empty_values,
      'tp': tp,
      'tn': tn,
      'fp': fp,
      'fn':fn,
      'micro-precision': precision(tp,fp),
      'micro-recall': recall(tp,fn),
      'micro-f1': f1(tp,fp,fn),
      # With no samples there is nothing to average; report 0.0 instead of
      # dividing by zero.
      'macro-precision': average(precision_list) if total else 0.0,
      'macro-recall': average(recall_list) if total else 0.0,
      'macro-f1': average(f1_list) if total else 0.0
  }

# Marker output confirming that the metric helpers in this cell were defined.
print("metrics are ready")

metrics are ready


## Load SOTA Methods

### FLERT

From the paper: Schweter, S. and Akbik, A. “FLERT: Document-Level Features for Named Entity Recognition.” arXiv:2011.06993 (2020).

In [None]:
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the pretrained Flair sequence tagger published as "flair/ner-english-large".
flert_tagger = SequenceTagger.load("flair/ner-english-large")
# Project cache keyed by input text; get_entities_by_flert consults it before tagging.
# NOTE(review): whether cache.Cache persists across kernel restarts is not visible here.
flert_cache = cache.Cache("muheqa_flert")

def get_entities_by_flert(text):
    """Return the texts of the 'ner' spans that the FLERT tagger finds in `text`.

    A result already stored in `flert_cache` for this exact text is returned
    as is; otherwise the text is tagged and the new result is stored before
    being returned.
    """
    if flert_cache.exists(text):
        return flert_cache.get(text)

    tagged = Sentence(text)
    flert_tagger.predict(tagged)
    span_texts = [span.text for span in tagged.get_spans('ner')]
    flert_cache.set(text, span_texts)
    return span_texts


In [None]:
# Smoke check: the sample sentence uses "Washington" both inside a longer name and on its own.
get_entities_by_flert("George Washington went to Washington")

### BERT-based NER

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Tokenizer and token-classification model published as "dslim/bert-base-NER".
bert_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
bert_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
# Project cache keyed by input text; get_entities_by_bert_base consults it before running the model.
bert_cache = cache.Cache("muheqa_bert")


# "ner" pipeline without an aggregation option; get_entities_by_bert_base merges its
# per-token results into entity strings itself.
bert_nlp = pipeline("ner", model=bert_model, tokenizer=bert_tokenizer)

def get_entities_by_bert_base(text):
    """Return the entity strings that the BERT NER pipeline finds in `text`.

    A result already stored in `bert_cache` for this exact text is returned as
    is. Otherwise the tokens produced by `bert_nlp` are merged as follows:

    * a token starting exactly where the previous one ended is glued to the
      current entity without a space;
    * a token whose 'index' is less than 2 above the previous token's is
      appended to the current entity after a space;
    * any other token closes the current entity and starts a new one.

    Tokens whose word starts with '#' have every '#' removed first. The merged
    list is stored in the cache before being returned.
    """
    if bert_cache.exists(text):
        return bert_cache.get(text)

    found = []
    current = ""
    # -1 marks "no token seen yet" for the previous token's index and end offset
    last_index, last_end = -1, -1
    for tok in bert_nlp(text):
        if last_index == -1:
            # first token: make it count as contiguous with itself
            last_index = tok['index']
            last_end = tok['start']
        piece = tok['word']
        if piece[0] == '#':
            piece = piece.replace("#", "")
        if tok['start'] == last_end:
            current += piece
        elif tok['index'] - last_index < 2:
            current += " " + piece
        else:
            found.append(current)
            current = piece
        last_index = tok['index']
        last_end = tok['end']

    if current:
        found.append(current)
    bert_cache.set(text, found)
    return found

In [None]:
# Smoke check (the result is displayed, nothing is asserted): the sample sentence
# uses "Washington" both inside a longer name and on its own.
get_entities_by_bert_base("George Washington went to Washington")

### RoBERTa-based NER

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Tokenizer and token-classification model published as "Jean-Baptiste/roberta-large-ner-english".
roberta_tokenizer = AutoTokenizer.from_pretrained("Jean-Baptiste/roberta-large-ner-english")
roberta_model = AutoModelForTokenClassification.from_pretrained("Jean-Baptiste/roberta-large-ner-english")
# Project cache keyed by input text; get_entities_by_roberta_base consults it before running the model.
roberta_cache = cache.Cache("muheqa_roberta")

# "ner" pipeline without an aggregation option; get_entities_by_roberta_base merges its
# per-token results into entity strings itself.
roberta_nlp = pipeline("ner", model=roberta_model, tokenizer=roberta_tokenizer)

def get_entities_by_roberta_base(text):
    """Return the entity strings that the RoBERTa NER pipeline finds in `text`.

    Despite the "_base" suffix, `roberta_nlp` is built from the
    "Jean-Baptiste/roberta-large-ner-english" checkpoint.

    A result already stored in `roberta_cache` for this exact text is returned
    as is. Otherwise the tokens produced by `roberta_nlp` are merged as follows:

    * a token starting exactly where the previous one ended is glued to the
      current entity without a space;
    * a token whose 'index' is less than 2 above the previous token's is
      appended to the current entity after a space;
    * any other token closes the current entity and starts a new one.

    Tokens whose word starts with '#' have every '#' removed first, and every
    "Ġ" character is removed from an entity when it is closed. The merged
    list is stored in the cache before being returned.
    """
    if roberta_cache.exists(text):
        return roberta_cache.get(text)

    found = []
    current = ""
    # -1 marks "no token seen yet" for the previous token's index and end offset
    last_index, last_end = -1, -1
    for tok in roberta_nlp(text):
        if last_index == -1:
            # first token: make it count as contiguous with itself
            last_index = tok['index']
            last_end = tok['start']
        piece = tok['word']
        if piece[0] == '#':
            piece = piece.replace("#", "")
        if tok['start'] == last_end:
            current += piece
        elif tok['index'] - last_index < 2:
            current += " " + piece
        else:
            found.append(current.replace("Ġ", ""))
            current = piece
        last_index = tok['index']
        last_end = tok['end']

    if current:
        found.append(current.replace("Ġ", ""))
    roberta_cache.set(text, found)
    return found

In [None]:
# Smoke check (the result is displayed, nothing is asserted): the sample sentence
# uses "Washington" both inside a longer name and on its own.
get_entities_by_roberta_base("George Washington went to Washington")

# Evaluation Results

In [None]:
from IPython.display import clear_output
import json
import pandas as pd

def json_file(name):
  """Return the JSON results file name for the experiment called `name`."""
  suffix = "-keywords.json"
  return name + suffix

def csv_file(name):
  """Return the CSV results file name for the experiment called `name`."""
  suffix = "-keywords.csv"
  return name + suffix

def evaluate_data(name,dataframe):
  """Run every extractor on each question and save the annotated frame.

  Args:
    name: experiment name; the output files are `json_file(name)` and
      `csv_file(name)`.
    dataframe: frame with a 'question' column.

  Returns:
    A copy of `dataframe` with four added columns ('MuHeQA_Keywords',
    'FLERT_NER', 'BERT_NER', 'RoBERTA_NER'), each holding the list returned
    by the corresponding extractor for that row's question. The same frame is
    written to the JSON (orient='split') and CSV files.
  """
  # Work on a copy: callers pass slices such as `df.head(10)`, and adding
  # columns to a slice can trigger pandas' SettingWithCopyWarning. It also
  # keeps the caller's frame unchanged.
  annotated = dataframe.copy()
  keywords, flert, bert, roberta = [], [], [], []
  for index, row in annotated.iterrows():
    question = row['question']
    # progress trace; cleared once the loop is done
    print(index,":",question)
    keywords.append(keyword_discovery.get(question))
    flert.append(get_entities_by_flert(question))
    bert.append(get_entities_by_bert_base(question))
    roberta.append(get_entities_by_roberta_base(question))
  annotated['MuHeQA_Keywords'] = keywords
  annotated['FLERT_NER'] = flert
  annotated['BERT_NER'] = bert
  annotated['RoBERTA_NER'] = roberta
  clear_output(wait=True)
  print(len(keywords),"questions analyzed!")
  annotated.to_json(json_file(name), orient='split')
  annotated.to_csv(csv_file(name))
  return annotated

def make_report(name,additional=None):
  """Build a table of metrics from the results saved by `evaluate_data`.

  Args:
    name: experiment name; results are read from `json_file(name)`.
    additional: optional iterable of extra result rows (dicts) appended to
      the report unchanged, e.g. scores taken from elsewhere.

  Returns:
    A DataFrame with one row per prediction column of the saved frame (every
    column except 'question' and 'entities'), holding the output of
    `evaluate_labels` plus a 'model' entry with the column name, followed by
    the `additional` rows.
  """
  df = pd.read_json(json_file(name), orient='split')
  y_true = df['entities'].tolist()
  results = []
  for col in df.columns:
    if col in ('question', 'entities'):
      continue
    result = evaluate_labels(y_true, df[col].tolist())
    result['model'] = col
    results.append(result)

  # `None` replaces the former mutable default `[]`; both mean "no extra rows".
  if additional:
    results.extend(additional)

  return pd.DataFrame(results)

# Marker output confirming that the evaluation helpers in this cell were defined.
print("evaluation methods are ready")

# Basic Test

In [None]:
# Alternative sample query; uncomment to try it instead of the one below:
#query = "what does 2674 pandarus orbit?"
query = "in which country was overnight delivery filmed in?"
# Log what each MuHeQA component and each baseline extractor returns for the
# same query, so the outputs can be compared side by side.
logger.info("Query: '" + query + "'")
logger.info("Entities:"+ str(entity_discovery.get(query)))
logger.info("Concepts:"+ str(concept_discovery.get(query)))
logger.info("Keywords:"+ str(keyword_discovery.get(query)))
logger.info("FLERT:"+ str(get_entities_by_flert(query)))
logger.info("BERT:"+ str(get_entities_by_bert_base(query)))
logger.info("RoBERTA:"+ str(get_entities_by_roberta_base(query)))

# SimpleQuestions Dataset

In [None]:
import pandas as pd
df = pd.read_csv('datasets/simple_questions/wsq-labels.csv', index_col=0)
# inverse predicates contain no entity. In all other cases, the entity corresponds to the subject.
# Keep only the questions that literally contain their subject label
# (case-insensitive); the label becomes the single reference entity.
entities = []
questions = []
for index, row in df.iterrows():
    entity = row['subject_label']
    question = row['question']
    # A blank CSV cell is read as NaN (a float), which has no .lower(); such a
    # row cannot contain its entity in the question, so skip it.
    if not (isinstance(entity, str) and isinstance(question, str)):
        continue
    if entity.lower() in question.lower():
        entities.append([entity])
        questions.append(question)
sq_df = pd.DataFrame(list(zip(questions, entities)),columns =['question', 'entities'])
sq_df.head()

In [None]:
# Summary statistics of the filtered question/entity frame.
sq_df.describe()

In [None]:
# Runs the four extractors on the first 10 questions only and writes
# sq_results-keywords.json and sq_results-keywords.csv.
evaluate_data('sq_results',sq_df.head(10))

In [None]:
# Reads sq_results-keywords.json and scores every prediction column against 'entities'.
make_report('sq_results')