# Start Stanford CoreNLP server
`java -Xmx16g -cp C:\stanford-corenlp-latest\stanford-corenlp-4.0.0\* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9002 -timeout 600 -threads 5 -maxCharLength 100000 -quiet False -preload tokenize,ssplit,pos,lemma,ner,parse,depparse,coref`

In [1]:
import string
import re
import collections
import csv
import pandas as pd
import numpy as np
import requests
import os
import json
from hyphen import Hyphenator

import dask.dataframe as dd
import multiprocessing

import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

import seaborn as sns

%matplotlib inline

import stanfordnlp
from stanfordnlp.server import CoreNLPClient

import nltk
nltk.download('punkt')

# Uncomment if needed to fix this error:
# OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

NLP_PORT = 9002
WORDS_API_KEY = ''

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Justin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
PREDICTION_PATH = './predictions'
TEST_SETS_PATH = './test_sets'
MODEL_EVALS_URL = 'https://squad-model-evals.s3-us-west-2.amazonaws.com/model_db.json'

#SET_NAMES = ['Amazon', 'Reddit', 'New-Wiki', 'NYT', 'dev-v1.1']
SET_NAMES = ['Amazon', 'Reddit', 'New-Wiki', 'NYT']

In [3]:
# Get the SQuAD training set data for later comparisons

with open(TEST_SETS_PATH + '/train-v2.0.json') as f:
    s_json = json.load(f)
    
full_squad_text = ' '.join([x['context'] for y in s_json['data'] for x in y['paragraphs']])    

squad_freqdist = nltk.FreqDist(nltk.tokenize.word_tokenize(full_squad_text))
squad_freqdist_lower = nltk.FreqDist(nltk.tokenize.word_tokenize(full_squad_text.lower()))

# SQuAD lexical diversity
print('SQuAD Training Lexical Diversity:', len(set(nltk.tokenize.word_tokenize(full_squad_text)))/len(nltk.tokenize.word_tokenize(full_squad_text)))
print('SQuAD Training Lexical Diversity (lower case):', len(set(nltk.tokenize.word_tokenize(full_squad_text.lower())))/len(nltk.tokenize.word_tokenize(full_squad_text.lower())))

SQuAD Training Lexical Diversity: 0.043178504959439146
SQuAD Training Lexical Diversity (lower case): 0.03877918893738047


In [4]:
# Use the Words API to get results for each word that appears in one of the test sets or the SQuAD training set

# API key available via https://www.wordsapi.com/ and https://rapidapi.com/developer/dashboard
# Sample file available at https://s3.amazonaws.com/wordsapi/wordsapi_sample.zip

word_api_url = "https://wordsapiv1.p.rapidapi.com/words/{}"
word_api_headers = {
    'x-rapidapi-host': "wordsapiv1.p.rapidapi.com",
    'x-rapidapi-key': WORDS_API_KEY
    }

# Words API provided a sample file of some of their results for free. 
# Don't want to waste API calls requesting results we already have.

# Load the sample file into wordsapi_dict to filter on.
with open('datafiles\wordsapi_sample.json', encoding='utf8') as f:
    words_api_dict = json.load(f)
    
# Create a Stanford CoreNLP pipeline for use later.     
nlp = stanfordnlp.Pipeline(processors='tokenize', use_gpu=True)    

Use device: gpu
---
Loading: tokenize
With settings: 
{'model_path': 'C:\\Users\\Justin\\stanfordnlp_resources\\en_ewt_models\\en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Done loading processors!
---


In [5]:
def fetch_eval_file(eval_file_path, model_evals_url, overwrite=False):
    if (not os.path.exists(eval_file_path)) or overwrite:
        r = requests.get(model_evals_url)
                        
        with open(eval_file_path, 'w', encoding='utf-8') as outfile:
            outfile.write(r.text)

    else:
        print('File Exists')
    
    

def write_output(output_file_path, list_to_write):
    fields = list_to_write[0].keys()
    
    with open(output_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.DictWriter(csv_file, 
                                    fieldnames=fields,
                                    delimiter=',', 
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL )
        csv_writer.writeheader()
        csv_writer.writerows(list_to_write)

def parse_predictions(prediction_file_path, download=False):
    
    with open(prediction_file_path) as f:
      predictions = json.load(f)


    pred_list_test = [{ 'model_display_name': x['name'], 
      'model_name': x['metadata']['name'], 
      'description': x['metadata']['description'], 
      'uuid': x['metadata']['uuid'],
      'testbed': x['testbed'],
      'predictions': x['predictions']

     } for x in predictions]

    pred_list = []

    for r in predictions:

      display_name = r['name']
      model_name = r['metadata']['name']
      description = r['metadata']['description']
      uuid = r['metadata']['uuid']
      testbed = r['testbed']

      for k1, v1 in r['predictions'].items():
        if k1 in (SET_NAMES):
          if 'bundle' in v1.keys():
            test_set = k1
            bundle = v1['bundle']

            for k2, v2 in v1['data'].items():
              qid = k2
              predicted_answer = v2
              exact_match = v1['scores'][qid]['exact_match']
              f1 = v1['scores'][qid]['f1']

              pred_list.append( {
                'display_name': display_name,
                'model_name': model_name,
                'description': description,
                'uuid': uuid,
                'testbed': testbed,
                'test_set': test_set,
                'qid': qid,
                'predicted_answer': predicted_answer,
                'exact_match': exact_match,
                'f1': float(f1)
              })
   
    return pred_list

def load_data(input_file_path):
    with open(input_file_path, 'r', encoding='utf-8') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        return [r for r in csv_reader]

def parse_answers(answer_file_path):
    test_set_answers = [a for a in os.listdir(answer_file_path) if not os.path.isdir('/'.join([answer_file_path, a]))]
    answers_list = []
    
    for f in test_set_answers:
      with open('/'.join([TEST_SETS_PATH, f])) as fh:
          test_set = f.split('.')[0]
          
          answers = json.load(fh)['data']
          for x in answers:
              title = x['title']
    
              for p in x['paragraphs']:
                  context = p['context']
    
                  for qa in p['qas']:
                      question = qa['question']
                      question_id = qa['id']
    
                      for a in qa['answers']:
                          answers_list.append(
                                  {
                                      'test_set': test_set,
                                      'question_id': question_id,
                                      'title': title,
                                      'context': context,
                                      'question_text': question,
                                      'answer_text': a['text'],
                                      'answer_start': a['answer_start']
                                  }
                              )
    return answers_list

def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
  
    def remove_articles(text):
        regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
        return re.sub(regex, ' ', text)
    def white_space_fix(text):
        return ' '.join(text.split())
    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)
    def lower(text):
        return text.lower()
    return white_space_fix(remove_articles(remove_punc(lower(s))))

def get_tokens(s):
    if not s: return []
    return normalize_answer(s).split()

def compute_exact(question_id, predicted_answer, all_answers):
    gold_answers = [normalize_answer(x['answer_text']) for x in all_answers if x['question_id'] == question_id]
    return max((int(normalize_answer(predicted_answer) == a) for a in gold_answers))

def compute_f1(question_id, predicted_answer, all_answers):
    gold_toks = [get_tokens(x['answer_text']) for x in all_answers if x['question_id'] == question_id]
    pred_toks = get_tokens(predicted_answer)
    
    f1s = []
  
    for answer_toks in gold_toks:
        common = collections.Counter(answer_toks) & collections.Counter(pred_toks)
        num_same = sum(common.values())
      
        if len(answer_toks) == 0 or len(pred_toks) == 0:
            # If either is no-answer, then F1 is 1 if they agree, 0 otherwise
            f1s.append(float(int(answer_toks == pred_toks)))
            continue
        if num_same == 0:
            f1s.append(0.0)
            continue
            
        precision = 1.0 * num_same / len(pred_toks)
        recall = 1.0 * num_same / len(answer_toks)
        f1 = (2 * precision * recall) / (precision + recall)

        f1s.append(f1)

    return float(max(f1s))

def print_answer(qid, all_answers):
    question = [q for q in all_answers if q['question_id'] == qid]
    answers = [a['answer_text'] for a in question]
    
    if question:
        print('Test Set:', question[0]['test_set'])
        print('Context:', question[0]['context'])
        print('Question:', question[0]['question_text'])
        print('Answers:', answers)
        
def get_all_stanford_metrics(txt):
    subtree_value = ''
    ners = []
    ners_count = 0
    sentence_count = 0
    word_count = 0 
    character_count = 0
    
    try:
        with CoreNLPClient(endpoint='http://localhost:{}'.format(NLP_PORT), start_server=False, timeout=30000) as client:

            ann = client.annotate(txt)
            
            sentence_count = len(ann.sentence)
            words = [x.word for s in ann.sentence for x in s.token if x.word not in string.punctuation]
            word_count = len(words)
            character_count = sum([len(x) for x in words])
            
            for s in ann.sentence:
                    if s.mentions:
                        for m in s.mentions:
                            ners.append(m.entityType)
                            ners_count += 1
            
            constituency_parse = sentence.parseTree
            subtree_value = constituency_parse.child[0].value
        
        return subtree_value, ners, ners_count, sentence_count, word_count, character_count
        
    except Exception as e:
        return e.args[0], e.args[0], e.args[0], e.args[0], e.args[0], e.args[0]
    
def get_stanford_counts(txt):
    sentence_count = 0
    word_count = 0 
    character_count = 0
    
    try:
        doc = nlp(txt)
        sentence_count = len(doc.sentences)
        words = [w.text for s in doc.sentences for w in s.words if w.text not in string.punctuation]
        word_count = len(words)
        character_count = sum([len(x) for x in words])
        
        return sentence_count, word_count, character_count, words
        
    except Exception as e:
        return e.args[0], e.args[0], e.args[0]
        

# Take a list of words, filter out any that were in the sample, and sent to Words API.
# All results are aggregated into a list of dicts objects.

def get_wordsapi_result(word_list):
    results = []
    word_set = set(word_list).difference(set(wordsapi_dict.keys()))
    for w in word_set:
        try:
            response = requests.request("GET", word_api_url.format(w), headers=word_api_headers)
            result_json = response.json()
            result_json['word'] = w
            results.append(result_json)
        except Exception as e:
            response = {'word': w, 'success':  False, 'message': str(e)}
            print(w)
    return results        

In [8]:
# Test Server
try:
    txt = 'This is a test sentence. So is this.'
    with CoreNLPClient(endpoint='http://localhost:{}'.format(NLP_PORT), start_server=False, timeout=30000) as client:
        ann = client.annotate(txt)
        print('Server running. Found {} sentences'.format(len(ann.sentence)))
except Exception as e:
    print(e)

Server running. Found 2 sentences


In [9]:
# Download the model_db.json file that contains all the pre-evaluated and scored questions
# from the previous groups' work, if it doesn't exist yet.

fetch_eval_file(PREDICTION_PATH + '/model_db.json', MODEL_EVALS_URL, overwrite=False)

File Exists


In [10]:
# If predictions and/or answer files don't exist, uncomment these to recreate them

predictions = parse_predictions(PREDICTION_PATH + '/model_db.json')
answers = parse_answers(TEST_SETS_PATH)

write_output(PREDICTION_PATH + '/all_predictions.csv', predictions)
write_output(PREDICTION_PATH + '/all_answers.csv', answers)

In [11]:
# Otherwise, load from files
predictions = load_data(PREDICTION_PATH + '/all_predictions.csv')
answers = load_data(PREDICTION_PATH + '/all_answers.csv')   

In [12]:
# Load into Pandas dataframes

df_pred = pd.DataFrame(predictions)
df_answers = pd.DataFrame(answers)

df_pred = df_pred.astype({'f1': 'float'})

In [13]:
df_answers['is_numeric'] = df_answers.apply(lambda row: row['answer_text'].isnumeric(), axis=1)
df_answers[df_answers['is_numeric']]

Unnamed: 0,test_set,question_id,title,context,question_text,answer_text,answer_start,is_numeric
66,amazon_reviews_v1,5dd4661fcc027a086d65bc77,Amazon_Reviews_530,"i wanted an electric kettle, but landed up ord...",How many irritations are there?,2,169,True
67,amazon_reviews_v1,5dd4661fcc027a086d65bc77,Amazon_Reviews_530,"i wanted an electric kettle, but landed up ord...",How many irritations are there?,2,169,True
68,amazon_reviews_v1,5dd4661fcc027a086d65bc77,Amazon_Reviews_530,"i wanted an electric kettle, but landed up ord...",How many irritations are there?,2,169,True
151,amazon_reviews_v1,5dd4673dcc027a086d65bcec,Amazon_Reviews_295,I ordered these sheets and must say was a bit ...,what is the thread count on the sheets?,1500,91,True
152,amazon_reviews_v1,5dd4673dcc027a086d65bcec,Amazon_Reviews_295,I ordered these sheets and must say was a bit ...,what is the thread count on the sheets?,1500,126,True
...,...,...,...,...,...,...,...,...
193716,train-v2,5735cc33012e2f140011a06c,Kathmandu,Sikhism is practiced primarily in Gurudwara at...,About how many Christian houses of worship exi...,170,728,True
193719,train-v2,5735d0026c16ec1900b92817,Kathmandu,"Institute of Medicine, the central college of ...",When did the Institute of Medicine begin to of...,1978,219,True
193733,train-v2,5735d1a86c16ec1900b92832,Kathmandu,The main international airport serving Kathman...,"Starting in the center of Kathmandu, how many ...",6,134,True
193734,train-v2,5735d1a86c16ec1900b92833,Kathmandu,The main international airport serving Kathman...,How many airlines use Tribhuvan International ...,22,297,True


In [14]:
df_distinct_answers = pd.DataFrame({'answer_text': df_answers['answer_text'].unique()})
df_distinct_answers[['first_parse', 'ners', 'ners_count', 'sentence_count', 'word_count', 'word_character_count', ]] = df_distinct_answers.apply(lambda row: get_all_stanford_metrics(row['answer_text']), axis=1, result_type='expand')

In [15]:
df_distinct_answers.fillna(value = {'ners':'_NO_NER'}).groupby(['ners']).count()

Unnamed: 0_level_0,answer_text,first_parse,ners_count,sentence_count,word_count,word_character_count
ners,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CoreNLP request timed out. Your document may be too long.,2,2,2,2,2,2
name 'sentence' is not defined,114789,114789,114789,114789,114789,114789


In [16]:
df_distinct_context = df_answers[['test_set','context']].drop_duplicates().reset_index()

In [17]:
df_distinct_context[['sentence_count', 'word_count', 'word_character_count', 'words']] = df_distinct_context.apply(lambda row: get_stanford_counts(row['context']), axis=1, result_type='expand')

### Use Words API to Get Better Syllable Counts

In [18]:
# Word lists are already in the distinct_in_test_lower column of the df_test_sets dataframe from above.
# Iterate through all of those and write the results to files.

### Commented out to avoid accidental re-run

# for test_set in df_test_sets['test_set'].unique():
#     word_list = list(df_test_sets[df_test_sets['test_set'] == test_set]['distinct_in_test_lower'].values[0])
#     results = get_wordsapi_result(word_list)
#     with open('datafiles/wordsapi_{}.json'.format(test_set), 'w', encoding='utf8') as f_out:
#         json.dump(results, f_out)

# After all the API calls complete, read the output files and combine the results into a new dataframe: syll_df

combined_results = []
for f in os.listdir('datafiles/'):
    
    # Filter out the sample json file originally provided (it's formatted a little differently).
    # Also filtering out the squad results... not necessary the first time this runs, given it wouldn't yet exist.
    
    if f not in ('wordsapi_sample.json', 'wordsapi_squad.json'):
        with open('datafiles/{}'.format(f), 'r', encoding='utf8') as f_open:
            x = json.load(f_open)
            combined_results += x
            
syll_df_2 = pd.DataFrame(x)

# For this, we only care about the syllables data, so we filter out any rows that didn't have it
# (Words API claims that 44% of their words have syllable info)

# We also just pull the word and syllables columns, then replace the existing syllables columns with just the syllable count
syll_df_2 = syll_df_2[~syll_df_2['syllables'].isna()][['word','syllables']].reset_index(drop=True)
syll_df_2['syllables'] = syll_df_2.apply(lambda row: row['syllables']['count'], axis = 1)            

# Parse and filter the original sample file. The data here is formatted a little differently, so we don't reuse the previous logic.

with open('datafiles/{}'.format('wordsapi_sample.json'), 'r', encoding='utf8') as f_open:
    x = json.load(f_open)

syll_df_2 = pd.concat([syll_df_2, pd.DataFrame([{'word': k, 'syllables': v['syllables']['count']} for k, v in x.items() if 'syllables' in x[k].keys()])]).reset_index(drop=True)   

# Now to get the SQuAD words we need. Again, don't want to waste API calls on words we've already checked, so we filter those out first.

# We have the list FreqDist of SQuAD words already in squad_freqdist_lower from before, so we can use that to get the list of words. 
squad_minus_checked = list(set(squad_freqdist_lower.keys()).difference(set(syll_df_2['word'].to_list())))

# Then we can pass that same list to our get_wordsapi_result() function from before... we'll write the results to a file again.

### Commented out to avoid accidental re-run

# results = get_wordsapi_result(squad_minus_checked)
# with open('datafiles/wordsapi_squad.json', 'w', encoding='utf8') as f_out:
#     json.dump(results, f_out)

# We can pull the data back in using the same method used for the other API results, and concat it with the existing dataframe.

with open('datafiles/wordsapi_{}.json'.format('squad'), 'r', encoding='utf8') as f_open:
    x = json.load(f_open)
    
df_squad_syll = pd.DataFrame(x)
df_squad_syll = df_squad_syll[~df_squad_syll['syllables'].isna()][['word','syllables']].reset_index(drop=True)
df_squad_syll['syllables'] = df_squad_syll.apply(lambda row: row['syllables']['count'], axis = 1)        

syll_df_2 = pd.concat([syll_df_2, df_squad_syll]).reset_index(drop=True) 

# Finally, we found that the Words API consistently counts 1 syllable too few for hyphenated words...
display(syll_df_2[syll_df_2['word'].str.contains('-')])

# ... so we increment their counts by one to compensate.

syll_df_2.loc[syll_df_2['word'].str.contains('-'), 'syllables'] = syll_df_2['syllables'] + 1 

# And we write the results so we don't have to do this again...
syll_df_2 = syll_df_2.drop_duplicates().reset_index(drop=True)
syll_df_2.to_csv('./syllable_counts.csv', index=False)

# Now we need to update the df_distinct_context syllables counts and re-populate any dependent metrics
syll_df_2 = pd.read_csv('./syllable_counts.csv', keep_default_na=False)
syll_2_dict = syll_df_2.set_index('word').to_dict('index')

Unnamed: 0,word,syllables
1,in-between,2
14,yo-yo,1
77,twenty-third,2
114,clean-living,2
166,tip-off,1
...,...,...
50013,anti-catholicism,5
50058,anti-french,2
50061,hurdy-gurdy,3
50186,child-bearing,2


In [19]:
def get_syllables(word):
    if word in syll_2_dict.keys():
        syll_count =  syll_2_dict['word']['syllables']
    elif len(word) < 100:
        syll_count = len(Hyphenator('en_US').syllables(word))
    else:
        syll_count = 0
    
    return syll_count

In [20]:
#.map_partitions(lambda df: df.apply(lambda row: [max(1, len(Hyphenator('en_US').syllables(x))) if len(str(x)) < 100 else -1 for x in row['words'] ], axis = 1)) \

syll_df = dd.from_pandas(df_distinct_context, npartitions = 2*multiprocessing.cpu_count()) \
            .map_partitions(lambda df: df.apply(lambda row: [get_syllables(x) for x in row['words'] ], axis = 1)) \
            .compute(scheduler='processes')

In [21]:
df_distinct_context['syllables_per_word'] = syll_df

df_distinct_context['polysyllable_count'] = df_distinct_context.apply(lambda row: len([x for x in row['syllables_per_word'] if x > 1]), axis = 1)
df_distinct_context['avg_word_length'] = df_distinct_context.apply(lambda row: sum([len(x) for x in row['words']])/row['word_count'], axis = 1)

df_distinct_context['avg_sentence_length_in_words'] = df_distinct_context['word_count']/df_distinct_context['sentence_count']
df_distinct_context['context_character_count'] = df_distinct_context.apply(lambda row: len(row['context']), axis=1)
df_distinct_context['avg_sentence_length_in_characters'] = df_distinct_context['context_character_count']/df_distinct_context['sentence_count']
df_distinct_context['syllables_per_word'] = df_distinct_context.apply(lambda row: sum([x for x in row['syllables_per_word'] if x > 0])/ len([x for x in row['syllables_per_word'] if x > 0]) , axis=1)
df_distinct_context['flesch-kincaid_grade_level'] = df_distinct_context.apply(lambda row: (0.39 * row['avg_sentence_length_in_words']) + (11.8 * row['syllables_per_word']) - 15.59, axis=1)

df_distinct_context['coleman-liau'] = df_distinct_context.apply(lambda row: (0.0588 * (row['avg_word_length']) * 100) - (0.296 * (100/row['avg_sentence_length_in_words'])) - 15.8, axis=1)
df_distinct_context['gunning-fog'] = df_distinct_context.apply(lambda row: 0.4 * ((row['word_count'] / row['sentence_count']) + ((row['polysyllable_count'] / row['word_count']) * 100)), axis=1)
df_distinct_context['automated-readability'] = df_distinct_context.apply(lambda row: 4.71 * (row['context_character_count'] / row['word_count']) + 0.5 * (row['word_count'] / row['sentence_count']) - 21.43, axis=1)

df_distinct_context['lexical_diversity'] = df_distinct_context.apply(lambda row: len(set(row['words']))/row['word_count'], axis=1)
df_distinct_context['lexical_diversity_lower'] = df_distinct_context.apply(lambda row: len(set([x.lower() for x in row['words']]))/len([x.lower() for x in row['words']]), axis=1)

df_distinct_context['nltk_sentence_count'] = df_distinct_context.apply(lambda row: len(nltk.FreqDist(nltk.tokenize.sent_tokenize(row['context'].lower()))), axis=1)

In [22]:
df_merged_answers = df_answers.merge(df_distinct_answers, on=['answer_text'])   

In [23]:
df_merged_answers['is_numeric'] = df_merged_answers.apply(lambda row: row['answer_text'].isnumeric(), axis=1)

In [24]:
df_merged_answers_and_context = df_merged_answers.merge(df_distinct_context, on=['context'])   

In [25]:
df_pred_answers_context = df_pred.merge(df_merged_answers_and_context, left_on=['qid'], right_on=['question_id'])

In [26]:
df_pred_answers_context['exact_match'] = df_pred_answers_context['exact_match'].map({'True':True, 'False':False})

In [27]:
df_answers.to_csv('answers.csv', index=False)
df_distinct_answers.to_csv('distinct_answers.csv', index=False)
df_distinct_context.to_csv('distinct_context.csv', index=False)
df_merged_answers.to_csv('merged_answers.csv', index=False)
df_merged_answers_and_context.to_csv('merged_answers_and_context.csv', index = False)
df_pred_answers_context.to_csv('pred_answers_context.csv', index=False)

In [30]:
del df_answers
del df_distinct_answers
del df_distinct_context
del df_merged_answers
del df_merged_answers_and_context
del df_pred_answers_context
del df_pred

In [28]:
# df_answers = pd.read_csv('answers.csv')
# df_distinct_answers = pd.read_csv('distinct_answers.csv')
# df_distinct_context = pd.read_csv('distinct_context.csv')
# df_merged_answers = pd.read_csv('merged_answers.csv')
# df_merged_answers_and_context = pd.read_csv('merged_answers_and_context.csv')
# df_pred_answers_context = pd.read_csv('pred_answers_context.csv')
# df_pred = pd.DataFrame(load_data(PREDICTION_PATH + '/all_predictions.csv'))