In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# re-imported before each cell runs; edits to the project package are then picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import glob
import os

import pandas as pd
# Show up to 255 characters per cell when a DataFrame is displayed, so long text
# columns are not truncated at pandas' much shorter default width.
pd.set_option('display.max_colwidth', 255)


from dpp_helpline_qa.model_validation.model_validation import cal_em_score, calculate_semantic_similarity
from dpp_helpline_qa.modelling.question_answer import load_model_flan, answer_question_flan
from dpp_helpline_qa.modelling.semantic_search import load_model_ss, context_ranking
from dpp_helpline_qa.preprocessing.preprocessing import process_docs

In [None]:
import yaml

# Parse the YAML configuration file that drives the whole notebook.
with open('../config/config.yaml') as config_stream:
    config = yaml.safe_load(config_stream.read())

# Values read from the 'preprocessing' section.
preprocessing_cfg = config['preprocessing']
max_length = preprocessing_cfg['max_context_length']
vector_method = preprocessing_cfg['vector_simalirity_method']  # key spelled exactly as it is looked up here
nclus_ivf = preprocessing_cfg['nclus_ivf']

# Values read from the 'modelling' section.
modelling_cfg = config['modelling']
model_semantic = modelling_cfg['semantic_model']
qna_model = modelling_cfg['qna_model']
use_gpu = modelling_cfg['use_gpu']
top_k_context = modelling_cfg['top_k_context']
min_ans_length = modelling_cfg['min_ans_length']
max_ans_length = modelling_cfg['max_ans_length']
no_repeat_ngram_size = modelling_cfg['no_repeat_ngram_size']

In [None]:
# Collect every PDF that sits one directory level below ../data.
pdf_pattern = os.path.join("..", "data", "*", "*.pdf")
files = glob.glob(pdf_pattern)

In [None]:
# Load the model/tokenizer pair for the semantic-search model named in the config
# (`model_semantic`); both objects are reused by the pre-processing, context-ranking
# and similarity-scoring cells below.
model_ss, tokenizer_ss = load_model_ss(model_semantic)


In [None]:
# Directory handed to process_docs and later globbed (per sub-directory) to find
# the pre-processed files used for searching.
output_path = '../output'

In [None]:
# load and pre-process the documents to prepare for searching
import time
st = time.time()
para_dfs = process_docs(files, output_path, model_ss, tokenizer_ss, max_length, vector_method, nclus_ivf)
# A notebook cell renders only its final expression, so the elapsed time is printed
# explicitly and the preview of the first returned frame is kept as the last line;
# previously the preview was evaluated mid-cell and silently discarded.
print(f"process_docs elapsed: {time.time() - st:.1f} s")
para_dfs[0].head()

In [None]:
# List everything that has a file extension one directory level below the output
# folder, then show how many entries matched together with the paths themselves.
processed_pattern = os.path.join(output_path, '*/*.*')
op_files = glob.glob(processed_pattern)
len(op_files), op_files

In [None]:
# Load the question-answering model and its tokenizer. `qna_model` and `use_gpu`
# both come from the 'modelling' section of the config; the same `use_gpu` flag is
# passed again when answers are generated in the evaluation cell.
model_qa, tokenizer_qa = load_model_flan(qna_model, use_gpu)

In [None]:
# Automatic evaluation: for every question in the spreadsheet, retrieve the top
# ranked contexts, generate an answer from them, and score both the generated
# answer and the retrieved context against the reference answer.
import time
st = time.time()
answers = pd.read_excel('LLM_QA.xlsx')

# Per-question results; one entry is appended to each list per spreadsheet row.
context = []
final_ans = []
EM_score_ans = []
Sbert_score_ans = []
NLP_score_ans = []
EM_score_context = []
Sbert_score_context = []
NLP_score_context = []

# One list of "doc_name;page" references per context rank. The lists are built
# directly instead of exec()-ing generated source, and are still published under
# the names ContextRef_1..ContextRef_<top_k_context> because the results cell
# looks them up by those names.
context_refs = [[] for _ in range(top_k_context)]
for rank, refs in enumerate(context_refs, start=1):
    globals()[f"ContextRef_{rank}"] = refs

# Walk the three columns in row order so the loop does not rely on the frame's
# index labels happening to be 0..n-1.
for question, topic, actual_ans in zip(answers['Question'], answers['Primary Topic'], answers['Answer']):
    # ranked candidate contexts from the pre-processed CSVs of this question's topic
    op_files = glob.glob(os.path.join(output_path, topic, '*.csv'))
    context_df = context_ranking(question, op_files, model_ss, tokenizer_ss, vector_method)
    top_contexts = context_df['content'].values[0:top_k_context]
    # text the answer is generated from: the top-k contexts joined by newlines
    main_context = '\n'.join(top_contexts)
    # same contexts with a visible separator, kept for the exported CSV
    main_context_store = '\n------\n'.join(top_contexts)
    context.append(main_context_store)

    # QA
    output = answer_question_flan(model_qa, tokenizer_qa, main_context, question, use_gpu, min_ans_length,
                                  max_ans_length, no_repeat_ngram_size)
    final_ans.append(output)

    # output scoring (elements 1 and 2 of the similarity result are stored as the
    # "Sbert" and "NLP" scores — NOTE(review): confirm against
    # calculate_semantic_similarity's return order)
    EM_score_ans.append(cal_em_score(output, actual_ans))
    sim_score_ans = calculate_semantic_similarity(model_ss, tokenizer_ss, output, actual_ans)
    Sbert_score_ans.append(sim_score_ans[1])
    NLP_score_ans.append(sim_score_ans[2])

    # context scoring
    EM_score_context.append(cal_em_score(main_context, actual_ans))
    sim_score_cnxt = calculate_semantic_similarity(model_ss, tokenizer_ss, main_context, actual_ans)
    Sbert_score_context.append(sim_score_cnxt[1])
    NLP_score_context.append(sim_score_cnxt[2])

    # adding context reference
    doc_names = context_df["doc_name"].values
    pages = context_df["page"].values
    for con, refs in enumerate(context_refs):
        if con < len(doc_names):
            refs.append(str(doc_names[con]) + ';' + str(pages[con]))
        else:
            # Fewer rows than top_k_context were returned: pad with an empty
            # reference so every ContextRef list keeps one entry per question
            # instead of the whole run aborting with an IndexError.
            refs.append('')

time.time() - st

In [None]:
# Attach the per-question results collected by the evaluation cell to the
# question sheet, in a fixed column order, then export everything as CSV.
result_columns = {
    'Extracted context': context,
    'Final answer': final_ans,
    'EM_Score_ans': EM_score_ans,
    'Sbert_score_ans': Sbert_score_ans,
    'NLP_score_ans': NLP_score_ans,
    'EM_Score_context': EM_score_context,
    'Sbert_score_context': Sbert_score_context,
    'NLP_score_context': NLP_score_context,
}
for column_name, column_values in result_columns.items():
    answers[column_name] = column_values

# One reference column per context rank, read from the ContextRef_<rank> lists
# that the evaluation cell filled.
for rank in range(1, top_k_context + 1):
    answers[f'ContextRef_{rank}'] = globals()[f"ContextRef_{rank}"]

answers.to_csv('ques_score.csv', index=False)

In [None]:
# Print the mean of each score column, rounded to two decimals. The columns are
# selected by name: a positional slice depends on how many columns the input
# spreadsheet happens to have, and it also sweeps in the trailing ContextRef_*
# columns, which hold "doc_name;page" strings and cannot be averaged.
score_columns = [
    'EM_Score_ans',
    'Sbert_score_ans',
    'NLP_score_ans',
    'EM_Score_context',
    'Sbert_score_context',
    'NLP_score_context',
]
for col in score_columns:
    print(f"{col}_mean: {round(answers[col].mean(), 2)}")