In [None]:
# IPython autoreload: with mode 2, imported modules are reloaded before each cell
# runs, so edits to the local `scripts` modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Imports
import os
from datetime import date
from json import dump
from pathlib import Path

import pandas as pd
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from tqdm import tqdm

# Local
from scripts.extractmd import Extractor
from scripts.vectorindex import QnAEngine
from scripts.utilities import get_prompt_dict, get_questions, get_answers, get_procurement_content, get_config_data, get_ini_files, get_supplementary_info, get_questions_without_q0
from scripts.gen_results import gen_results
from scripts.gen_reports import generate_precision_report

**Global config**

In [None]:
# Embedding / retrieval settings. The chunking values are used when the index is
# built, and the whole dict is handed to gen_results in the main loop.
# Other embedding models that can be plugged in here:
#   "BAAI/bge-m3", "nomic-ai/nomic-embed-text-v2-moe"
embedding_conf = dict(
    embeddingmodel="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    chunk_size=1536,
    chunk_overlap=0,
    top_similar=5,
    n4rerank=0,  # how many nodes to retrieve for reranking; 0 means the reranker is not used
    use_similar_chunks=True,  # use similar chunks (True) or the whole document (False) as the context
    prevnext=True,  # also include the previous and the next chunk of each similar chunk
)

embedding = HuggingFaceEmbedding(
    model_name=embedding_conf["embeddingmodel"],
    trust_remote_code=True,
)

# For nomic-embed-text-v2-moe use this variant instead:
#embedding=HuggingFaceEmbedding(model_name=embedding_conf["embeddingmodel"],trust_remote_code=True,query_instruction="search_query: ",text_instruction="search_document: ")

In [None]:
# LLM setup
# Azure OpenAI connection settings. Version, endpoint and key come from the
# environment and fall back to an empty string when the variable is not set.
llmmodelAzure = {
    "model": "gpt-4o",
    "version": os.environ.get('AZURE_OPENAI_VERSION', ''),
    "azure_deployment": "gpt-4o",
    "azure_endpoint": os.environ.get('AZURE_ENDPOINT', ''),
    "api_key": os.environ.get('AZURE_OPENAI_KEY', ''),
}

# `model` is passed explicitly so the client is configured with the same model
# name that is written to the report config; previously only the deployment name
# was given and the client kept its built-in default model name.
# NOTE(review): top_p is passed as a plain keyword argument - confirm the client
# forwards it to the API (it may need to go through `additional_kwargs`).
llm = AzureOpenAI(
    model=llmmodelAzure["model"],
    azure_deployment=llmmodelAzure["azure_deployment"],
    azure_endpoint=llmmodelAzure["azure_endpoint"],
    api_version=llmmodelAzure["version"],
    api_key=llmmodelAzure["api_key"],
    temperature=0.0,
    timeout=120,
    max_retries=3,
    top_p=0.0001,
)

In [None]:
extractor = Extractor() # Markdown doc extractor; created once and reused for every procurement file in the main loop

In [None]:
# Optional alternative LLM: an Ollama-served model instead of Azure OpenAI.
# Everything below is intentionally commented out; uncommenting it rebinds `llm`.
# llmmodelOllama = { "model": "gemma3:27b",
#                 "url":os.environ.get('OLLAMA_ENDPOINT',''),
#                 "context_window":"128000"}

#from llama_index.llms.ollama import Ollama
#llm = Ollama(base_url=llmmodelOllama["url"],
#             model=llmmodelOllama["model"], 
#             context_window=int(llmmodelOllama["context_window"]),
#            request_timeout=300.0,
#            temperature=0.0,
#            additional_kwargs={"seed":1337})

**PROCUREMENT FILE SETTINGS**

In [None]:
# Base directory for all relative paths: the first entry of IPython's directory
# history (`_dh`). It is wrapped in Path() because the entry may be a plain string
# depending on the IPython version, and the `/` joins below need a Path.
script_dir = Path(globals()['_dh'][0])

# Document paths
question_file_path = script_dir / "questions" / "questions.yaml" # original.yaml
prompt_file = script_dir / "questions" / "prompts.tsv"
report_dir = script_dir / "reports"
config_dir = script_dir / "dev_config" # "dev_config" # "config"
procurement_file_dir = script_dir / "cfla_files" # "cfla_files"
answer_file_dir = script_dir / "answers"

# TODO perhaps prompt user to define unique report name; some types - all; one etc?
report_identifier = "dev-test"
# TODO maybe add report as a subdirectory as there are 2 files per report; might be even more with histograms etc.
# Report folder name: identifier plus today's date as day.month (no year).
report_name = f"{report_identifier}_{date.today():%d.%m}"

# All artefacts of one report live in the same folder.
report_dir_path = report_dir / report_name
report_path_htm = report_dir_path / "report.htm"
report_path_csv = report_dir_path / "report.csv"
report_path_config = report_dir_path / "config.json"

In [None]:
# Static inputs that are loaded once and shared by every procurement file.

# overwrite = True  -> the existing report is deleted and a new one is generated
# overwrite = False -> new data is appended only if it isn't in the CSV file yet
overwrite = False

question_dictionary = get_questions(question_file_path)
prompt_dictionary = get_prompt_dict(prompt_file)
supplementary_info = get_supplementary_info()

# Config files selected for this run (the helper receives the overwrite flag and the report CSV path)
ini_files = get_ini_files(config_dir, overwrite, report_path_csv)
ini_file_count = len(ini_files)
print(f"Processing {ini_file_count} procurement files: {sorted(ini_files)}")

**MAIN Q/A GENERATION SCRIPT**

In [None]:
# TODO add parallel prompting

if overwrite: # overwrtitting report; Delete and create new
        if report_path_htm.exists():
                report_path_htm.unlink()
        if report_path_csv.exists():
                report_path_csv.unlink()
        if report_path_config.exists():
                report_path_config.unlink()
                
if not os.path.exists(report_dir_path):
        os.makedirs(report_dir_path)

# Make config dictionary and save as json
config_dict = embedding_conf
config_dict["model"] = llmmodelAzure["model"]
config_dict["temperature"] = llm.temperature
config_dict["temperature"] = llm.timeout
config_dict["max_retries"] = llm.max_retries
#config_dict["top_p"] = 0.0001

with open(report_path_config, "w") as file:
        dump(config_dict, file) 

# To process all questions, leave this empty. Otherwise, specify the exact numbers.
# To process a sub-question, include the parent question number as well, e.g., ["9", "9.2"].
questions_to_process = ["16"] 

for file in tqdm(ini_files, desc="Config files", unit="file"): # TODO fix TQDM; expected that after first file finishes it shows the aproximation of all the other files left to process
        configfile = config_dir / f"{file}.ini"
        
        tqdm.write(f"Processing config file: {configfile}")
        procurement_id, procurement_file, agreement_file, answer_file = get_config_data(configfile, procurement_file_dir, answer_file_dir)
        # Open CSV file, maybe as pandas dataframe
        answer_dictionary = get_answers(answer_file)

        # Getting markdown text from procurement doc
        procurement_content = get_procurement_content(extractor, procurement_file, agreement_file)
    
        # Creating FAISS vector index for the procurement document
        qnaengine = QnAEngine(embedding,llm)
        await qnaengine.createIndex(
                procurement_content,
                "Procurement",
                chunk_size=embedding_conf["chunk_size"],
                chunk_overlap=embedding_conf["chunk_overlap"]
                )

        ### Generating results
        results_table = gen_results(qnaengine, configfile, embedding_conf, question_dictionary, answer_dictionary, prompt_dictionary, supplementary_info, questions_to_process)
        
        # add "Iepirkuma ID" as procurement_id to results table
        # TODO move this inside gen results function once it has been refactored
        for row in results_table:
                row.insert(0, file)
        
        ### Save output
        data = pd.DataFrame(results_table, columns=["Iepirkuma ID", "Nr", "Atbilde", "Sagaidāmā atbilde", "Pamatojums", "Chunk", "Prompt"])
        precision = (data['Atbilde'] == data['Sagaidāmā atbilde']).sum()/len(data)
        print(f"PRECIZITĀTE: {precision*100}%")

        with report_path_htm.open('a', encoding='utf-8') as ofile:  
                # TODO Create a dropdown menu that lets the user select file by "Iepirkuma ID" - in each page only information that has that "Iepirkuma ID" is displayed
                print(f"{procurement_id} PRECIZITĀTE: {precision*100}%" ,file=ofile)
                print(data.to_html(index=False).replace('\\n','<br>'),file=ofile)

        data.to_csv(report_path_csv, 
                    mode='a', 
                    index=False, 
                    header=not report_path_csv.exists(), # only adding one header
                    encoding='utf-8')
        
        
                

# Question precision data report

In [None]:
# Select the report to analyze; by default this is the report folder used by the generation step above
analyzed_report_dir = report_dir_path
csv_filename = "report.csv"
input_csv = analyzed_report_dir / csv_filename

In [None]:
# Build the precision report HTML from the report CSV
precison_report_html = generate_precision_report(input_csv)

# Store it in the same folder as the CSV
output_html = analyzed_report_dir / "precision_report.html"
output_html.write_text(precison_report_html, encoding="utf-8")

print(f"HTML report saved to: {output_html}")

#### Count of questions that can't be answered

In [None]:
# Count the rows whose expected answer is "n/a" for questions that do not have a 0th question
questions_wout_0q = get_questions_without_q0(question_dictionary)

# "Nr" is read as text: with default type inference a column mixing values such
# as "16" and "9.2" becomes float, "16" turns into "16.0" after astype(str) and
# no longer matches the question numbers.
# NOTE(review): assumes questions_wout_0q holds question numbers as strings - confirm.
df = pd.read_csv(input_csv, dtype={"Nr": str})

# read_csv parses "n/a" as NaN, which astype(str) turns into "nan" - hence the comparison below
df["Sagaidāmā atbilde"] = df["Sagaidāmā atbilde"].astype(str).str.strip().str.lower()
filtered_df = df[
    (df["Sagaidāmā atbilde"] == "nan") &
    (df["Nr"].astype(str).isin(questions_wout_0q))
]
print(f"\nCount of rows where 'Sagaidāmā atbilde' == 'n/a' and it doesn't have q0: {len(filtered_df)}")

# Confusion Matrix

In [None]:
# Reload the report; "n/a" values come back from read_csv as NaN and are restored below
df = pd.read_csv(input_csv)

# Same clean-up for both answer columns: text, trimmed, lower-case, missing -> "n/a"
for column in ("Sagaidāmā atbilde", "Atbilde"):
    normalized = df[column].astype(str).str.strip().str.lower()
    df[column] = normalized.replace(["nan", "none", ""], "n/a")

# "x" is counted as a "no information in the context" answer
df["Atbilde"] = df["Atbilde"].replace({"x": "kontekstā nav informācijas"})

# Row / column order of the matrix; categories not listed here are left out by the reindex
expected = ["jā", "nē", "n/a"]
actual = ["jā", "nē", "n/a", "kontekstā nav informācijas"]

raw_counts = pd.crosstab(
    df["Sagaidāmā atbilde"],
    df["Atbilde"],
    rownames=["Expected ↓"],
    colnames=["Actual →"],
    dropna=False,
)
conf_matrix = raw_counts.reindex(index=expected, columns=actual, fill_value=0)
conf_matrix.index.name = "Expected ↓ / Actual →"

print("\nConfusion Matrix:\n")
print(conf_matrix.to_markdown(tablefmt="grid"))


In [None]:
def _row_stats(label):
    """Return (correct count, row total) of conf_matrix for the expected answer `label`."""
    return conf_matrix.loc[label, label], conf_matrix.loc[label].sum()


def _as_percentage(correct, total):
    """Return correct/total as a percentage, or 0 when the row is empty."""
    return (correct / total) * 100 if total > 0 else 0


# Correct and total counts per expected answer
correct_yes, total_yes = _row_stats("jā")
print(f"total 'jā' count: {total_yes}")

correct_no, total_no = _row_stats("nē")
print(f"total 'nē' count: {total_no}")

correct_na, total_na = _row_stats("n/a")
print(f"total 'n/a' count: {total_na}")

# Share of correctly answered rows per expected answer
yes_accuracy = _as_percentage(correct_yes, total_yes)
no_accuracy = _as_percentage(correct_no, total_no)
na_accuracy = _as_percentage(correct_na, total_na)

print(f"\nAccuracy for 'jā': {yes_accuracy:.2f}%")
print(f"Accuracy for 'nē': {no_accuracy:.2f}%")
print(f"Accuracy for 'n/a': {na_accuracy:.2f}%")


In [None]:
def _percentage(numerator, denominator):
    """Return numerator/denominator as a percentage rounded to 2 decimals (0 when the denominator is not positive)."""
    return round((numerator / denominator) * 100, 2) if denominator > 0 else 0


# Totals over all rows of the confusion matrix
total_q = total_yes + total_no + total_na
print(f"Total question count: {total_q}")
correct_q = correct_yes + correct_no + correct_na

precision = _percentage(correct_q, total_q)
print(f"Total precision: {precision}%")

# Answers given to the unanswerable rows (filtered_df), normalised the same way
# as for the confusion matrix: missing values count as "n/a", "x" as "no context".
unanswerable_answers = filtered_df["Atbilde"].astype(str).str.strip().str.lower()
correct_unanswerable = unanswerable_answers.isin(["nan", "none", "", "n/a"]).sum()
unsure_unanswerable = unanswerable_answers.isin(["x", "kontekstā nav informācijas"]).sum()

# Unanswerable rows are removed from BOTH the numerator and the denominator;
# keeping their correct "n/a" answers in the numerator could push the result above 100%.
answerable_q = total_q - len(filtered_df)
correct_answerable = correct_q - correct_unanswerable
precison_wout_unanswerable_qs = _percentage(correct_answerable, answerable_q)
print(f"Total precision without unanswerable (n/a) questions: {precison_wout_unanswerable_qs}%")

# "No context" answers on unanswerable rows are already excluded above,
# so only the remaining ones are subtracted here (no double counting).
context_missing_total = conf_matrix["kontekstā nav informācijas"].sum()
confident_answers = answerable_q - (context_missing_total - unsure_unanswerable)
precision_wout_unanswerable_qs_nocontext = _percentage(correct_answerable, confident_answers)
print(f"Total precision without unanswerable (n/a) questions and when LLM is not unsure: {precision_wout_unanswerable_qs_nocontext}%")
print(f"Total question count without unanswerable (n/a) questions and when LLM is not unsure: {confident_answers}")