In [1]:
# Line magic functions that will allow for imports to be reloaded and not cached
%load_ext autoreload
%autoreload 2

# Imports
import os
from datetime import date
from json import dump
from pathlib import Path

import pandas as pd
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from tqdm import tqdm

# Local
from scripts.extractmd import Extractor
from scripts.vectorindex import QnAEngine
from scripts.utilities import get_prompt_dict, get_questions, get_answers, get_procurement_content, get_config_data, get_ini_files, get_supplementary_info, get_questions_without_q0
from scripts.gen_results import gen_results
from scripts.gen_precision_report import generate_precision_report
from scripts.main_report import build_main_report_html




**My Config:**
[my_config_template.py](scripts/my_config_template.py)

In [2]:
# Load the user's local overrides (the `my_*` variables) into the notebook namespace.
# Python source files are UTF-8 by default, so decode the file explicitly as UTF-8
# instead of relying on the platform's locale encoding.
with open("scripts/my_config.py", encoding="utf-8") as f:
    code = f.read()

# Echo the active configuration so it is recorded in the cell output.
print(code)
# NOTE: exec runs the file's contents with full notebook privileges; only point this
# at a trusted, local config file.
exec(code)

# -----------------------------------------------------------------------------
# Configuration file: my_config.py (see 'my_config_template.py' if you have not created your local version)
# -----------------------------------------------------------------------------
# This file is used to define **user-specific configuration parameters**
# separately from the Jupyter notebook. It allows each user to set their own
# values without modifying the shared notebook or interfering with others.
#
# Usage pattern in the notebook:
#     overwrite = globals().get('my_overwrite', False)
#
# This means:
# - If `my_overwrite` is defined in this config file, its value will be used.
# - If it is not defined, the default value (`False` in this case) will be used.
#
# You can define any custom variable here that the notebook expects via `globals().get(...)`.
#
#  In Jupyter, this script may be executed multiple times.
# Commenting out a variable won't reset it if it was previously defined.
# To revert 

**Global config**

In [3]:
# Retrieval / embedding settings. Values defined as `my_*` variables in my_config.py
# take precedence over the defaults given here.
embedding_conf = dict(
    # Hugging Face model id. Other candidates that were tried:
    # "BAAI/bge-m3", "nomic-ai/nomic-embed-text-v2-moe"
    embeddingmodel=globals().get(
        "my_embeddingmodel",
        "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    ),
    chunk_size=1536,
    chunk_overlap=0,
    top_similar=5,
    # How many nodes to retrieve for reranking; 0 means the reranker is not used
    n4rerank=0,
    # True: use similar chunks as the context; False: use the whole document
    use_similar_chunks=globals().get("my_use_similar_chunks", True),
    # Also include the previous and the next chunk of each similar chunk in the context
    prevnext=True,
)

embedding = HuggingFaceEmbedding(
    model_name=embedding_conf["embeddingmodel"],
    trust_remote_code=True,
)

#For nomic-embed-text-v2-moe
#embedding=HuggingFaceEmbedding(model_name=embedding_conf["embeddingmodel"],trust_remote_code=True,query_instruction="search_query: ",text_instruction="search_document: ")

In [4]:
# LLM setup: Azure OpenAI connection settings.
# API version, endpoint and key are read from environment variables; a missing
# variable falls back to an empty string.
llmmodelAzure = {
    "model": "gpt-4o",
    "version": os.environ.get('AZURE_OPENAI_VERSION', ''),
    "azure_deployment": "gpt-4o",
    "azure_endpoint": os.environ.get('AZURE_ENDPOINT', ''),
    "api_key": os.environ.get('AZURE_OPENAI_KEY', ''),
}

llm = AzureOpenAI(
    # Pass the model name explicitly so the client object agrees with the value that is
    # written to config.json. Without it the client keeps the library's default model
    # name (presumably used for model metadata such as the context window -- see the
    # LlamaIndex AzureOpenAI documentation).
    model=llmmodelAzure["model"],
    azure_deployment=llmmodelAzure["azure_deployment"],
    azure_endpoint=llmmodelAzure["azure_endpoint"],
    api_version=llmmodelAzure["version"],
    api_key=llmmodelAzure["api_key"],
    temperature=0.0,
    timeout=120,
    max_retries=3,
    # NOTE(review): top_p is passed as a bare keyword argument; confirm that the
    # installed llama_index version forwards it to the API (sampling options may need
    # to go through `additional_kwargs`).
    top_p=0.0001,
)

In [5]:
# Markdown document extractor, reused for every procurement file processed below
extractor = Extractor()

In [6]:
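# Alternative LLM backend (disabled): Ollama. Uncomment the lines below to replace the `llm` object with an Ollama model.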
# llmmodelOllama = { "model": "gemma3:27b",
#                 "url":os.environ.get('OLLAMA_ENDPOINT',''),
#                 "context_window":"128000"}

#from llama_index.llms.ollama import Ollama
#llm = Ollama(base_url=llmmodelOllama["url"],
#             model=llmmodelOllama["model"], 
#             context_window=int(llmmodelOllama["context_window"]),
#            request_timeout=300.0,
#            temperature=0.0,
#            additional_kwargs={"seed":1337})

**PROCUREMENT FILE SETTINGS**

In [7]:
# Notebook directory, used to build paths relative to the notebook file.
# `_dh` is IPython's directory history; its first entry is the start-up directory.
# Wrapping it in Path() keeps the `/` joins below working even when the entry is a
# plain string rather than a pathlib.Path.
script_dir = Path(globals()['_dh'][0])

# Document paths
question_file_path = script_dir / "questions" / "questions.yaml"  # alternative: original.yaml
prompt_file = script_dir / "questions" / "prompts.tsv"
report_dir = script_dir / "reports"
# Config directory can be overridden with `my_config_dir` (e.g. "config"); default is "dev_config"
config_dir = script_dir / globals().get('my_config_dir', "dev_config")
procurement_file_dir = script_dir / "cfla_files"
answer_file_dir = script_dir / "answers"

# TODO perhaps prompt user to define unique report name; some types - all; one etc?
report_identifier = globals().get('my_report_identifier', "dev-test")
# TODO maybe add report as a subdirectory as there are 2 files per report; might be even more with histograms etc.
# Report name is "<identifier>_<date>"; the date defaults to today as DD.MM unless
# `my_report_date` is defined.
report_today = f"{date.today():%d.%m}"
report_name = f"{report_identifier}_{globals().get('my_report_date', report_today)}"

# All artefacts of one report live in their own directory
report_dir_path = report_dir / report_name
report_path_htm = report_dir_path / "report.htm"
report_path_csv = report_dir_path / "report.csv"
report_path_config = report_dir_path / "config.json"

In [8]:
# Loading static information
# If `overwrite` is True the existing report is deleted and a new one is generated;
# otherwise new data is appended only if it isn't already in the CSV file.
overwrite = globals().get('my_overwrite', False)

question_dictionary = get_questions(question_file_path)
prompt_dictionary = get_prompt_dict(prompt_file)
supplementary_info = get_supplementary_info()

# Use the user's explicit list when `my_ini_files` is defined and only fall back to
# discovering the config files otherwise. Passing the discovery call as the default of
# `globals().get(...)` would evaluate it even when the override is present.
if 'my_ini_files' in globals():
    ini_files = globals()['my_ini_files']
else:
    ini_files = get_ini_files(config_dir, overwrite, report_path_csv)
print(f"Processing {len(ini_files)} procurement files: {sorted(ini_files)}")

Questions loaded
Found 20 config files in C:\Repos\vpp-cfla\config
Processing 20 procurement files: ['APP_DI_20202ERAF_AK', 'DND_20206', 'EDI_20203AK', 'IKVD_20213_ESF_SAM', 'IP2020_01_ERAF', 'IeM_IC_201913', 'KNP_202215', 'KP2020016A-KF', 'LNP_201976', 'LNP_202037ERAF', 'LU_202049_ERAF', 'MNP_202121_ERAF', 'PND_2019_15-ERAF', 'RTK_2019_12', 'SND_202015-ERAF', 'SNP_202001', 'SNP_202067', 'VND_20201', 'VNP_2023057AK', 'VeA_202012ERAF']


**MAIN Q/A GENERATION SCRIPT**

In [9]:
# TODO add parallel prompting

if overwrite: # overwrtitting report; Delete and create new
        if report_path_htm.exists():
                report_path_htm.unlink()
        if report_path_csv.exists():
                report_path_csv.unlink()
        if report_path_config.exists():
                report_path_config.unlink()
                
if not os.path.exists(report_dir_path):
        os.makedirs(report_dir_path)

# Make config dictionary and save as json
config_dict = embedding_conf
config_dict["model"] = llmmodelAzure["model"]
config_dict["temperature"] = llm.temperature
config_dict["timeout"] = llm.timeout
config_dict["max_retries"] = llm.max_retries
#config_dict["top_p"] = 0.0001

with open(report_path_config, "w") as file:
        dump(config_dict, file) 

# To process all questions, leave this empty. Otherwise, specify the exact numbers.
# To process a sub-question, include the parent question number as well, e.g., ["9", "9.2"].
questions_to_process = globals().get('my_questions_to_process', [])

for file in tqdm(ini_files, desc="Config files", unit="file"): # TODO fix TQDM; expected that after first file finishes it shows the aproximation of all the other files left to process
        configfile = config_dir / f"{file}.ini"
        
        tqdm.write(f"Processing config file: {configfile}")
        procurement_id, procurement_file, agreement_file, answer_file = get_config_data(configfile, procurement_file_dir, answer_file_dir)
        # Open CSV file, maybe as pandas dataframe
        answer_dictionary = get_answers(answer_file)

        # Getting markdown text from procurement doc
        procurement_content = get_procurement_content(extractor, procurement_file, agreement_file)
    
        # Creating FAISS vector index for the procurement document
        qnaengine = QnAEngine(embedding,llm)
        if embedding_conf["use_similar_chunks"] == True:
                await qnaengine.createIndex(
                        procurement_content,
                        "Procurement",
                        chunk_size=embedding_conf["chunk_size"],
                        chunk_overlap=embedding_conf["chunk_overlap"]
                )
        else:
             await qnaengine.load_text(procurement_content)   

        ### Generating results
        results_table = gen_results(qnaengine, configfile, embedding_conf, question_dictionary, answer_dictionary, prompt_dictionary, supplementary_info, questions_to_process)
        
        # add "Iepirkuma ID" as procurement_id to results table
        # TODO move this inside gen results function once it has been refactored
        for row in results_table:
                row.insert(0, file)
        
        ### Save output
        data = pd.DataFrame(results_table, columns=["Iepirkuma ID", "Nr", "Atbilde", "Sagaidāmā atbilde", "Pamatojums", "Uzvedne"])
        precision = (data['Atbilde'] == data['Sagaidāmā atbilde']).sum()/len(data)
        print(f"PRECIZITĀTE: {precision*100}%")

        with report_path_htm.open('a', encoding='utf-8') as ofile:  
                # TODO Create a dropdown menu that lets the user select file by "Iepirkuma ID" - in each page only information that has that "Iepirkuma ID" is displayed
                print(f"{procurement_id} PRECIZITĀTE: {precision*100}%" ,file=ofile)
                print(data.to_html(index=False).replace('\\n','<br>'),file=ofile)

        data.to_csv(report_path_csv, 
                    mode='a', 
                    index=False, 
                    header=not report_path_csv.exists(), # only adding one header
                    encoding='utf-8')
        
        
                

Config files:   0%|                                                                           | 0/20 [00:00<?, ?file/s]

Processing config file: C:\Repos\vpp-cfla\config\APP_DI_20202ERAF_AK.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\APP_DI_20202ERAF_AK\Konsolidets_Konkursa_Nolikums_ar grozijumiem_12.03.2020. (1).docx
Processing file: C:\Repos\vpp-cfla\cfla_files\APP_DI_20202ERAF_AK\Konsolidets_Konkursa_Nolikums_ar grozijumiem_12.03.2020. (1).docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  19%|█████████████                                                          [ time left: 01:31 ][A
Generating embeddings:  36%|████████████████████████▉                                              [ time left: 01:11 ][A
Generating embeddings:  53%|████████████████████████████████████▊                                  [ time left: 01:03 ][A
Generating embeddings:  69%|████████████████████████████████████████████████▋                      [ time left: 00:40 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


59 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 23 24 26 27 28 29 31 32 33 
34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 
38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 
39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 
39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 
40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 52 53 54 56 57 58 
60 61 62 63 65 

Config files:   5%|███▏                                                            | 1/20 [10:20<3:16:32, 620.67s/file]

PRECIZITĀTE: 61.44578313253012%
Processing config file: C:\Repos\vpp-cfla\config\DND_20206.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\DND_20206\NOLIKUMS_2020_6.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  18%|████████████▍                                                          [ time left: 01:31 ][A
Generating embeddings:  34%|███████████████████████▋                                               [ time left: 01:13 ][A
Generating embeddings:  50%|███████████████████████████████████                                    [ time left: 01:07 ][A
Generating embeddings:  66%|██████████████████████████████████████████████▎                        [ time left: 00:49 ][A
Generating embeddings:  82%|█████████████████████████████████████████████████████████▌             [ time left: 00:24 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


62 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 24 26 
27 28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 
37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 
39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 
39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 
40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 
52 53 54 56 57 58 60 61 62 63 65 

Config files:  10%|██████▍                                                         | 2/20 [21:20<3:13:07, 643.77s/file]

PRECIZITĀTE: 62.5%
Processing config file: C:\Repos\vpp-cfla\config\EDI_20203AK.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\EDI_20203AK\Nolikums EDI_2020_3_AK_EIS.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  15%|██████████▋                                                            [ time left: 01:55 ][A
Generating embeddings:  29%|████████████████████▍                                                  [ time left: 01:33 ][A
Generating embeddings:  43%|██████████████████████████████▏                                        [ time left: 01:24 ][A
Generating embeddings:  57%|███████████████████████████████████████▊                               [ time left: 01:18 ][A
Generating embeddings:  71%|█████████████████████████████████████████████████▌                     [ time left: 00:47 ][A
Generating embeddings:  85%|███████████████████████████████████████████████████████████▎           [ time left: 00:23 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


72 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 
18 19 22 23 24 26 27 28 29 31 32 33 34 35 36 37 37.2 37.4 37.5 37.6 
37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 
39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 
39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 
40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 52 
53 54 56 57 58 60 61 62 63 65 

Config files:  15%|█████████▌                                                      | 3/20 [33:37<3:14:29, 686.41s/file]

PRECIZITĀTE: 74.40476190476191%
Processing config file: C:\Repos\vpp-cfla\config\IKVD_20213_ESF_SAM.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\IKVD_20213_ESF_SAM\Nolikums_22.12.2021..docx
Processing file: C:\Repos\vpp-cfla\cfla_files\IKVD_20213_ESF_SAM\Nolikums_22.12.2021..docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  27%|██████████████████▊                                                    [ time left: 00:55 ][A
Generating embeddings:  51%|███████████████████████████████████▊                                   [ time left: 00:38 ][A
Generating embeddings:  76%|████████████████████████████████████████████████████▉                  [ time left: 00:21 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


41 segments created and vectorized.
Index is ready.
2 4 6 9 10 15 16 17 19 22 
23 24 26 27 28 29 31 32 33 34 35 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 
37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 
39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 
39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 
40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 52 53 54 56 
57 58 60 61 62 63 65 

Config files:  20%|████████████▊                                                   | 4/20 [43:13<2:51:25, 642.85s/file]

PRECIZITĀTE: 60.36585365853659%
Processing config file: C:\Repos\vpp-cfla\config\IP2020_01_ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\IP2020_01_ERAF\Nolikums_IP_2020_01_ERAF_pielikumi.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  23%|████████████████▍                                                      [ time left: 01:14 ][A
Generating embeddings:  45%|███████████████████████████████▎                                       [ time left: 01:01 ][A
Generating embeddings:  66%|██████████████████████████████████████████████▏                        [ time left: 00:41 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


47 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 
24 26 27 28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 
37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 
39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 
39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 
40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 
51.2 51.4 52 53 54 56 57 58 60 61 62 63 65 

Config files:  25%|████████████████                                                | 5/20 [53:54<2:40:31, 642.13s/file]

PRECIZITĀTE: 57.14285714285714%
Processing config file: C:\Repos\vpp-cfla\config\IeM_IC_201913.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\IeM_IC_201913\NOLIKUMS vec.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  15%|██████████▋                                                            [ time left: 01:35 ][A
Generating embeddings:  29%|████████████████████▍                                                  [ time left: 01:30 ][A
Generating embeddings:  43%|██████████████████████████████▏                                        [ time left: 01:26 ][A
Generating embeddings:  57%|███████████████████████████████████████▊                               [ time left: 01:00 ][A
Generating embeddings:  71%|█████████████████████████████████████████████████▌                     [ time left: 00:40 ][A
Generating embeddings:  85%|███████████████████████████████████████████████████████████▎           [ time left: 00:26 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


72 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 
16 17 18 19 22 23 24 26 27 28 29 31 32 33 34 35 36 37 37.2 37.4 
37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.7 38.8 38.9 39 39.1 39.2 39.3 
39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 
39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 
40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 44 46 47 48 50 51 51.4 52 53 54 
56 57 58 60 61 62 63 65 

Config files:  30%|██████████████████▌                                           | 6/20 [1:04:28<2:29:10, 639.31s/file]

PRECIZITĀTE: 59.876543209876544%
Processing config file: C:\Repos\vpp-cfla\config\KNP_202215.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\KNP_202215\Nolikums_2022_15_ar_grozijumiem.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  27%|██████████████████▊                                                    [ time left: 00:52 ][A
Generating embeddings:  51%|███████████████████████████████████▊                                   [ time left: 00:34 ][A
Generating embeddings:  76%|████████████████████████████████████████████████████▉                  [ time left: 00:22 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


41 segments created and vectorized.
Index is ready.
2 4 6 9 10 15 16 17 19 22 23 24 
26 27 28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 
37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 
39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 
39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 
40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 
51.4 52 53 54 56 57 58 60 61 62 63 65 

Config files:  35%|█████████████████████▋                                        | 7/20 [1:15:30<2:20:06, 646.62s/file]

PRECIZITĀTE: 71.34146341463415%
Processing config file: C:\Repos\vpp-cfla\config\KP2020016A-KF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\KP2020016A-KF\Konkursa_nolikums_TENT_3_posms_GROZIJUMI_16.03.2020.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                        | 0/3 [00:00<?, ?it/s][A
Recognizing layout:  33%|█████████████████████▎                                          | 1/3 [00:09<00:18,  9.12s/it][A
Recognizing layout:  67%|██████████████████████████████████████████▋                     | 2/3 [00:17<00:08,  8.55s/it][A
Recognizing layout: 100%|████████████████████████████████████████████████████████████████| 3/3 [00:21<00:00,  7.08s/it][A

Running OCR Error Detection:   0%|                                                               | 0/4 [00:00<?, ?it/s][A
Running OCR Error Detection:  25%|█████████████▊                                         | 1/4 [00:00<00:01,  1.70it/s][A
Running OCR Error Detection:  50%|███████████████████████████▌                           | 2/4 [00:01<00:01,  1.70it/s][A
Running OCR Error Detection:  75%|█████████████████████████████████████████▎             | 3/4 [00:01<00:00,  1.70it/s][A
Running OCR Er

Processing file: C:\Repos\vpp-cfla\cfla_files\KP2020016A-KF\Konkursa_nolikums_TENT_3_posms_GROZIJUMI_16.03.2020.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                        | 0/3 [00:00<?, ?it/s][A
Recognizing layout:  33%|█████████████████████▎                                          | 1/3 [00:08<00:16,  8.46s/it][A
Recognizing layout:  67%|██████████████████████████████████████████▋                     | 2/3 [00:17<00:08,  8.65s/it][A
Recognizing layout: 100%|████████████████████████████████████████████████████████████████| 3/3 [00:24<00:00,  8.27s/it][A

Running OCR Error Detection:   0%|                                                               | 0/5 [00:00<?, ?it/s][A
Running OCR Error Detection:  20%|███████████                                            | 1/5 [00:00<00:02,  1.58it/s][A
Running OCR Error Detection:  40%|██████████████████████                                 | 2/5 [00:01<00:01,  1.59it/s][A
Running OCR Error Detection:  60%|█████████████████████████████████                      | 3/5 [00:01<00:01,  1.58it/s][A
Running OCR Er

61 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 
17 18 19 22 23 24 26 27 28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 
35.7 35.8 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 
38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 
39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 
39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 
47 48 50 51 51.1 51.2 51.4 52 53 54 56 57 58 60 61 62 63 65 

Config files:  40%|████████████████████████▊                                     | 8/20 [1:31:09<2:27:56, 739.69s/file]

PRECIZITĀTE: 51.78571428571429%
Processing config file: C:\Repos\vpp-cfla\config\LNP_201976.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\LNP_201976\Apstiprinats.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  20%|█████████████▊                                                         [ time left: 01:19 ][A
Generating embeddings:  38%|██████████████████████████▎                                            [ time left: 01:06 ][A
Generating embeddings:  55%|██████████████████████████████████████▊                                [ time left: 00:46 ][A
Generating embeddings:  73%|███████████████████████████████████████████████████▎                   [ time left: 00:27 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


56 segments created and vectorized.
Index is ready.
2 4 
6 7 9 10 15 16 17 18 19 22 23 24 26 27 28 29 31 32 33 34 
35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 
38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 
39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 
39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 
40.11 40.12 41 43 44 46 47 48 50 51 52 53 54 56 57 58 60 61 62 63 
65 

Config files:  45%|███████████████████████████▉                                  | 9/20 [1:45:39<2:23:04, 780.40s/file]

PRECIZITĀTE: 66.07142857142857%
Processing config file: C:\Repos\vpp-cfla\config\LNP_202037ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\LNP_202037ERAF\Nolikums_pilsdrupas_1.karta.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  31%|█████████████████████▍                                                 [ time left: 00:53 ][A
Generating embeddings:  58%|████████████████████████████████████████▊                              [ time left: 00:37 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


36 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 24 26 27 28 29 31 
32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 
37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 
39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 
39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 
40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 52 53 54 56 57 58 60 
61 62 63 65 

Config files:  50%|██████████████████████████████▌                              | 10/20 [1:59:34<2:12:52, 797.29s/file]

PRECIZITĀTE: 63.095238095238095%
Processing config file: C:\Repos\vpp-cfla\config\LU_202049_ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\LU_202049_ERAF\Nolikums.docx
Processing file: C:\Repos\vpp-cfla\cfla_files\LU_202049_ERAF\Nolikums.docx
An exception occurred: FileNotFoundError 2



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


17 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 24 26 27 
28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 37.6 
37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 
39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 
39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 
40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 52 
53 54 56 57 58 60 61 62 63 65 

Config files:  55%|█████████████████████████████████▌                           | 11/20 [2:10:41<1:53:36, 757.36s/file]

PRECIZITĀTE: 62.5%
Processing config file: C:\Repos\vpp-cfla\config\MNP_202121_ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\MNP_202121_ERAF\MNP2021_21_ERAF_nolikums_12-07-2021.doc.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                        | 0/9 [00:00<?, ?it/s][A
Recognizing layout:  11%|███████                                                         | 1/9 [00:08<01:07,  8.46s/it][A
Recognizing layout:  22%|██████████████▏                                                 | 2/9 [00:16<00:57,  8.24s/it][A
Recognizing layout:  33%|█████████████████████▎                                          | 3/9 [00:24<00:49,  8.26s/it][A
Recognizing layout:  44%|████████████████████████████▍                                   | 4/9 [00:33<00:42,  8.46s/it][A
Recognizing layout:  56%|███████████████████████████████████▌                            | 5/9 [00:42<00:34,  8.53s/it][A
Recognizing layout:  67%|██████████████████████████████████████████▋                     | 6/9 [00:50<00:25,  8.54s/it][A
Recognizing layout:  78%|█████████████████████████████████████████████████▊              | 7/9 [00:59<00:17,  8.58s/it][A
Recognizing lay

Processing file: C:\Repos\vpp-cfla\cfla_files\MNP_202121_ERAF\MNP2021_21_ERAF_nolikums_12-07-2021.doc.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                        | 0/4 [00:00<?, ?it/s][A
Recognizing layout:  25%|████████████████                                                | 1/4 [00:08<00:24,  8.06s/it][A
Recognizing layout:  50%|████████████████████████████████                                | 2/4 [00:16<00:16,  8.37s/it][A
Recognizing layout:  75%|████████████████████████████████████████████████                | 3/4 [00:25<00:08,  8.43s/it][A
Recognizing layout: 100%|████████████████████████████████████████████████████████████████| 4/4 [00:28<00:00,  7.01s/it][A

Running OCR Error Detection:   0%|                                                               | 0/5 [00:00<?, ?it/s][A
Running OCR Error Detection:  20%|███████████                                            | 1/5 [00:00<00:02,  1.62it/s][A
Running OCR Error Detection:  40%|██████████████████████                                 | 2/5 [00:01<00:01,  1.59it/s][A
Running OCR Er

118 segments created and vectorized.
Index is ready.
2 4 6 9 10 15 16 17 19 22 
23 24 26 27 28 29 31 32 33 34 35 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 
37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 
39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 
39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 
40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 52 53 54 56 
57 58 60 61 62 63 65 

Config files:  60%|████████████████████████████████████▌                        | 12/20 [2:23:57<1:42:33, 769.13s/file]

PRECIZITĀTE: 54.87804878048781%
Processing config file: C:\Repos\vpp-cfla\config\PND_2019_15-ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\PND_2019_15-ERAF\Nolikums_ar_grozijumiem_2.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:  22%|███████████████                                                        [ time left: 01:04 ][A
Generating embeddings:  41%|████████████████████████████▊                                          [ time left: 00:50 ][A
Generating embeddings:  61%|██████████████████████████████████████████▌                            [ time left: 00:38 ][A
Generating embeddings:  80%|████████████████████████████████████████████████████████▎              [ time left: 00:19 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


51 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 
24 26 27 28 29 31 32 33 34 35 35.1 35.2 35.5 35.6 35.7 35.8 36 37 37.2 37.4 
37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.7 38.8 38.9 39 39.1 39.2 39.3 
39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 
39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 
40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 44 46 47 48 50 51 51.4 52 53 54 
56 57 58 60 61 62 63 65 

Config files:  65%|███████████████████████████████████████▋                     | 13/20 [2:33:36<1:23:01, 711.58s/file]

PRECIZITĀTE: 70.98765432098766%
Processing config file: C:\Repos\vpp-cfla\config\RTK_2019_12.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\RTK_2019_12\Nolikums.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:   2%|█▎                                                                     [ time left: 00:05 ][A
Generating embeddings:  20%|██████████████▎                                                        [ time left: 01:06 ][A
Generating embeddings:  39%|███████████████████████████▏                                           [ time left: 00:56 ][A
Generating embeddings:  57%|████████████████████████████████████████▏                              [ time left: 01:02 ][A
Generating embeddings:  76%|█████████████████████████████████████████████████████▏                 [ time left: 00:35 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


54 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 
23 24 26 27 28 29 31 32 33 34 35 35.1 35.2 35.5 35.6 35.7 35.8 36 37 37.2 
37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.7 38.8 38.9 39 39.1 39.2 
39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 
39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 
40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 44 46 47 48 50 51 51.4 52 53 
54 56 57 58 60 61 62 63 65 

Config files:  70%|██████████████████████████████████████████▋                  | 14/20 [2:42:51<1:06:26, 664.42s/file]

PRECIZITĀTE: 59.25925925925925%
Processing config file: C:\Repos\vpp-cfla\config\SND_202015-ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\SND_202015-ERAF\SND_2020_ERAF_DI_projektesana_buvnieciba_groz_06082020.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                        | 0/4 [00:00<?, ?it/s][A
Recognizing layout:  25%|████████████████                                                | 1/4 [00:08<00:26,  8.79s/it][A
Recognizing layout:  50%|████████████████████████████████                                | 2/4 [00:16<00:16,  8.20s/it][A
Recognizing layout:  75%|████████████████████████████████████████████████                | 3/4 [00:24<00:08,  8.10s/it][A
Recognizing layout: 100%|████████████████████████████████████████████████████████████████| 4/4 [00:26<00:00,  6.57s/it][A

Running OCR Error Detection:   0%|                                                               | 0/5 [00:00<?, ?it/s][A
Running OCR Error Detection:  20%|███████████                                            | 1/5 [00:00<00:02,  1.60it/s][A
Running OCR Error Detection:  40%|██████████████████████                                 | 2/5 [00:01<00:01,  1.60it/s][A
Running OCR Er

Processing file: C:\Repos\vpp-cfla\cfla_files\SND_202015-ERAF\SND_2020_ERAF_DI_projektesana_buvnieciba_groz_06082020.pdf



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:   1%|▊                                                                      [ time left: 00:10 ][A
Generating embeddings:  12%|████████▎                                                              [ time left: 02:14 ][A
Generating embeddings:  23%|███████████████▊                                                       [ time left: 01:56 ][A
Generating embeddings:  33%|███████████████████████▎                                               [ time left: 02:18 ][A
Generating embeddings:  44%|██████████████████████████████▊                                        [ time left: 01:55 ][A
Generating embeddings:  55%|██████████████████████████████████████▍                                [ time left: 01:27 ][A
Generating embeddings:  66%|█████████████████████████████████████████████▉                         [ time left: 01:11 ][A
Generating embe

93 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 
22 23 24 26 27 28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 
37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 
39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 
39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 
40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 
51 51.1 51.2 51.4 52 53 54 56 57 58 60 61 62 63 65 PRECIZITĀTE: 51.19047619047619%


Config files:  75%|█████████████████████████████████████████████▊               | 15/20 [3:00:39<1:05:29, 785.95s/file]

Processing config file: C:\Repos\vpp-cfla\config\SNP_202001.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\SNP_202001\Nolikums_pielikumi_SNP_2020_1.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:   1%|▋                                                                      [ time left: 00:12 ][A
Generating embeddings:  10%|███████▎                                                               [ time left: 03:27 ][A
Generating embeddings:  20%|██████████████                                                         [ time left: 02:55 ][A
Generating embeddings:  30%|████████████████████▋                                                  [ time left: 02:42 ][A
Generating embeddings:  39%|███████████████████████████▎                                           [ time left: 02:18 ][A
Generating embeddings:  49%|██████████████████████████████████                                     [ time left: 02:00 ][A
Generating embeddings:  58%|████████████████████████████████████████▋                              [ time left: 01:33 ][A
Generating embe

105 segments created and vectorized.
Index is ready.
2 4 6 7 9 
10 15 16 17 18 19 22 23 24 26 27 28 29 31 32 33 34 35 35.1 35.2 
35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 
38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 
39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 
39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 
43 44 46 47 48 50 51 51.1 51.2 51.4 52 53 54 56 57 58 60 61 62 63 
65 

Config files:  80%|██████████████████████████████████████████████████▍            | 16/20 [3:16:11<55:19, 829.95s/file]

PRECIZITĀTE: 59.523809523809526%
Processing config file: C:\Repos\vpp-cfla\config\SNP_202067.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\SNP_202067\Nolikums_pielikumi.docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:   1%|▋                                                                      [ time left: 00:13 ][A
Generating embeddings:  11%|███████▊                                                               [ time left: 03:11 ][A
Generating embeddings:  21%|██████████████▊                                                        [ time left: 02:42 ][A
Generating embeddings:  31%|█████████████████████▉                                                 [ time left: 02:20 ][A
Generating embeddings:  42%|█████████████████████████████▋                                         [ time left: 01:56 ][A
Generating embeddings:  62%|███████████████████████████████████████████▏                           [ time left: 01:01 ][A
Generating embeddings:  72%|██████████████████████████████████████████████████▏                    [ time left: 00:54 ][A
Generating embe

99 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 24 26 27 28 29 31 
32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 37.4 37.5 37.6 37.7 37.9 37.10 
37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 39.3 39.4 39.5 39.6 39.7 
39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 39.23 39.24 39.25 39.26 39.27 
39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 40.3 40.4 40.5 40.6 40.7 
40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 51.4 52 53 54 56 
57 58 60 61 62 63 65 

Config files:  85%|█████████████████████████████████████████████████████▌         | 17/20 [3:31:39<42:58, 859.55s/file]

PRECIZITĀTE: 59.523809523809526%
Processing config file: C:\Repos\vpp-cfla\config\VND_20201.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\VND_20201\Nolikums-Buvnieciba (1).docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:   2%|█▎                                                                     [ time left: 00:07 ][A
Generating embeddings:  21%|██████████████▌                                                        [ time left: 01:17 ][A
Generating embeddings:  40%|███████████████████████████▋                                           [ time left: 01:08 ][A
Generating embeddings:  58%|████████████████████████████████████████▉                              [ time left: 00:45 ][A
Generating embeddings:  77%|██████████████████████████████████████████████████████▏                [ time left: 00:23 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


53 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 16 17 18 19 22 23 
24 26 27 28 29 31 32 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 37.2 
37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 
39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 
39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 
40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 
51.2 51.4 52 53 54 56 57 58 60 61 62 63 65 

Config files:  90%|████████████████████████████████████████████████████████▋      | 18/20 [3:45:54<28:35, 857.99s/file]

PRECIZITĀTE: 52.38095238095239%
Processing config file: C:\Repos\vpp-cfla\config\VNP_2023057AK.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\VNP_2023057AK\057AK_ Nolikums_Zilakalna skatu torna parbuve un teritorijas labiekartosana (1).docx



Generating embeddings:   0%|                                                                           [ time left: ? ][A
Generating embeddings:   2%|█▎                                                                     [ time left: 00:08 ][A
Generating embeddings:  20%|█████████████▊                                                         [ time left: 01:18 ][A
Generating embeddings:  38%|██████████████████████████▎                                            [ time left: 00:58 ][A
Generating embeddings:  55%|██████████████████████████████████████▊                                [ time left: 00:45 ][A
Generating embeddings:  73%|███████████████████████████████████████████████████▎                   [ time left: 00:26 ][A
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ][A


56 segments created and vectorized.
Index is ready.
2 4 6 9 10 16 19 
22 23 24 26 27 28 29 31 33 34 35 35.1 35.2 35.3 35.5 35.6 35.7 35.8 36 37 
37.2 37.4 37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.5 38.6 38.7 38.8 38.9 39 39.1 
39.2 39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 
39.22 39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 
40.2 40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 
51.2 51.4 52 53 54 56 57 58 60 61 62 63 65 

Config files:  95%|███████████████████████████████████████████████████████████▊   | 19/20 [3:55:39<12:56, 776.01s/file]

PRECIZITĀTE: 59.49367088607595%
Processing config file: C:\Repos\vpp-cfla\config\VeA_202012ERAF.ini
Processing file: C:\Repos\vpp-cfla\cfla_files\VeA_202012ERAF\VeA_2020_12_ERAF_Nolikums.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                       | 0/19 [00:00<?, ?it/s][A
Recognizing layout:   5%|███▎                                                           | 1/19 [00:09<02:45,  9.22s/it][A
Recognizing layout:  11%|██████▋                                                        | 2/19 [00:17<02:29,  8.78s/it][A
Recognizing layout:  16%|█████████▉                                                     | 3/19 [00:26<02:19,  8.74s/it][A
Recognizing layout:  21%|█████████████▎                                                 | 4/19 [00:34<02:06,  8.46s/it][A
Recognizing layout:  26%|████████████████▌                                              | 5/19 [00:42<01:55,  8.28s/it][A
Recognizing layout:  32%|███████████████████▉                                           | 6/19 [00:50<01:46,  8.20s/it][A
Recognizing layout:  37%|███████████████████████▏                                       | 7/19 [00:58<01:37,  8.14s/it][A
Recognizing lay

Processing file: C:\Repos\vpp-cfla\cfla_files\VeA_202012ERAF\VeA_2020_12_ERAF_Nolikums.pdf
Loaded layout model s3://layout/2025_02_18 on device cpu with dtype torch.float32
Loaded texify model s3://texify/2025_02_18 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded table recognition model s3://table_recognition/2025_02_18 on device cpu with dtype torch.float32
Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded detection model s3://inline_math_detection/2025_02_24 on device cpu with dtype torch.float32



Recognizing layout:   0%|                                                                        | 0/2 [00:00<?, ?it/s][A
Recognizing layout:  50%|████████████████████████████████                                | 1/2 [00:08<00:08,  8.23s/it][A
Recognizing layout: 100%|████████████████████████████████████████████████████████████████| 2/2 [00:12<00:00,  6.19s/it][A

Running OCR Error Detection:   0%|                                                               | 0/3 [00:00<?, ?it/s][A
Running OCR Error Detection:  33%|██████████████████▎                                    | 1/3 [00:00<00:01,  1.58it/s][A
Running OCR Error Detection:  67%|████████████████████████████████████▋                  | 2/3 [00:01<00:00,  1.58it/s][A
Running OCR Error Detection: 100%|███████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.10it/s][A

Detecting bboxes: 0it [00:00, ?it/s][A

Detecting bboxes: 0it [00:00, ?it/s][A

Recognizing tables:   0%|                             

145 segments created and vectorized.
Index is ready.
2 4 6 7 9 10 15 
16 17 18 19 22 23 24 26 27 28 29 31 32 33 34 35 36 37 37.2 37.4 
37.5 37.6 37.7 37.9 37.10 37.11 37.13 38 38.1 38.2 38.3 38.4 38.5 38.6 38.7 38.8 38.9 39 39.1 39.2 
39.3 39.4 39.5 39.6 39.7 39.8 39.9 39.10 39.11 39.12 39.13 39.14 39.15 39.16 39.17 39.18 39.19 39.20 39.21 39.22 
39.23 39.24 39.25 39.26 39.27 39.28 39.29 39.30 39.31 39.32 39.33 39.34 39.35 39.36 39.37 39.38 39.39 40 40.1 40.2 
40.3 40.4 40.5 40.6 40.7 40.8 40.9 40.10 40.11 40.12 41 43 44 46 47 48 50 51 51.1 51.2 
51.4 52 53 54 56 57 58 60 61 62 63 65 

Config files: 100%|███████████████████████████████████████████████████████████████| 20/20 [4:12:28<00:00, 757.40s/file]

PRECIZITĀTE: 61.30952380952381%





# Report generation

In [10]:
# Select the report to analyze: the report cells below read `input_csv`
# and write their HTML output into `analyzed_report_dir`.
csv_filename = "report.csv"
analyzed_report_dir = report_dir_path
input_csv = analyzed_report_dir / csv_filename

## Main report

In [11]:
# Render the main report from the analyzed CSV and save it in the report directory
output_html_path = analyzed_report_dir / "main_report.html"
html_output = build_main_report_html(input_csv, question_dictionary)
output_html_path.write_text(html_output, encoding="utf-8")

print(f"Main HTML report saved to: {output_html_path}")

Main HTML report saved to: C:\Repos\vpp-cfla\reports\test_bge-m3_31.07\main_report.html


## Precision report

In [12]:
# Generate the precision report from the analyzed CSV, using the question set
# returned by get_questions_without_q0
questions_wout_0q = get_questions_without_q0(question_dictionary)
precison_report_html = generate_precision_report(input_csv, questions_wout_0q)

# Save the report next to the CSV (same write_text pattern as the main report cell)
output_html = analyzed_report_dir / "precision_report.html"
output_html.write_text(precison_report_html, encoding="utf-8")

print(f"HTML report saved to: {output_html}")

HTML report saved to: C:\Repos\vpp-cfla\reports\test_bge-m3_31.07\precision_report.html
