In [1]:
# 🔧 Setup & Import
import sys
import os

sys.path.append(
    os.path.join(
        os.path.abspath(os.path.join(os.getcwd(), "..")) , "src"
    )
)

from schema_linking.schema.extract_schema import extract_column_texts
from schema_linking.keywords.keyword_extractor import llama_keyword_extraction
from schema_linking.retrieval.retriever import build_or_load_index, retrieve_top_k
from schema_linking.llm.llama_linker import llama_table_linking, llama_column_linking, llama_filter_columns_by_keywords
from schema_linking.utils.printer import (
    print_header,
    print_keywords,
    pretty_print_list,
    print_summary
)
from schema_linking.config.config import TOP_K_COLUMNS

In [2]:
# ✍️ Input della Query
print_header("Input")
question = "Quali attività IVA risultano ancora valide nel 2024?"
print(f"Question: {question}")


Input
Question: Quali attività IVA risultano ancora valide nel 2024?


In [3]:
# 🔍 Step 1: Extracting keywords using LLM...
print_header("Step 1: Extracting keywords using LLM...")
keywords = llama_keyword_extraction(question)
print_keywords(keywords)
keyword_string = " ".join(keywords)


Step 1: Extracting keywords using LLM...





Keywords:
1. IVA
2. attività
3. risultano
4. valida
5. 2024


In [4]:
# 🧱 Step 2: Schema extraction
print_header("Step 2: Loading schema...")
column_texts = extract_column_texts()
print(f"Loaded {len(column_texts)} columns")


Step 2: Loading schema...
Loaded 47388 columns


In [5]:
# 🧠 Step 3: FAISS Pruning
print_header(f"Step 3: FAISS column pruning (top {TOP_K_COLUMNS}) using keywords...")
column_index, column_map = build_or_load_index(column_texts)
pruned_columns = retrieve_top_k(column_index, column_map, keyword_string, TOP_K_COLUMNS)

if pruned_columns:
    pretty_print_list("Pruned Columns (FAISS)", pruned_columns)
else:
    print("No columns found")


Step 3: FAISS column pruning (top 50) using keywords...
Loading FAISS index from disk → c:\Workspace\NL2SQL\data\faiss_columns.bin

Pruned Columns (FAISS):
1. ba_annint.CICODIVA: Codice IVA intestatario (tabella: IS - Dati per anno)
2. ei_reportvat.VPIDEB12: IVA dovuta (tabella: EI - Liquidazione dati contabili)
3. ba_tmpregiva.SRTOTIND: Totale IVA indetraibile (tabella: CG - TMP Stampa registri IVA)
4. ba_tmpliqiva2.LIIVAIVA: Credito IVA su IVA (tabella: CG - TMP Liquidazione IVA 2)
5. ba_tmpliqiva.LITOTIND: Totale IVA indetraibile (tabella: CG - TMP Liquidazione IVA)
6. ba_dettliqiva.LIPROIND: Progressivo IVA indetraibile (tabella: CG - Dettaglio liquidazione)
7. ei_reportvat.VPIMPOR6: IVA detratta (tabella: EI - Liquidazione dati contabili)
8. ba_tmpregiva.SRPROIND: Progressivo IVA indetraibile (tabella: CG - TMP Stampa registri IVA)
9. ei_reportvat.VPIDEB12_C: IVA dovuta (tabella: EI - Liquidazione dati contabili)
10. ba_liqiva.LIACCIVA: Acconto IVA (tabella: CG - Liquidazione Iva

In [6]:
# 🧠 Step 4: LLM filtering on pruned columns
print_header("Step 4: LLM filtering on pruned columns...")
llm_filtered_columns = llama_filter_columns_by_keywords(keyword_string, pruned_columns, keywords)

if llm_filtered_columns:
    pretty_print_list("Filtered Columns (LLM)", llm_filtered_columns)
else:
    print("No columns passed the LLM filtering step")


Step 4: LLM filtering on pruned columns...





Filtered Columns (LLM):
1. - ba_ivareg.RICODATT
2. - ba_ivareg.RICODIVA
3. - ba_regiva.RICODATT
4. - ri_dettsy_2022.CHRITOPE
5. - ri_dettsy_2022.CHRIIMSO
6. - ba_rifatt.RACODIVI
7. - ba_rifatt.RACODIVA
8. - ba_diceme_2020_m.DECOMPLA
9. - ei_reportvat.VPIMPOR6
10. - ei_reportvat.VPIDEB12
11. - ei_reportvat.VPIDEB12_C
12. - ei_reportvat.VPIDEB12S
13. - ei_reportvat.VPIMPOR5S
14. - ei_reportvat.VPIMPOR6S
15. - ei_reportvat.VPICRE12
16. - ba_tmpregiva.SRPROIND
17. - ba_tmpregiva.SRIVAIND
18. - ba_tmpregiva.SRTOTIND
19. - ba_tmpesidif.PTTOTIND
20. - ba_tmpnoseqiva.SQTOTIVA
21. - ba_tmpcastiva.CIIMPON2
22. - ba_tmpcastiva.CIIMPON2
23. - ba_tmpliqiva.LIPROIND
24. - ba_tmpliqiva.LIPRODET
25. - ba_tmpliqiva.LITOTIND
26. - ba_tmpliqiva.LITOTDET
27. - ba_tmpliqiva.LITOTIVA2
28. - ba_tmpliqiva.LITOTIVA3
29. - ba_tmpliqiva2.LIIVAIVA
30. - ba_tmpliqiva2.LICVP17D
31. - ba_tmpliqiva2.LICVPCPP
32. - ba_tmpliqiva2.LIACCIVA
33. - ba_liqiva.LIACCIVA
34. - ba_mycomp2.SCCONGRM
35. - ei_reportvat.VPIMPOR6
3

In [7]:
# 🧮 Step 5: Table Linking via LLM
print_header("Step 5: LLM-based table linking (using filtered columns)...")
candidate_tables = list(set([col.split('.')[0] for col in llm_filtered_columns]))

linked_tables = llama_table_linking(question, candidate_tables)

if linked_tables:
    pretty_print_list("Linked Tables (LLM)", linked_tables)
else:
    print("No tables linked by LLM")


Step 5: LLM-based table linking (using filtered columns)...





Linked Tables (LLM):
1. ba_liqiva
2. ba_diceme_2020_m
3. ba_regiva
4. ba_tmpnoseqiva
5. ba_tmpcastiva
6. ba_mycomp2
7. ba_ivareg
8. ba_rifatt
9. ba_tmpregiva
10. ei_reportvat
11. ei_reportvat_par
12. ei_reportvat_m
13. ba_tmpesidif
14. ba_tmpliqiva
15. ba_tmpliqiva2


In [8]:
# 🔗 Step 6: Final Column Linking
print_header("Step 6: LLM-based final column linking...")
final_columns = [col for col in llm_filtered_columns if col.split('.')[0] in linked_tables]
linked_columns = llama_column_linking(keyword_string, final_columns)

if linked_columns:
    print_summary(question, keywords, linked_tables, linked_columns)
else:
    print("No columns linked by LLM")


Step 6: LLM-based final column linking...





Final Result
Question: Quali attività IVA risultano ancora valide nel 2024?

Keywords:
1. Attività
2. IVA
3. 2024
4. risultano
5. valida

Linked Tables (LLM):
1. ba_ivareg
2. ei_tmpcastiva
3. ba_diceme_2020_m
4. ei_reportvat
5. ba_tmpliqiva
6. ei_reportvat_m
7. ba_tmpregiva
8. ba_diceme_m

Linked Columns (LLM):
1. - ei_reportvat_m.VPPARIVA
2. - ba_tmpliqiva.LITOTIVA2
3. - ei_reportvat.VPIDEB12
4. - ei_reportvat.VPIDEB12_CS
5. - ei_reportvat.VPIMPOR6
6. - ei_reportvat.VPICRE12
7. - ba_tmpliqiva.LITOTIND
8. - ba_tmpliqiva.LITOTDET
9. - ba_tmpregiva.SRPROIND
10. - ba_tmpregiva.SRIVAIND
11. - ba_tmpliqiva.LIPROIND
12. - ba_tmpliqiva.LIPRODET
13. - ba_diceme_2020_m.DECOMPLA
14. - ba_diceme_m.DECOMPLA
