In [1]:
# 🔧 Setup & Import
import sys
import os

# Put the "src" directory that sits one level above the current working
# directory on the import path. The membership check keeps sys.path free of
# duplicate entries when this cell is executed more than once in one kernel.
_src_dir = os.path.join(os.path.abspath(os.path.join(os.getcwd(), "..")), "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from schema_linking.schema.extract_schema import extract_column_texts
from schema_linking.keywords.keyword_extractor import llama_keyword_extraction
from schema_linking.retrieval.retriever import build_or_load_index, retrieve_top_k
from schema_linking.llm.llama_linker import llama_table_linking, llama_column_linking, llama_filter_columns_by_keywords
from schema_linking.utils.printer import (
    print_header,
    print_keywords,
    pretty_print_list,
    print_summary
)
from schema_linking.config.config import TOP_K_COLUMNS

In [2]:
# ✍️ Query input
# The question text is bound first so the two display calls below sit together;
# the same variable is read again by the later cells of this notebook.
question = "Question: Quali sono le descrizioni di rigo delle voci contabili del 2011?"

print_header("Input")
print(question)


Input
Question: Quali sono le descrizioni di rigo delle voci contabili del 2011?


In [6]:
# 🔍 Step 1: extract keywords from the question with the LLM helper and display them
print_header("Step 1: Extracting keywords using LLM...")
keywords = llama_keyword_extraction(question)
print_keywords(keywords)
# Space-joined form of the keywords; the retrieval, filtering and column-linking
# cells further down take this single string as an argument.
keyword_string = " ".join(keywords)


Step 1: Extracting keywords using LLM...





Keywords:
1. voci
2. contabili
3. 2011
4. descrizioni
5. di
6. rigo


In [8]:
# 🧱 Step 2: Schema extraction
# Collect the column texts via extract_column_texts() and report how many came back.
print_header("Step 2: Loading schema...")
column_texts = extract_column_texts()
print(f"Loaded {len(column_texts)} columns")


Step 2: Loading schema...
Loaded 47388 columns


In [10]:
# 🧠 Step 3: FAISS pruning
# Build (or load) the index over the column texts, then query it with the
# keyword string, asking for TOP_K_COLUMNS results.
print_header(f"Step 3: FAISS column pruning (top {TOP_K_COLUMNS}) using keywords...")
column_index, column_map = build_or_load_index(column_texts)
pruned_columns = retrieve_top_k(
    column_index,
    column_map,
    keyword_string,
    TOP_K_COLUMNS,
)

# Report the empty case first, otherwise list what the retriever returned.
if not pruned_columns:
    print("No columns found")
else:
    pretty_print_list("Pruned Columns (FAISS)", pruned_columns)


Step 3: FAISS column pruning (top 50) using keywords...
Loading FAISS index from disk → c:\Workspace\NL2SQL\data\faiss_columns.bin

Pruned Columns (FAISS):
1. ri_versena.DERIFCON: Riferimento a scrittura contabile
2. ri_dettsy_2021.CHSOMERE: Somme erogate(riservata al soggetto erogatore delle somme)
3. ri_dettsy_2019.CHSOMERE: Somme erogate(riservata al soggetto erogatore delle somme)
4. acbil_mrett_m.RECSEZNCONT: Sezionale contabile
5. ri_dettsy_2016.CHSOMERE: Somme erogate(riservata al soggetto erogatore delle somme)
6. ri_dettsy_2017.CHSOMERE: Somme erogate(riservata al soggetto erogatore delle somme)
7. ri_dettsy_2022.CHSOMERE: Somme erogate(riservata al soggetto erogatore delle somme)
8. fi_agoregass.RECPIACONRIGA: Codice piano dei conti di riga
9. acbil_mrett_m.RENPROGVSCH: Progressivo scheda contabile
10. ri_dettsy_2018.CHSOMERE: Somme erogate(riservata al soggetto erogatore delle somme)
11. acbil_mrett.RECPIACONRIGA: Codice piano dei conti (di riga)
12. ri_dettsy_2016.CHRITOPE

In [11]:
# 🧠 Step 4: LLM filtering on pruned columns
# Hand the keyword string, the pruned columns and the keyword list to the LLM filter.
print_header("Step 4: LLM filtering on pruned columns...")
llm_filtered_columns = llama_filter_columns_by_keywords(
    keyword_string,
    pruned_columns,
    keywords,
)

# Report the empty case first, otherwise list what survived the filter.
if not llm_filtered_columns:
    print("No columns passed the LLM filtering step")
else:
    pretty_print_list("Filtered Columns (LLM)", llm_filtered_columns)


Step 4: LLM filtering on pruned columns...





Filtered Columns (LLM):
1. - ri_dettsy_2011.CHSOMERE
2. - fi_agoregass.RECPIACONRIGA
3. - ri_dettcu_h1_2011.CHSOMERE
4. - ri_dettsy_2011.CHRITOPE
5. - ri_tmpgensto.RINUMROW
6. - ri_tmpgensto.RICONERA
7. - ba_mycompany.SARICANA
8. - ri_dettsy_2011.CHCODFI2
9. - ri_dettsy_2011.RENPROGVSCH


In [12]:
# 🧮 Step 5: Table Linking via LLM
print_header("Step 5: LLM-based table linking (using filtered columns)...")

# Candidate tables are the text before the first "." of each filtered column,
# de-duplicated in first-seen order. dict.fromkeys is used instead of set()
# because the iteration order of a set of strings changes between interpreter
# sessions (hash randomization), which would make the candidate order handed
# to the LLM differ from one kernel restart to the next.
# NOTE(review): the recorded Step 4 output shows entries beginning with "- ";
# that marker is carried into these table names unchanged — confirm it is intended.
candidate_tables = list(dict.fromkeys(col.split('.')[0] for col in llm_filtered_columns))

linked_tables = llama_table_linking(question, candidate_tables)

if linked_tables:
    pretty_print_list("Linked Tables (LLM)", linked_tables)
else:
    print("No tables linked by LLM")


Step 5: LLM-based table linking (using filtered columns)...





Linked Tables (LLM):
1. - ri_dettsy_2011
2. - ri_dettcu_h1_2011


In [None]:
# 🔗 Step 6: Final Column Linking
print_header("Step 6: LLM-based final column linking...")


def _strip_list_marker(text):
    """Return ``text`` without surrounding whitespace or a leading "-", "*" or "•" list marker."""
    return text.strip().lstrip("-*•").strip()


# Keep only the filtered columns whose table (the text before the first ".")
# is among the linked tables. The recorded outputs of Steps 4 and 5 show entries
# prefixed with "- "; both sides are normalised before comparing so the match
# does not depend on the two LLM calls formatting their lists identically.
# The column strings themselves are passed on unmodified.
linked_table_keys = {_strip_list_marker(table) for table in linked_tables}
final_columns = [
    col
    for col in llm_filtered_columns
    if _strip_list_marker(col.split('.')[0]) in linked_table_keys
]
linked_columns = llama_column_linking(keyword_string, final_columns)

if linked_columns:
    print_summary(question, keywords, linked_tables, linked_columns)
else:
    print("No columns linked by LLM")


Step 6: LLM-based final column linking...


