# Presentazione esperimento

**Panoramica**
Integra Manuale di Cucina, Codice Galattico e distanze planetarie per coprire anche le domande Hard, mantenendo la compatibilità con le richieste più semplici.

**Pipeline dati**
- Mantiene parsing/classificazione/mapping dei menu per retrocompatibilità.
- Aggiunge ingestion ed estrazione (`extraction_call`) su Manuale e Codice per ricavare categorie tecniche, requisiti di licenza e limiti normativi.
- Calcola mapping incrociati (licenze->tecniche, tecniche->piatti) e utilizza `Distanze.csv` per ragionare sulle distanze.

**Evoluzione dell'engine**
- `engine_hard` arricchisce ulteriormente il toolset con `get_technique_from_category`, `get_dish_from_minimum_licence`, `get_dishes_with_both_technique_categories` e `get_dishes_within_distance`.
- I tool Medium restano disponibili, consentendo sia filtri geospaziali sia controlli su licenze minime e combinazioni di categorie.
- Le set-ops finali rifiniscono i risultati dopo catene di ragionamento multi-fonte.

**Ruolo nel percorso**
Costituisce l'implementazione più completa prima del livello Impossible, dimostrando come le diverse fonti vengano orchestrate nello stesso agente.

**Performance (Jaccard)**: 99.31%



# Setup

In [None]:
import os
from pathlib import Path
import sys


	
# Resolve the notebook's working directory; the project root is taken to be
# two levels above it (NOTE(review): confirm against the repository layout).
cwd = Path.cwd().resolve()
project_dir = cwd.parent.parent

# Put the project root on sys.path so the `src.*` imports used by the
# following cells can be resolved. Guarded to avoid duplicate entries when the
# cell is re-run.
project_dir_str = str(project_dir)
if project_dir_str not in sys.path:
    sys.path.insert(0, project_dir_str)

dataset_file_path = project_dir / "Dataset"
artifacts_file_path = cwd / "artifacts"

# Create the artifacts directory idempotently. `exist_ok=True` replaces the
# previous check-then-create sequence, which could fail if the directory
# appeared between the check and the creation.
artifacts_file_path.mkdir(parents=True, exist_ok=True)

# Preprocessing

## Menu

### Parsing e aggregazione

In [None]:
from src.preprocessing.menu_ingestion import group_and_concatenate_documents, parse_documents_in_directory
from src.utils import write_json


# Parse every document found in the menu folder, then regroup the parsed
# pages into `documents` (a mapping of document name -> text, as used below).
menus_path = dataset_file_path / "Knowledge_base" / "menu"
documents_pages = parse_documents_in_directory(document_path=[menus_path])
documents = group_and_concatenate_documents(documents=documents_pages)

# Persist the parsed menus so later cells can reload them from disk.
parsed_menus_file = artifacts_file_path / "parsed_menus.json"
write_json(documents, parsed_menus_file)

# Show the first 100 characters of the first five documents as a sanity check.
preview_items = list(documents.items())[:5]
for doc_name, doc_text in preview_items:
    print(f"Document: {doc_name}\nContent Preview: {doc_text[:100]}...\n")

### Classificazione

In [None]:
from src.preprocessing.menu_classification import classify_menu

# Classify each parsed menu and store the result as a JSON artifact.
classifications = classify_menu(
    text_extracted=documents,
    model_name="grok-4-1-fast-reasoning",
)
classifications_file = artifacts_file_path / "menu_classifications.json"
write_json(classifications, classifications_file)

# Print the classification of the first five documents for inspection.
classification_preview = list(classifications.items())[:5]
for doc_name, classification in classification_preview:
    print(f"Document: {doc_name}\nClassification: {classification}\n")

### Estrazione struttura

In [None]:
from src.preprocessing.menu_extraction import extract_info_from_menus
from src.utils import read_json, write_json

# Reload the artifacts written by the previous cells so this cell can run on
# its own after a kernel restart.
documents = read_json(artifacts_file_path / "parsed_menus.json")
classifications = read_json(artifacts_file_path / "menu_classifications.json")

# Extract structured information from the menus and persist it.
extracted_info = extract_info_from_menus(
    documents=documents,
    classifications=classifications,
    model_name="grok-4-1-fast-reasoning",
)
extracted_info_file = artifacts_file_path / "extracted_menu_info.json"
write_json(extracted_info, extracted_info_file)

# Preview the first five extracted records, labelled by restaurant name
# (falling back to "Unknown" when the key is missing).
first_records = extracted_info[:5]
for info in first_records:
    restaurant_name = info.get("restaurant_name", "Unknown")
    print(f"Document: {restaurant_name}\nExtracted Info: {info}\n")

### Creazione di mapping

In [None]:
from src.preprocessing.menu_mapping import create_mappings_planets_restaurant_skills
from src.utils import read_json,write_json

# Load the dish mapping shipped with the dataset's ground truth folder.
dish_mapping_file = dataset_file_path / "ground_truth" / "dish_mapping.json"
dish_mapping = read_json(dish_mapping_file)

# Build the three lookup tables from the extracted menu information.
# `extracted_info` is the in-memory result of the extraction cell.
(
    planet_to_dishes,
    restaurant_to_dishes,
    skill_to_dishes,
) = create_mappings_planets_restaurant_skills(
    extracted_info=extracted_info,
    dish_mapping=dish_mapping,
)

# Write each mapping to its own JSON artifact.
for artifact_name, mapping in (
    ("planet_to_dishes.json", planet_to_dishes),
    ("restaurant_to_dishes.json", restaurant_to_dishes),
    ("skill_to_dishes.json", skill_to_dishes),
):
    write_json(mapping, artifacts_file_path / artifact_name)


In [None]:
from src.preprocessing.menu_mapping import create_mappings_technique_ingredient

# Build the ingredient and technique lookup tables. This relies on
# `extracted_info` and `dish_mapping` still being in memory from earlier cells.
ingredient_to_dishes, technique_to_dishes = create_mappings_technique_ingredient(
    extracted_info=extracted_info,
    dish_mapping=dish_mapping,
)

# Persist both mappings as JSON artifacts.
for artifact_name, mapping in (
    ("ingredient_to_dishes.json", ingredient_to_dishes),
    ("technique_to_dishes.json", technique_to_dishes),
):
    write_json(mapping, artifacts_file_path / artifact_name)

## Manuale di Cucina

### Parsing e aggregazione

In [None]:
from src.preprocessing.menu_ingestion import group_and_concatenate_documents, parse_documents_in_directory
from src.utils import write_json


# Parse the "Manuale di Cucina" PDF. Note that `menus_path` here points at a
# single file (passed via `file_path`), not at the menu folder.
menus_path = dataset_file_path / "Knowledge_base" / "misc" / "Manuale di Cucina.pdf"
documents_pages = parse_documents_in_directory(file_path=[menus_path])
documents = group_and_concatenate_documents(documents=documents_pages)

# Persist the parsed manual.
parsed_manuale_file = artifacts_file_path / "parsed_manuale_di_cucina.json"
write_json(documents, parsed_manuale_file)

# Show the first 100 characters of (up to) five parsed documents.
preview_items = list(documents.items())[:5]
for doc_name, doc_text in preview_items:
    print(f"Document: {doc_name}\nContent Preview: {doc_text[:100]}...\n")

### Estrazione strutturata e mapping

In [None]:
from src.ai.agents.extractor import extraction_call
from src.ai.models.manuale_extractor import ResultExtracted
from src.ai.prompts.manuale_extractor import INPUT_PROMPT, SYSTEM_PROMPT
from src.preprocessing.manuale_mapping import mapping_category_technique

# Full text of the manual, taken from the `documents` mapping built by the
# preceding parsing cell.
text = documents["Manuale di Cucina.pdf"]

# Run the extraction with the manual-specific prompts and output schema
# imported at the top of this cell.
manuale_extraction_kwargs = {
    "text": text,
    "model_name": "gpt-4.1",
    "system_prompt": SYSTEM_PROMPT,
    "input_prompt": INPUT_PROMPT,
    "output_cls": ResultExtracted,
}
category_techniques = extraction_call(**manuale_extraction_kwargs)

# Map the extraction result and store it; `category_techniques_mapped` is
# reused later when the engine's system prompt is built.
category_techniques_mapped = mapping_category_technique(category_techniques)
category_to_techniques_file = artifacts_file_path / "category_to_techniques.json"
write_json(category_techniques_mapped, category_to_techniques_file)

## Codice galattico

### Parsing e aggregazione

In [None]:
from src.preprocessing.menu_ingestion import group_and_concatenate_documents, parse_documents_in_directory
from src.utils import write_json


# Parse the "Codice Galattico" PDF. Note that `menus_path` here points at a
# single file (passed via `file_path`), not at the menu folder.
menus_path = dataset_file_path / "Knowledge_base" / "codice_galattico" / "Codice Galattico.pdf"
documents_pages = parse_documents_in_directory(file_path=[menus_path])
documents = group_and_concatenate_documents(documents=documents_pages)

# Write to a dedicated artifact. The previous target,
# "parsed_manuale_di_cucina.json", silently overwrote the parsed Manuale di
# Cucina produced by the earlier cell.
write_json(documents, artifacts_file_path / "parsed_codice_galattico.json")

# Show the first 100 characters of (up to) five parsed documents.
for doc_name, doc_text in list(documents.items())[:5]:
    print(f"Document: {doc_name}\nContent Preview: {doc_text[:100]}...\n")

### Estrazione strutturata e mapping

In [None]:
from src.ai.agents.extractor import extraction_call
from src.ai.models.codice_extractor import ResultExtracted
from src.ai.prompts.codice_extractor import INPUT_PROMPT, SYSTEM_PROMPT
from src.preprocessing.codice_mapping import mapping_licences

# Full text of the code, taken from the `documents` mapping built by the
# preceding parsing cell.
text = documents["Codice Galattico.pdf"]

# Run the extraction with the Codice-specific prompts and output schema
# imported at the top of this cell (they rebind the names used for the manual).
codice_extraction_kwargs = {
    "text": text,
    "model_name": "gpt-4.1",
    "system_prompt": SYSTEM_PROMPT,
    "input_prompt": INPUT_PROMPT,
    "output_cls": ResultExtracted,
}
licence_to_techniques = extraction_call(**codice_extraction_kwargs)

# Map the extraction result and persist it as a JSON artifact.
licence_to_techniques_dict = mapping_licences(licence_to_techniques)
licence_to_techniques_file = artifacts_file_path / "licence_to_techniques.json"
write_json(licence_to_techniques_dict, licence_to_techniques_file)

# Engine

## Agente conversazionale / interrogazioni

In [None]:
from src.ai.prompts.hard_engine import SYSTEM_PROMPT

# Render the category names as a bullet-style list and inject them into the
# engine's prompt template.
# NOTE(review): the separator only goes *between* items, so the first category
# gets no "- " prefix here; presumably the template supplies it — confirm.
category_names = category_techniques_mapped.keys()
categories_str = "\n- ".join(category_names)
system_prompt = SYSTEM_PROMPT.format(categories=categories_str)

In [None]:
from src.ai.agents.engine_hard import get_agent, query_dish_ids

# Build the agent with the system prompt prepared in the previous cell.
agent = get_agent(
    model_name="grok-4-1-fast-reasoning",
    system_prompt=system_prompt,
)

# Smoke-test the agent with one question that combines a technique category
# with a distance constraint.
question = "Quali piatti sono stati creati utilizzando almeno una tecnica di SURGELAMENTO secondo il Manuale di Cucina di Sirius Cosmo e sono stati serviti in un ristorante situato entro un raggio di 317 anni luce dal pianeta Krypton, Krypton incluso?"
response = query_dish_ids(question=question, agent=agent)
print(response)

## Valutazione

In [None]:
from src.evaluation.questions_evaluation import evaluate_questions

# Locations of the question set and of the mapped ground truth.
question_path = dataset_file_path / "domande.csv"
ground_truth_path = dataset_file_path / "ground_truth" / "ground_truth_mapped.csv"

# Evaluate the agent and save the per-question results.
# NOTE(review): the run uses level="all" while the output file is named
# "hard_..." — confirm this is intentional.
eval_df = evaluate_questions(
    agent=agent,
    question_path=question_path,
    ground_truth_path=ground_truth_path,
    level="all",
)
results_file = artifacts_file_path / "hard_questions_evaluation_results.csv"
eval_df.to_csv(results_file, index=False)

In [None]:
# Average the per-question scores and express the result as a percentage
# rounded to two decimals.
# NOTE(review): the label says "Accuracy" while the notebook header reports the
# metric as Jaccard — confirm which name is intended.
mean_score = eval_df["score"].mean()
score = round(mean_score * 100, 2)
print(f"Accuracy: {score}%")