<a href="https://colab.research.google.com/github/mmistroni/Magentic-AlgoTrading101/blob/main/Restaurant_Menu_Cost_Calculator_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### An experiment using a multimodal LLM to estimate how much it costs to dine in Gabicce Mare.
The notebook loads images of restaurant menus and asks an LLM to calculate how much a standard meal for a family of 3 costs in a restaurant in Gabicce Mare (Italy).
I have downloaded the menu images and stored them on my Google Drive.
I then generate a base64 representation of each image and ask the model to interpret it.

The next step is to extract structured data from the images, load it
into a vector store and run some queries against it.

In [None]:
# Mounting the drive

import google.generativeai as genai
import os
import base64
import io
from google.colab import drive

# --- Step 1: Mount Google Drive ---
# Mounts Google Drive under '/content/drive', the path prefix that `image_folder_path`
# (defined in the next cell) points into. The prints only bracket the mount call so the
# cell output shows when it started and finished.
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted successfully.")

In [None]:
# --- Step 2: Define the path to your images in Google Drive ---
# IMPORTANT: Update this path to where your images are stored in your Google Drive.
# Example: '/content/drive/MyDrive/MyImagesFolder'
# The base64-encoding cell below reads this value as its `image_directory`.
image_folder_path = '/content/drive/MyDrive/Menus' # <--- UPDATE THIS PATH


### Use the cell below to generate a base64 representation of your images

In [None]:
# This cell generates a base64 representation of each image so that I can embed all the images in my notebook. You don't need to run it for my example,
# but if you want to load your own images you will need to:
# 1. load the images in a folder called menus - see folder on the right
# 2. run the cell below, which will output a base64 image
# 3. copy the output in each new cell
import base64
import os
from IPython.display import Image, display, Markdown

# Directory that holds the menu images (configured in the path cell above).
image_directory = image_folder_path#'menus'

# File extensions treated as images, mapped to the MIME type stored with each payload.
# Keeping the filter and the MIME lookup in one table stops them drifting apart.
IMAGE_MIME_TYPES = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
}

# --- Step 1: List all image files in the specified directory ---
image_files = []

# One (filename, mime_type, base64_string) tuple per successfully encoded image;
# later cells iterate over this list.
encoded_images = []

try:
    if not os.path.isdir(image_directory):
        print(f"Error: Directory '{image_directory}' not found in the current Colab session.")
        print("Please ensure you have uploaded your 'menus' folder correctly.")
    else:
        # os.listdir() returns entries in arbitrary order; sorting keeps the processing
        # order (and therefore the order of `encoded_images`) identical between runs.
        image_files = sorted(
            filename
            for filename in os.listdir(image_directory)
            if filename.lower().endswith(tuple(IMAGE_MIME_TYPES))
        )

        # Only report "no images" when the directory exists, so a missing directory
        # does not produce two contradictory messages.
        if not image_files:
            print(f"No image files found in '{image_directory}'. Please check the folder content.")
        else:
            print(f"Found {len(image_files)} image(s) in '{image_directory}': {', '.join(image_files)}")

except Exception as e:
    print(f"An error occurred while listing files: {e}")

# --- Step 2: Process and output Base64 for each found image ---
if image_files:
    print("\n--- Generating Base64 Markdown for all found images: ---")
    print("-----------------------------------------------------------------")

    for selected_filename in image_files:
        full_image_path = os.path.join(image_directory, selected_filename)

        print(f"\nProcessing '{selected_filename}'...")

        # --- Step 3: Read and Base64 encode the selected image ---
        try:
            with open(full_image_path, 'rb') as img_file:
                encoded_string = base64.b64encode(img_file.read()).decode('utf-8')
        except FileNotFoundError:
            print(f"Error: Image file '{full_image_path}' not found. It might have been deleted.")
        except Exception as e:
            print(f"An error occurred during encoding or display for '{selected_filename}': {e}")
        else:
            # Determine MIME type from the file extension (PNG is the default).
            extension = os.path.splitext(selected_filename)[1].lower()
            mime_type = IMAGE_MIME_TYPES.get(extension, "image/png")

            encoded_images.append((selected_filename, mime_type, encoded_string))
else:
    print("\nNo images available to process. Please upload images to the 'menus' folder.")

# Dump the collected tuples (this includes the full base64 payload of every image).
from pprint import pprint
pprint(encoded_images)




### Calling model to extract menu from images

In [None]:
from re import M
import base64
import requests
import json
from PIL import Image
import io
from google.colab import userdata

# --- Configuration ---
# The Gemini API key is looked up through `userdata.get('GOOGLE_API_KEY')`, i.e. it is read
# from a Colab secret with that name rather than being written into the notebook.
# GEMINI_API_URL is the REST endpoint (model `gemini-2.0-flash`, method `generateContent`)
# that `extract_menu_from_image` posts to.
API_KEY = userdata.get('GOOGLE_API_KEY') # Read from the Colab secret named 'GOOGLE_API_KEY'.
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

# --- Function: Extract Menu Items using Gemini API ---
# Seconds to wait for the Gemini endpoint before giving up; without a timeout a stalled
# connection would block the notebook indefinitely.
_GEMINI_TIMEOUT_SECONDS = 120

# Leading bytes ("magic numbers") of the image formats this notebook encodes, used to pick
# the MIME type sent to Gemini when the caller does not supply one.
_IMAGE_SIGNATURES = (
    (b"\x89PNG\r\n\x1a\n", "image/png"),
    (b"\xff\xd8\xff", "image/jpeg"),
    (b"GIF87a", "image/gif"),
    (b"GIF89a", "image/gif"),
    (b"BM", "image/bmp"),
)


def _guess_image_mime_type(base64_image_data, default="image/jpeg"):
    """Return the MIME type implied by the first bytes of a base64-encoded image.

    Falls back to `default` when the header cannot be decoded or is not recognised.
    """
    try:
        # 16 base64 characters decode to exactly 12 bytes, enough for every signature below.
        header = base64.b64decode(base64_image_data[:16])
    except (ValueError, TypeError):
        # binascii.Error (bad padding / alphabet) is a ValueError subclass.
        return default

    # WEBP is a RIFF container: "RIFF" <4-byte size> "WEBP".
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    for signature, mime_type in _IMAGE_SIGNATURES:
        if header.startswith(signature):
            return mime_type
    return default


def extract_menu_from_image(base64_image_data, prompt_text, mime_type=None):
    """
    Calls the Gemini API to extract menu items and prices from an image.
    It uses a structured response schema to get JSON output.

    Args:
        base64_image_data: Base64-encoded image content.
        prompt_text: Instruction sent to the model alongside the image.
        mime_type: MIME type of the image (e.g. "image/png"). When omitted it is
            detected from the image header, defaulting to "image/jpeg".

    Returns:
        On success, the parsed JSON returned by the model (per the response schema, a
        list of {"item": str, "price": number} objects). On any failure, a dict of the
        form {"error": "<message>"}; no exception is propagated to the caller.
    """
    if not base64_image_data:
        return {"error": "No image data provided."}

    if mime_type is None:
        mime_type = _guess_image_mime_type(base64_image_data)

    headers = {
        "Content-Type": "application/json",
        # The key travels in a header rather than a `?key=` query parameter: requests
        # embeds the URL in its error messages, which are printed below, so a key in the
        # URL would leak into the notebook output.
        "x-goog-api-key": API_KEY,
    }

    # Define the structured schema for the response
    response_schema = {
        "type": "ARRAY",
        "items": {
            "type": "OBJECT",
            "properties": {
                "item": {"type": "STRING"},
                "price": {"type": "NUMBER"}
            },
            "required": ["item", "price"]
        }
    }

    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {"text": prompt_text},
                    {
                        "inlineData": {
                            "mimeType": mime_type,
                            "data": base64_image_data
                        }
                    }
                ]
            }
        ],
        "generationConfig": {
            "responseMimeType": "application/json",
            "responseSchema": response_schema
        }
    }

    try:
        response = requests.post(
            GEMINI_API_URL,
            headers=headers,
            json=payload,
            timeout=_GEMINI_TIMEOUT_SECONDS,
        )
        response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
        result = response.json()

        candidates = result.get('candidates')
        content = candidates[0].get('content') if candidates else None
        parts = content.get('parts') if content else None
        if not parts:
            print("Unexpected API response structure:", result)
            return {"error": "Could not extract menu items. Unexpected API response."}

        # The API returns the JSON as a string within the 'text' field
        return json.loads(parts[0]['text'])

    except requests.exceptions.RequestException as e:
        print(f"API request failed: {e}")
        return {"error": f"API request failed: {e}"}
    except json.JSONDecodeError as e:
        print(f"Failed to decode JSON response: {e}")
        print(f"Raw response text: {response.text}")
        return {"error": f"Failed to decode JSON response: {e}"}
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return {"error": f"An unexpected error occurred: {e}"}

# --- Function: Calculate Meal Cost ---
# Rough per-item prices (in euros) used when a desired item cannot be priced from the
# extracted menu. Checked in order; the first keyword contained in the item name wins.
_FALLBACK_PRICES = (
    ("patatine fritte", 4.50),
    ("insalata", 4.50),
    ("pasta", 15.00),  # also covers "primi piatti di pasta"
    ("pizza", 10.00),
)


def _lookup_menu_price(extracted_menu, item_name):
    """Return the first usable price of a menu entry whose name contains `item_name`, else None."""
    # extract_menu_from_image returns an {"error": ...} dict on failure; treat anything
    # that is not a list of entries as "no menu available".
    if not isinstance(extracted_menu, list):
        return None

    wanted = item_name.lower()
    for menu_item in extracted_menu:
        if not isinstance(menu_item, dict):
            continue
        name = menu_item.get('item')
        price = menu_item.get('price')
        if not isinstance(name, str):
            continue
        if isinstance(price, bool) or not isinstance(price, (int, float)):
            continue
        # The extraction prompt asks the model to emit 0.0 when no price is printed, so a
        # non-positive price means "unknown" and must not be counted as a free item.
        if wanted in name.lower() and price > 0:
            return price
    return None


def _fallback_price(item_name):
    """Return the estimated price for `item_name` from _FALLBACK_PRICES, or None if no keyword matches."""
    lowered = item_name.lower()
    for keyword, price in _FALLBACK_PRICES:
        if keyword in lowered:
            return price
    return None


def calculate_meal_cost(extracted_menu, desired_items, num_people, price_increase_percent, coperto_per_person=2.50):
    """
    Calculates the total estimated cost for a meal based on extracted menu prices,
    desired items, number of people, and a price increase percentage.

    Args:
        extracted_menu: List of {"item": str, "price": number} dicts. Any other value
            (for example the {"error": ...} dict produced by a failed extraction) is
            treated as an empty menu, so every item falls back to an estimate.
        desired_items: Mapping of item name -> quantity. Names are matched
            case-insensitively as substrings of the menu item names.
        num_people: Number of diners; only used for the cover charge.
        price_increase_percent: Percentage added to the food subtotal.
        coperto_per_person: Cover charge per diner, added after the increase.

    Returns:
        The estimated total as a float. A breakdown is printed along the way.
    """
    total_food_cost = 0.0
    print("\n--- Calculating Meal Cost ---")
    print(f"Desired items for {num_people} people:")

    for item_name, quantity in desired_items.items():
        found_price = _lookup_menu_price(extracted_menu, item_name)

        if found_price is not None:
            cost_for_item = found_price * quantity
            total_food_cost += cost_for_item
            print(f"- {quantity}x {item_name}: €{found_price:.2f} each -> €{cost_for_item:.2f}")
            continue

        print(f"- Warning: '{item_name}' not found in the extracted menu.")
        # Fallback if the OCR/LLM missed the item or the requested name is generic.
        estimated_price = _fallback_price(item_name)
        if estimated_price is None:
            print("  (No estimate available; skipping this item.)")
            continue

        cost_for_item = estimated_price * quantity
        total_food_cost += cost_for_item
        print(f"  (Using estimated price: €{estimated_price:.2f} for {item_name} -> €{cost_for_item:.2f})")

    print(f"\nSubtotal for food before increase: €{total_food_cost:.2f}")

    # Apply price increase
    increased_food_cost = total_food_cost * (1 + price_increase_percent / 100)
    print(f"Subtotal for food after {price_increase_percent}% increase: €{increased_food_cost:.2f}")

    # Add coperto (the per-person cover charge, not subject to the increase)
    total_coperto_cost = coperto_per_person * num_people
    print(f"Coperto ({coperto_per_person:.2f} per person for {num_people} people): €{total_coperto_cost:.2f}")

    final_total_cost = increased_food_cost + total_coperto_cost
    print(f"\nEstimated total cost for {num_people} people: €{final_total_cost:.2f}")

    return final_total_cost

# --- Main Execution ---
# Send every image encoded by the base64 cell to Gemini and keep the result per file name.
# `menus` maps image file name -> whatever extract_menu_from_image returned for it.
from pprint import pprint

# The prompt is identical for every image, so it is built once outside the loop.
prompt = "Extract all menu items and their prices from this image. Provide the output as a JSON array of objects, where each object has 'item' (string) and 'price' (number) keys. If a price is not explicitly stated, use 0.0. Do not include any introductory or concluding text, just the JSON array."

menus = {}
# NOTE(review): `typ` (the MIME type recorded during encoding) is unpacked here but is not
# forwarded to the extractor.
for title, typ, base_64_image in encoded_images:
    print("\n--- Calling Gemini API to extract menu items ---")
    extracted_menu_data = extract_menu_from_image(base_64_image, prompt)
    menus[title] = extracted_menu_data


# Show what was extracted for each image.
for key, men in menus.items():
  print(f'----------------- {key}-------------')
  pprint(men)



### Creating Vector Store

In [None]:
!pip install chromadb sentence-transformers
!pip install langchain_google_vertexai
!pip install langchain_community
!pip install langchain
!pip install --quiet langchain chromadb sentence-transformers openai langchain-openai

In [None]:
import chromadb
from chromadb.utils import embedding_functions
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import os
from google.colab import userdata  # Colab Secrets accessor

CHROMA_DB_PATH = "./chroma_db_data_openai"
COLLECTION_NAME = "restaurant_menus_openai"
# One model name shared by population and querying so both produce comparable vectors.
EMBEDDING_MODEL_NAME = "text-embedding-ada-002" # or "text-embedding-3-small", "text-embedding-3-large"

# --- 0. Export the OpenAI key BEFORE building the embedding functions ---
# Both wrappers below are created without an explicit api_key and rely on the environment.
# On a fresh kernel ("Restart and run all") nothing has exported the key when this cell
# runs, so it is exported here. Re-exporting the same values later is harmless.
_openai_api_key = userdata.get('OPENAI_API_KEY')
os.environ["OPENAI_API_KEY"] = _openai_api_key
os.environ["CHROMA_OPENAI_API_KEY"] = _openai_api_key

# --- 1. Initialize Embedding Functions (for both populating and querying) ---
# For populating ChromaDB directly (native ChromaDB API):
openai_chroma_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name=EMBEDDING_MODEL_NAME
)

# For LangChain to generate query embeddings; must match the model used for population.
langchain_embeddings = OpenAIEmbeddings(
    model=EMBEDDING_MODEL_NAME
)



In [None]:

# 3. Prepare texts and metadatas for the vector store
texts = []
metadatas = []
ids = [] # Unique IDs for each entry
current_id = 0

for key, restaurant_menu in menus.items():
  print(f'----------------- {key}-------------')
  # The image file name (up to the first dot) doubles as the restaurant name.
  restaurant_name = key.split('.')[0]

  # A failed extraction is stored as an {"error": ...} dict rather than a list of dishes;
  # iterating it would yield string keys and crash on dish['item'].
  if not isinstance(restaurant_menu, list):
    print(f"Skipping '{key}': menu extraction did not return a list of items ({restaurant_menu}).")
    continue

  for dish in restaurant_menu:
    # Only the document text reaches the LLM prompt by default, and the RAG prompt asks
    # for the restaurant name, so the name is part of the text and not only the metadata.
    text_content = f"{restaurant_name}: {dish['item']} - {dish['price']}"
    texts.append(text_content)
    metadatas.append({"restaurant_name": f"{restaurant_name}", "item_name": dish['item'], "price": dish['price']})
    ids.append(f"{restaurant_name}_{current_id}")
    current_id += 1

# --- 3. Initialize Persistent ChromaDB client and collection (POPULATION PHASE) ---
# Use PersistentClient so the data isn't lost within the Colab session
client = chromadb.PersistentClient(path=CHROMA_DB_PATH)

# Delete collection if it already exists (for clean re-runs during development)
try:
    client.delete_collection(name=COLLECTION_NAME)
    print(f"Deleted existing collection: {COLLECTION_NAME}")
except Exception as e:
    # The collection might simply not exist yet; report it and carry on.
    print(f"Collection '{COLLECTION_NAME}' did not exist or could not be deleted: {e}")

# Create the collection with the OpenAI embedding function defined earlier
collection = client.create_collection(
    name=COLLECTION_NAME,
    embedding_function=openai_chroma_ef # Pass the instance here
)

# Add data to the vector store (only if there is something to add and it is not already populated)
if not texts:
    print("No menu items were extracted, so there is nothing to add to the vector store.")
elif collection.count() == 0: # Check if collection is empty
    collection.add(
        documents=texts,
        metadatas=metadatas,
        ids=ids
    )
    print(f"Successfully added {len(texts)} menu items to the vector store.")
else:
    print(f"Collection '{COLLECTION_NAME}' already contains {collection.count()} items. Skipping re-population.")


print("\n--- ChromaDB Population Complete ---")
print(f"ChromaDB data stored at: {CHROMA_DB_PATH}")



### Query for Margherita

### Kicking off Langchain Pipeline

In [None]:
import os
from google.colab import userdata # Import userdata to access Colab Secrets

# Retrieve the OpenAI API Key from Colab Secrets
# The key name here must match the name you set in the Secrets tab (e.g., OPENAI_API_KEY)
openai_api_key = userdata.get('OPENAI_API_KEY')

# Set it as an environment variable for LangChain and OpenAI libraries to pick up.
# The same value is exported under two names: OPENAI_API_KEY and CHROMA_OPENAI_API_KEY
# (presumably the latter is the variable ChromaDB's OpenAI embedding function reads - confirm
# against the installed chromadb version).
os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["CHROMA_OPENAI_API_KEY"] = openai_api_key

In [None]:
# --- LangChain Integration (RETRIEVAL & GENERATION PHASE) ---

# 4. Initialize OpenAI LLM
try:
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.3,
    )
    print(f"Successfully initialized OpenAI LLM: {llm.model_name}")
except Exception as e:
    print(f"Error initializing OpenAI LLM. Make sure your OPENAI_API_KEY is correct and accessible. Error: {e}")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the kernel down and
    # discards all state, whereas re-raising stops this cell and keeps the traceback.
    raise

# 5. Load your ChromaDB collection into LangChain's Chroma wrapper
# (reuses the client and collection name from the population cell)
langchain_chroma_vectorstore = Chroma(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding_function=langchain_embeddings
)
print(f"Successfully loaded ChromaDB collection '{COLLECTION_NAME}' into LangChain.")

# 6. Create a Retriever from the LangChain Chroma vector store
# Similarity search returning the 3 closest documents per query.
retriever = langchain_chroma_vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)
print("Retriever created.")

In [None]:
# Smoke-test the RetrievalQA chain with a broad question about the indexed restaurants.
# `qa_chain` is only created by the chain-building cell further down this notebook
# (step 8), so on a fresh kernel this cell would otherwise abort "Run all" with a NameError.
print("\n--- Testing RetrievalQA Chain ---")
query1 = "Which restaurants are there in gabicce ?"
if "qa_chain" not in globals():
    print("qa_chain is not defined yet: run the cell that builds the RetrievalQA chain (step 8) first, then re-run this cell.")
else:
    response1 = qa_chain.invoke({"query": query1})
    print(f"Question: {query1}")
    print(f"Answer: {response1['result']}")
    if response1.get('source_documents'):
        print("Source Documents:")
        for doc in response1['source_documents']:
            print(f"  - Content: '{doc.page_content}' | Metadata: {doc.metadata}")
print("-" * 50)


In [None]:
# 7. Define a custom prompt template for RAG
# The template exposes two placeholders: {context} and {question}.
rag_prompt_template = """You are a helpful assistant that answers questions about restaurant menus.
Use the following pieces of context to answer the user's question.
If the context does not contain the answer, state that you don't know based on the provided information.
Provide the restaurant name, item, and price if available.

Context:
{context}

Question: {question}
Answer:"""

RAG_PROMPT = PromptTemplate.from_template(rag_prompt_template)

# 8. Build the LangChain RAG Chain
# Both chains below share the same `llm`, `retriever` and RAG_PROMPT.

# Option A: Simple RetrievalQA Chain (single-turn; this is the chain the test cells invoke)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff", # 'stuff' concatenates all retrieved documents into the prompt
    retriever=retriever,
    return_source_documents=True, # Will return the documents used to generate the answer
    chain_type_kwargs={"prompt": RAG_PROMPT}
)
print("RetrievalQA chain created.")

# Option B: Conversational Retrieval Chain (for multi-turn Q&A)
# Built here for later use; it is not invoked anywhere in this cell.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
conversational_qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": RAG_PROMPT} # Use custom prompt for combining docs
)
print("ConversationalRetrievalChain created.")

# --- 9. Test your RAG application ---
# Ask one question through qa_chain and print the answer plus the documents it was based on.

print("\n--- Testing RetrievalQA Chain ---")
query1 = "What kind of pizzas do they serve across all restaurants?"
response1 = qa_chain.invoke({"query": query1})
print(f"Question: {query1}")
print(f"Answer: {response1['result']}")
# 'source_documents' is present because the chain was built with return_source_documents=True.
if response1.get('source_documents'):
    print("Source Documents:")
    for doc in response1['source_documents']:
        print(f"  - Content: '{doc.page_content}' | Metadata: {doc.metadata}")
print("-" * 50)


In [None]:
# Ask the RetrievalQA chain about pasta dishes and print the answer together with the
# retrieved source documents. Reuses (and overwrites) the notebook-level names `query1`
# and `response1` that the previous test cells also assign.
print("\n--- Testing RetrievalQA Chain ---")
query1 = "Can you list all the pasta available across all restaurants?"
response1 = qa_chain.invoke({"query": query1})
print(f"Question: {query1}")
print(f"Answer: {response1['result']}")
if response1.get('source_documents'):
    print("Source Documents:")
    for doc in response1['source_documents']:
        print(f"  - Content: '{doc.page_content}' | Metadata: {doc.metadata}")
print("-" * 50)


In [None]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils import embedding_functions
from statistics import mean # For calculating the average
# Now, to answer "What is the average price of pizza margherita?"

query_item_name = "Margherita" # Substring that an item name must contain to count as a Margherita
# Exact item name as it appears in the extracted data (used for the direct metadata match).
exact_margherita_item_name = "Margherita (Pom, mozzarella)"

# Two strategies are compared:
#   Option 1 - exact match on the 'item_name' metadata (precise, but misses name variations)
#   Option 2 - semantic search, then a Python-side substring filter on 'item_name'
# In both cases non-positive prices are ignored: the extraction prompt tells the model to
# emit 0.0 when no price is printed, so those entries would drag the average down.

# Option 1: Direct Metadata Match (most precise if 'item_name' is exact)
results_margherita_direct = collection.get(
    where={"item_name": exact_margherita_item_name},
    include=['metadatas'] # Only need the metadata to get the price
)

prices_found_direct = [
    m['price']
    for m in (results_margherita_direct['metadatas'] or [])
    if isinstance(m.get('price'), (int, float)) and m['price'] > 0
]

if prices_found_direct:
    average_price_margherita_direct = mean(prices_found_direct)
    # Prices come from Italian menus, so they are reported in euros.
    print(f"\nAverage price of '{exact_margherita_item_name}' (direct match): €{average_price_margherita_direct:.2f}")
else:
    print(f"\nNo exact match for '{exact_margherita_item_name}' found in the database.")


# Option 2: Semantic Search + Partial Item Name Filter (more flexible)
# This can also catch variations such as "Margherita Pizza" if they exist.
query_text_semantic = "pizza margherita"
results_margherita_semantic = collection.query(
    query_texts=[query_text_semantic],
    n_results=10, # Get enough results to catch variations
    # `where` cannot express a substring/regex match on metadata, so the results are
    # filtered by `item_name` in Python below.
)

prices_found_semantic = []
# query() returns one list of metadatas per query text; only one query text was sent.
if results_margherita_semantic and results_margherita_semantic['metadatas'] and results_margherita_semantic['metadatas'][0]:
    for metadata in results_margherita_semantic['metadatas'][0]:
        price = metadata.get('price')
        # Case-insensitive check for the item name, skipping unknown (non-positive) prices
        if (
            query_item_name.lower() in metadata.get('item_name', '').lower()
            and isinstance(price, (int, float))
            and price > 0
        ):
            prices_found_semantic.append(price)

if prices_found_semantic:
    average_price_margherita_semantic = mean(prices_found_semantic)
    print(f"Average price of 'Pizza Margherita' (semantic search + item name check): €{average_price_margherita_semantic:.2f}")
else:
    print(f"No items semantically related to 'Pizza Margherita' with 'margherita' in their name found.")




### Testing Simple Model Access

In [None]:
from google.colab import userdata
import google.generativeai as genai


# --- Configuration ---
# Smoke test for the Gemini SDK: configure it with the API key and send one text-only prompt.
# The key is looked up via `userdata.get('GOOGLE_API_KEY')`, i.e. from a Colab secret with
# that name, so nothing has to be pasted into the notebook.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY') # Read from the Colab secret named 'GOOGLE_API_KEY'.
genai.configure(api_key=GOOGLE_API_KEY)
try:
    # Initialize the Generative Model
    model = genai.GenerativeModel('gemini-2.5-pro')

    # Send a simple prompt
    # NOTE: this rebinds the notebook-level name `prompt`, which the menu-extraction cell
    # also assigns.
    prompt = "Hello, tell me something interesting."
    print(f"\nSending a test prompt to Gemini: '{prompt}'")
    response = model.generate_content(prompt)

    # Print the response to confirm functionality
    print("\n--- Gemini's Response ---")
    print(response.text)
    print("\nSUCCESS! Your API key appears to be working correctly with the Gemini API.")

# Any failure (bad key, API not enabled, network error, ...) is reported, not raised,
# so the cell always completes.
except Exception as e:
    print(f"\nFAILED to make a call to the Gemini API: {e}")
    print("This usually means your API key is incorrect, invalid, or you haven't enabled the Gemini API for your project.")
    print("Double-check your API key and ensure the Generative Language API is enabled in your Google Cloud project.")

