<a href="https://colab.research.google.com/github/m-zayed5722/Miscellaneous-Projects/blob/main/LLM_Enricher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import requests
import time
from typing import Dict, List, Optional, Any, Union
from dataclasses import dataclass
import logging
from src.schema import MenuItem, MenuSchema
from pydantic import ValidationError


@dataclass
class LLMConfig:
    """Settings bundle for talking to an Ollama-served LLM.

    Attributes:
        model_name: Model used when a call does not name one explicitly
            (defaults to the Ollama "llama2" model).
        base_url: Root URL of the Ollama HTTP server.
        timeout: Per-request timeout in seconds for generation calls;
            the default of 600 (10 minutes) leaves room for large models.
        max_retries: Total number of generation attempts before giving up.
        temperature: Sampling temperature sent with each generation
            request; kept low so structured output stays consistent.
    """

    model_name: str = "llama2"
    base_url: str = "http://localhost:11434"
    timeout: int = 600
    max_retries: int = 3
    temperature: float = 0.1


class OllamaClient:
    """Client for interacting with the Ollama HTTP API.

    Wraps the ``/api/tags`` endpoint (availability check, model listing)
    and the ``/api/generate`` endpoint (non-streaming text generation).
    All methods are best-effort: network and API failures are logged and
    reported through the return value rather than raised.
    """

    def __init__(self, config: Optional[LLMConfig] = None):
        """Create a client.

        Args:
            config: Connection and generation settings. When omitted, a
                default ``LLMConfig()`` is used.
        """
        self.config = config or LLMConfig()
        self.logger = logging.getLogger(__name__)

    def is_available(self) -> bool:
        """Return True if the Ollama server answers ``/api/tags`` with HTTP 200.

        Any exception raised while contacting the server is logged as a
        warning and reported as ``False``.
        """
        try:
            response = requests.get(f"{self.config.base_url}/api/tags", timeout=5)
            return response.status_code == 200
        except Exception as e:
            self.logger.warning("Ollama not available: %s", e)
            return False

    def list_models(self) -> List[str]:
        """Return the names of the models reported by ``/api/tags``.

        Returns:
            The ``name`` of every entry under the ``models`` key of the
            response, or an empty list when the request fails, the server
            answers with a non-200 status, or the payload cannot be read.
        """
        try:
            response = requests.get(f"{self.config.base_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
                return [model['name'] for model in data.get('models', [])]
            # Previously a non-200 answer produced an empty list with no trace.
            self.logger.warning(
                "Ollama API error while listing models: %s", response.status_code
            )
        except Exception as e:
            self.logger.error("Error listing models: %s", e)
        return []

    def generate(self, prompt: str, model: Optional[str] = None) -> Optional[str]:
        """Generate text for ``prompt`` using the non-streaming generate endpoint.

        The request is attempted up to ``config.max_retries`` times. Every
        failed attempt (timeout, other exception, or non-200 status) is
        followed by an exponential backoff pause before the next attempt.

        Args:
            prompt: Prompt text sent to the model.
            model: Model name; falls back to ``config.model_name`` when
                omitted or empty.

        Returns:
            The stripped ``response`` field of the first HTTP 200 answer, or
            ``None`` if every attempt failed.
        """
        model = model or self.config.model_name

        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": self.config.temperature
            }
        }
        url = f"{self.config.base_url}/api/generate"

        for attempt in range(self.config.max_retries):
            try:
                self.logger.info(
                    "Sending request to %s (timeout: %ss)...",
                    model, self.config.timeout,
                )
                # Monotonic clock: elapsed time is immune to wall-clock jumps.
                start_time = time.monotonic()

                response = requests.post(
                    url,
                    json=payload,
                    timeout=self.config.timeout
                )

                elapsed_time = time.monotonic() - start_time
                self.logger.info("Response received in %.2f seconds", elapsed_time)

                if response.status_code == 200:
                    result = response.json()
                    return result.get('response', '').strip()

                self.logger.warning(
                    "Ollama API error: %s - %s", response.status_code, response.text
                )

            except requests.exceptions.Timeout as e:
                self.logger.warning(
                    "Attempt %d timed out after %ss: %s",
                    attempt + 1, self.config.timeout, e,
                )
            except Exception as e:
                self.logger.warning("Attempt %d failed: %s", attempt + 1, e)

            # Reached only when the attempt failed. Non-200 answers used to
            # retry immediately; they now back off like the exception paths.
            self._backoff(attempt)

        return None

    def _backoff(self, attempt: int) -> None:
        """Sleep ``2 ** attempt`` seconds unless ``attempt`` was the last one."""
        if attempt < self.config.max_retries - 1:
            self.logger.info("Retrying with exponential backoff...")
            time.sleep(2 ** attempt)  # Exponential backoff


class LLMEnricher:
    """LLM-based menu item enricher"""

    def __init__(self, config: LLMConfig = None):
        self.config = config or LLMConfig()
        self.client = OllamaClient(config)
        self.schema = MenuSchema()
        self.logger = logging.getLogger(__name__)

        # System prompt template
        self.system_prompt = """You are an expert food categorization assistant. Your task is to analyze menu item names and return structured, clean information.

