# Contract Processor Notebook

This notebook demonstrates the workflow for extracting structured data from a contract PDF using functions from `utils.py` and Google's Generative AI.

In [None]:
# === Imports ===
import os
import google.generativeai as genai
import json
import logging

# Import specific functions and exceptions from utils.py
from utils import (
    read_pdf, 
    build_llm_prompt, 
    parse_llm_response, 
    PDFReadError, 
    JSONParsingError, 
    LLMConfigurationError, 
    LLMGenerationError,
    MODEL_NAME # Import the model name constant
)

# Root logger: show INFO and above, each record prefixed with a timestamp and its level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

## 1. Configuration

Set the path to your contract PDF and configure the Google Generative AI API key.

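**Important:** The cell below reads the API key from the `GOOGLE_API_KEY` environment variable (recommended). Alternatively, uncomment the manual assignment and replace `'YOUR_API_KEY'` with your actual key — this is less secure, so never share or commit the notebook with a real key in it. Also, update `contract_pdf_path` to point to your contract file.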

In [None]:
# --- Configuration ---
# Inputs for the rest of the notebook:
#   contract_pdf_path - location of the contract PDF to process
#   api_key           - Google Generative AI key; None/empty when not configured
contract_pdf_path = 'data/Acme Corp_Lore Agreement_Oct 2023.pdf' # <<< --- CHANGE THIS TO YOUR PDF PATH ---
api_key = os.getenv('GOOGLE_API_KEY') # <<< --- LOAD FROM ENVIRONMENT VARIABLE (RECOMMENDED) ---

# Or uncomment and set manually (less secure):
# api_key = 'YOUR_API_KEY' 

# Configure the generative AI client. The missing-key case is handled in a single
# branch so the condition is evaluated once and all guidance appears together.
if api_key:
    try:
        genai.configure(api_key=api_key)
        logging.info("Google Generative AI configured successfully.")
    except Exception as e:
        logging.error(f"Failed to configure Google Generative AI: {e}")
        # Surface configuration failures as the project's dedicated error type,
        # keeping the original exception chained for debugging.
        raise LLMConfigurationError(f"Failed to configure Google Generative AI: {e}") from e
else:
    logging.warning("API key not found in environment variables. Please set GOOGLE_API_KEY or uncomment the manual assignment.")
    logging.error("API Key not configured. Cannot proceed with LLM interaction.")
    # Deliberately not raising: the notebook keeps running and the generation
    # cell skips itself when api_key is missing.
    # The key is set in THIS cell, so point the user here rather than "above".
    print("ERROR: Google API Key not configured. Set the GOOGLE_API_KEY environment variable or assign api_key in this cell, then re-run it.")

## 2. Read Contract PDF

Use the `read_pdf` function from `utils.py` to extract text from the specified PDF file.

In [None]:
# Extract the contract's text; contract_text stays None if the file is missing
# or reading fails, which later cells use as their signal to skip.
contract_text = None
if not os.path.exists(contract_pdf_path):
    # Nothing to read: tell the user which setting to fix
    logging.error(f"PDF file not found at: {contract_pdf_path}")
    print(f"Error: PDF file not found at '{contract_pdf_path}'. Please update the 'contract_pdf_path' variable in the configuration cell.")
else:
    try:
        logging.info(f"Reading PDF: {contract_pdf_path}")
        contract_text = read_pdf(contract_pdf_path)
        logging.info(f"Successfully read {len(contract_text)} characters from the PDF.")
        # To preview the text, run: print(f"Contract Text Snippet:\n{contract_text[:500]}...")
    except PDFReadError as read_err:
        # Failure reported by read_pdf itself
        logging.error(f"Error reading PDF: {read_err}")
        print(f"Error reading PDF: {read_err}")
    except Exception as unexpected_err:
        # Anything else: keep the traceback in the log for diagnosis
        logging.error(f"An unexpected error occurred during PDF reading: {unexpected_err}", exc_info=True)
        print(f"An unexpected error occurred during PDF reading: {unexpected_err}")

## 3. Build LLM Prompt

Use the `build_llm_prompt` function to construct the prompt containing the contract text and JSON instructions for the LLM.

In [None]:
# Build the prompt from the contract text; llm_prompt stays None when there is
# no text to work with or prompt construction fails.
llm_prompt = None
if not contract_text:
    logging.warning("Contract text not available. Cannot build LLM prompt.")
    print("Skipping prompt generation as contract text was not read.")
else:
    try:
        logging.info("Building LLM prompt...")
        llm_prompt = build_llm_prompt(contract_text)
        logging.info("LLM prompt built successfully.")
        # To preview the prompt, run: print(f"Prompt Snippet:\n{llm_prompt[:500]}...")
    except Exception as build_err:
        # Log with traceback, then show a short message in the cell output
        logging.error(f"Error building LLM prompt: {build_err}", exc_info=True)
        print(f"An unexpected error occurred building the LLM prompt: {build_err}")

## 4. Generate Content with LLM

Initialize the Generative Model and call it with the prompt to get the structured data response.

In [None]:
# Send the prompt to the model. llm_response_text stays None whenever no usable
# text was obtained (missing prerequisites, blocked prompt, or an API error), so
# the parsing cell can detect that and skip itself.
llm_response_text = None
if llm_prompt and api_key: # Ensure prompt exists and API key is configured
    try:
        logging.info(f"Initializing model: {MODEL_NAME}")
        model = genai.GenerativeModel(MODEL_NAME)
        
        logging.info("Sending request to the LLM...")
        # To tune temperature, output length, etc., pass
        # generation_config=genai.types.GenerationConfig(...) to generate_content.
        response = model.generate_content(llm_prompt)
        
        # Check whether the service rejected the prompt before touching
        # `response.text`, so the user sees the actual block reason instead of
        # whatever error the text accessor would raise for a response without content.
        # getattr keeps this tolerant of responses that lack the feedback field.
        prompt_feedback = getattr(response, "prompt_feedback", None)
        block_reason = getattr(prompt_feedback, "block_reason", None)
        if block_reason:
            logging.error(f"Request blocked: {block_reason}")
            print(f"An error occurred interacting with the LLM: LLM request blocked: {block_reason}")
        else:
            llm_response_text = response.text
            logging.info("LLM response received.")
            # To preview the response, run: print(f"Raw LLM Response Snippet:\n{llm_response_text[:500]}...")

    except Exception as e:
        # Covers API, network and configuration failures. The error is reported
        # rather than re-raised so the remaining cells can still run and skip.
        logging.error(f"Error during LLM content generation: {e}", exc_info=True)
        print(f"An error occurred interacting with the LLM: {e}")
        
elif not api_key:
    logging.error("Cannot generate content: API key not configured.")
    print("Skipping LLM generation as API key is missing.")
else:
    logging.warning("LLM prompt not available. Skipping LLM generation.")
    print("Skipping LLM generation as the prompt was not created.")

## 5. Parse LLM Response

Use the `parse_llm_response` function to extract the clean JSON data from the raw text response provided by the LLM.

In [None]:
# Turn the raw LLM text into structured data; extracted_data stays None when
# there is no response to parse or parsing fails.
extracted_data = None
if not llm_response_text:
    logging.warning("LLM response text not available. Skipping parsing.")
    print("Skipping response parsing as no text was received from the LLM.")
else:
    try:
        logging.info("Parsing LLM response...")
        extracted_data = parse_llm_response(llm_response_text)
        logging.info("LLM response parsed successfully.")

        # Final output: pretty-printed JSON between banner lines
        print("--- Extracted Contract Data (JSON) ---")
        print(json.dumps(extracted_data, indent=2))
        print("--- End Extracted Data ---")

    except JSONParsingError as parse_err:
        logging.error(f"Failed to parse JSON from LLM response: {parse_err}")
        print(f"Error parsing JSON from LLM response: {parse_err}")
        # Show the unparsed response so the malformed output can be inspected
        print(
            "--- Raw LLM Response ---",
            llm_response_text,
            "--- End Raw Response ---",
            sep="\n",
        )
    except Exception as unexpected_err:
        # Anything else: keep the traceback in the log for diagnosis
        logging.error(f"An unexpected error occurred during response parsing: {unexpected_err}", exc_info=True)
        print(f"An unexpected error occurred during response parsing: {unexpected_err}")

## 6. Workflow Complete

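If every step succeeded, the `extracted_data` variable holds the final JSON structure parsed from the contract. If any step failed or was skipped, `extracted_data` is `None`; check the messages printed by the cells above to see which stage needs attention.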