# Secure PDF Bill of Materials (BoM) Analyzer (via ScopePDF API)

Welcome! This Google Colab notebook provides an interface to analyze Bill of Materials (BoM) data extracted from PDF files using the **ScopePDF API**.

**⚠️ SECURITY NOTE ⚠️**
* This notebook requires you to enter your assigned **Username** and **ScopePDF API Key**.
* Keep your API Key confidential. Do not share it or save it directly in the notebook.

**What you can do here:**
* Securely provide your credentials (Username & API Key).
* Upload a PDF file containing a Bill of Materials.
* Send the PDF securely to the ScopePDF API for analysis.
* View the extracted materials data directly in the notebook.
* See your current usage count against your limit.
* Download the extracted data as a CSV file.

**Security:**
* Your API Key is entered using a secure prompt and is not stored in the notebook file.
* Data is transferred securely via HTTPS.

**Prerequisites:**
* You need an assigned **Username** and a **ScopePDF API Key**. 
* These credentials are required to authenticate your requests to the ScopePDF API and track usage.
* **How to get credentials:** Please contact the administrator or use the designated Scope4 channel (e.g., request forms) to obtain your Username and API Key.

*For questions or support, contact tommaso@scope4.dev.*

## 1. Setup and Authentication

Run the code cell below first. It imports the necessary libraries, prompts you for your Username and API Key, and, once both are provided, displays the PDF upload and analysis controls.

In [None]:
import os
import json
import base64
import pandas as pd
import requests
from google.colab import files, output # type: ignore
import ipywidgets as widgets # type: ignore
from IPython.display import display, HTML, Javascript # type: ignore
import getpass

# HTTPS endpoint that receives the uploaded PDF and the user's credentials.
SCOPEPDF_API_URL = (
    "https://europe-west1-lcabench-api.cloudfunctions.net/"
    "process_bom_from_colab_secure"
)

# Session credentials; both stay None until get_credentials() succeeds.
colab_username = scopepdf_api_key = None

def get_credentials():
    """Prompt for the Username and API Key and store them in module globals.

    The username is read with ``input`` and the API key with
    ``getpass.getpass`` so the key is not echoed. Both values are stripped of
    surrounding whitespace *before* being validated, so a whitespace-only
    entry is rejected instead of being stored as an empty credential.

    On any failure both ``colab_username`` and ``scopepdf_api_key`` are left
    as ``None``; they are only assigned once both prompts have succeeded.

    Returns:
        bool: True when both credentials were collected, False otherwise
        (including when ``SCOPEPDF_API_URL`` is unset or still a placeholder).
    """
    global colab_username, scopepdf_api_key
    from html import escape  # local import keeps this block self-contained

    if SCOPEPDF_API_URL == "YOUR_HTTPS_TRIGGER_URL_FROM_GOOGLE_CLOUD_FUNCTION_HERE" or not SCOPEPDF_API_URL:
        display(HTML("<p style='color:red; font-weight:bold;'>ADMIN ACTION REQUIRED: Please set the 'SCOPEPDF_API_URL' in the Colab script.</p>"))
        return False

    # Drop any credentials from a previous run so that every failure path
    # below (including an interrupted prompt) leaves a clean state.
    colab_username = None
    scopepdf_api_key = None

    # \U0001F511 is the key emoji; written as an escape so it survives
    # re-encoding of the notebook file.
    display(HTML("<h4>\U0001F511 API Authentication & User Identification</h4>"))
    try:
        entered_username = input("Enter your assigned Username: ").strip()
        if not entered_username:
            display(HTML("<p style='color:red; font-weight:bold;'>Username cannot be empty.</p>"))
            return False

        # Pasted keys frequently carry a trailing space or newline.
        entered_key = getpass.getpass("Enter your ScopePDF API Key: ").strip()
        if not entered_key:
            display(HTML("<p style='color:red; font-weight:bold;'>API Key cannot be empty.</p>"))
            return False
    except Exception as e:
        # The exception text is arbitrary, so escape it before embedding in HTML.
        display(HTML(f"<p style='color:red; font-weight:bold;'>An error occurred while entering credentials: {escape(str(e))}</p>"))
        return False

    colab_username = entered_username
    scopepdf_api_key = entered_key
    display(HTML("<p style='color:green;'>Username and API Key received (API Key hidden for security).</p><hr>"))
    return True

# Widget handles shared between initialize_interface(), which creates them,
# and on_process_button_clicked(), which reads the uploaded file.
# Both stay None until the interface has been built.
file_upload_widget = None
process_button_widget = None
# Output area into which the button callback renders its messages and results.
output_display_area = widgets.Output()

def _html_escape(value):
    """Return ``str(value)`` escaped for safe interpolation into HTML markup."""
    from html import escape  # local import keeps this block self-contained
    return escape(str(value))


def _first_uploaded_file(upload_value):
    """Return ``(filename, content)`` for the first file in a FileUpload value.

    Handles both layouts of ``FileUpload.value``:

    * ipywidgets 7: a dict mapping file name to
      ``{'metadata': {'name': ...}, 'content': ...}``;
    * ipywidgets 8: a tuple of dicts shaped like ``{'name': ..., 'content': ...}``.

    Raises:
        KeyError, IndexError: if the value does not have the expected shape.
    """
    if isinstance(upload_value, dict):
        file_info = next(iter(upload_value.values()))
        return file_info['metadata']['name'], file_info['content']
    file_info = upload_value[0]
    return file_info['name'], file_info['content']


def _describe_http_error(http_err):
    """Build a plain-text (unescaped) description of a ``requests`` HTTPError."""
    error_text = f"HTTP Error communicating with ScopePDF API: {http_err}."
    response = http_err.response
    # Compare against None explicitly: a Response with an error status is falsy.
    if response is None:
        return error_text + " No response received from API."

    error_text += f" Server status: {response.status_code}."
    try:
        error_details = response.json()
    except ValueError:
        # Every JSON decode error raised by requests is a ValueError subclass.
        return error_text + f" API response: {response.text}"

    if isinstance(error_details, dict):
        api_error_msg = error_details.get("error", json.dumps(error_details))
        api_details = error_details.get("details", "")
    else:
        # Valid JSON but not an object (e.g. a list or string): show it verbatim.
        api_error_msg, api_details = json.dumps(error_details), ""
    error_text += f" API said: {api_error_msg}"
    if api_details:
        error_text += f" Details: {api_details}"
    return error_text


def _render_api_result(api_result, pdf_filename, username):
    """Display the decoded API response: materials table, API error, or fallback.

    On success with a non-empty ``materials`` list the data is shown as a
    DataFrame and written to ``<sanitised pdf name>_extracted_materials.csv``
    in the current working directory.
    """
    if not isinstance(api_result, dict):
        display(HTML("<p style='color:red;'>Received an unexpected response structure from the ScopePDF API.</p>"))
        display(HTML(f"<pre>{_html_escape(json.dumps(api_result, indent=2))}</pre>"))
        return

    if api_result.get("status") == "success" and "extracted_data" in api_result:
        display(HTML("<h4 style='color:green;'>Analysis Successful!</h4>"))
        materials = api_result["extracted_data"].get("materials", [])

        usage_info = api_result.get("usage_info")
        if isinstance(usage_info, dict):
            usage = usage_info.get("count", "?")
            limit = usage_info.get("limit", "?")
            display(HTML(f"<p><i>Usage for {_html_escape(username)}: {_html_escape(usage)}/{_html_escape(limit)} analyses used.</i></p>"))

        if isinstance(materials, list) and materials:
            df = pd.DataFrame(materials)
            display(HTML("<h5>Extracted Materials:</h5>"))
            display(df)
            # Keep only alphanumerics, '_' and '-' so the name is a safe file name.
            base_name = os.path.splitext(pdf_filename)[0]
            safe_base_filename = "".join(c if c.isalnum() or c in ('_', '-') else '_' for c in base_name)
            csv_filename = f"{safe_base_filename}_extracted_materials.csv"
            df.to_csv(csv_filename, index=False)
            display(HTML(f"<p>File '{_html_escape(csv_filename)}' has been generated and saved. You can download it from the 'Files' panel on the left-hand side of the Colab interface.</p>"))
        elif not materials:
            display(HTML("<p>No materials were extracted from the PDF according to the ScopePDF API analysis.</p>"))
        else:
            display(HTML("<p style='color:orange;'>The 'materials' data from the ScopePDF API was not in the expected list format or was empty.</p>"))
        return

    if "error" in api_result:
        error_msg = api_result.get("error", "Unknown error from ScopePDF API.")
        details_msg = api_result.get("details", "")
        raw_output_msg = api_result.get("raw_llm_output", "")
        display(HTML(f"<p style='color:red; font-weight:bold;'>ScopePDF API Error: {_html_escape(error_msg)}</p>"))
        if details_msg:
            display(HTML(f"<p style='color:red;'>Details: {_html_escape(details_msg)}</p>"))
        if raw_output_msg:
            display(HTML(f"<p style='color:red;'>Raw LLM (from API - snippet): {_html_escape(str(raw_output_msg)[:500])}...</p>"))
        return

    display(HTML("<p style='color:red;'>Received an unexpected response structure from the ScopePDF API.</p>"))
    display(HTML(f"<pre>{_html_escape(json.dumps(api_result, indent=2))}</pre>"))


def _reset_upload_widget(upload_widget):
    """Best-effort clearing of the upload widget so the next upload starts fresh."""
    if not upload_widget or not getattr(upload_widget, 'value', None):
        return
    try:
        if isinstance(upload_widget.value, dict):
            # ipywidgets 7: empty the dict and bump the counter shown on the button.
            upload_widget.value.clear()
            upload_widget._counter = getattr(upload_widget, '_counter', 0) + 1
        else:
            # ipywidgets 8: the value is a tuple, so assign an empty one.
            upload_widget.value = ()
    except Exception as e_clear:
        print(f"Non-critical error resetting upload widget: {e_clear}")


def on_process_button_clicked(b):
    """Send the uploaded PDF to the ScopePDF API and display the outcome.

    Click handler for the "Analyze PDF via API" button. All output is rendered
    inside ``output_display_area``. The PDF is base64-encoded and POSTed as
    JSON, together with the username and API key, to ``SCOPEPDF_API_URL``.
    Any text that originates from the user, the file name or the API response
    is HTML-escaped before being displayed.

    Args:
        b: The button instance passed by ipywidgets; unused.
    """
    with output_display_area:
        output_display_area.clear_output(wait=True)

        if not colab_username or not scopepdf_api_key:
            display(HTML("<p style='color:red; font-weight:bold;'>Error: Username or API key missing. Please re-run the setup cell (Cell 1).</p>"))
            return
        if SCOPEPDF_API_URL == "YOUR_HTTPS_TRIGGER_URL_FROM_GOOGLE_CLOUD_FUNCTION_HERE" or not SCOPEPDF_API_URL:
            display(HTML("<p style='color:red; font-weight:bold;'>Error: ScopePDF API URL is not configured in the notebook. Please contact the administrator.</p>"))
            return
        if not file_upload_widget or not file_upload_widget.value:
            display(HTML("<p style='color:orange;'>Please upload a PDF file first.</p>"))
            return

        upload_value = file_upload_widget.value
        # dict = ipywidgets 7 layout, tuple/list = ipywidgets 8 layout.
        if not isinstance(upload_value, (dict, tuple, list)):
            display(HTML("<p style='color:red;'>Error reading uploaded file data.</p>"))
            return

        try:
            pdf_filename, pdf_content_bytes = _first_uploaded_file(upload_value)
            display(HTML(f"<p>Sending '{_html_escape(pdf_filename)}' to ScopePDF API for analysis (User: {_html_escape(colab_username)})... This may take a moment.</p>"))

            payload_to_api = {
                "username": colab_username,
                "api_key": scopepdf_api_key,
                "pdf_filename": pdf_filename,
                "pdf_base64_content": base64.b64encode(pdf_content_bytes).decode('utf-8'),
            }
            response_from_api = requests.post(
                SCOPEPDF_API_URL,
                json=payload_to_api,
                headers={"Content-Type": "application/json"},
                timeout=660,
            )
            response_from_api.raise_for_status()

            try:
                api_result = response_from_api.json()
            except ValueError:
                # Handle here so a malformed body is not reported as a network error.
                display(HTML("<p style='color:red;'>The ScopePDF API returned a response that is not valid JSON.</p>"))
                return

            _render_api_result(api_result, pdf_filename, colab_username)
        except requests.exceptions.HTTPError as http_err:
            display(HTML(f"<p style='color:red;'>{_html_escape(_describe_http_error(http_err))}</p>"))
        except requests.exceptions.RequestException as req_err:
            display(HTML(f"<p style='color:red;'>Network error communicating with ScopePDF API: {_html_escape(req_err)}</p>"))
        except Exception as e:
            display(HTML(f"<p style='color:red;'>An unexpected error occurred in the Colab notebook: {type(e).__name__} - {_html_escape(e)}</p>"))
        finally:
            _reset_upload_widget(file_upload_widget)

def initialize_interface():
    """Collect credentials and, if that succeeds, render the upload/analysis UI.

    Creates the file-upload widget and the analyze button, stores them in the
    module-level ``file_upload_widget`` / ``process_button_widget`` handles,
    wires the button to ``on_process_button_clicked`` and displays the shared
    output area. Does nothing further when ``get_credentials()`` returns False.
    """
    global file_upload_widget, process_button_widget

    # No credentials, no interface.
    if not get_credentials():
        return

    display(HTML("<h2>2. Upload and Analyze PDF</h2>"))

    # --- Upload control ---
    display(HTML("<h3>Upload your BoM PDF File:</h3>"))
    uploader = widgets.FileUpload(accept='.pdf', multiple=False, description='Upload BoM PDF')
    file_upload_widget = uploader
    display(uploader)

    # --- Analyze button ---
    display(HTML("<h3>Start Secure Analysis:</h3>"))
    analyze_button = widgets.Button(
        description="Analyze PDF via API",
        button_style='success',
        tooltip='Upload a PDF and click to start the secure analysis via ScopePDF API',
        icon='rocket',
    )
    process_button_widget = analyze_button
    analyze_button.on_click(on_process_button_clicked)
    display(analyze_button)

    # --- Results area ---
    display(HTML("<hr><h3>Results:</h3>"))
    display(output_display_area)

# Build the interface as soon as the cell runs; this prompts for credentials first.
initialize_interface()
