<a href="https://colab.research.google.com/github/prem-cre/compilance/blob/main/compilance_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title 📦 Install Dependencies
# Installing the specific SDKs needed for LangGraph and Google Gen AI (v1 SDK)
!pip install -q -U langgraph langchain-core google-genai reportlab tenacity

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m47.8/47.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m475.8/475.8 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m262.4/262.4 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.0/2.0 MB[0m [31m51.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:


import os
import time
from typing import TypedDict, Optional, List
from google import genai
from google.genai import types
from langgraph.graph import StateGraph, END
from IPython.display import Markdown, display
from google.colab import userdata, files
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

# --- CONFIGURATION ---
# Gemini model used for every generate_content call in this notebook.
# NOTE: the extraction node attaches the File Search tool, so any replacement
# model must support that tool.
MODEL_ID = "gemini-2.5-flash-lite"

# Prefer the key stored under the Colab secret "geminikey"; fall back to an
# interactive prompt if the secret cannot be read.
try:
    API_KEY = userdata.get("geminikey")
except Exception:
    # `except Exception` (not a bare `except:`) so that KeyboardInterrupt and
    # SystemExit still propagate instead of silently opening the prompt.
    API_KEY = input("Please enter your Google API Key: ")

client = genai.Client(api_key=API_KEY)

# --- 1. DEFINE STATE ---
class ComplianceState(TypedDict):
    """Shared state passed between the nodes of the compliance graph."""
    # Path of the policy PDF on local disk; read by the upload node.
    local_pdf_path: str
    # Draft text that the verification node checks against the extracted rules.
    user_content: str
    # Name of the File Search store created by the upload node (unset if upload failed).
    store_name: Optional[str]
    # Rules text written by the extraction node, or an error message string on failure.
    extracted_rules: Optional[str]
    # Report text written by the verification node.
    compliance_report: str
    # Error messages collected so far; nodes extend it by returning a new list.
    errors: List[str]

# --- 2. RETRY LOGIC (Crucial for 503 Errors) ---
@retry(
    stop=stop_after_attempt(4),
    wait=wait_exponential(multiplier=2, min=2, max=20),
    retry=retry_if_exception_type(Exception),
    # Re-raise the last underlying exception once attempts are exhausted.
    # Without this tenacity raises RetryError, whose str() hides the real API
    # error that callers put into their error messages.
    reraise=True,
)
def call_gemini_with_retry(model, contents, config):
    """Call ``client.models.generate_content`` with retries and backoff.

    Up to 4 attempts are made, with exponential waits clamped between 2 and
    20 seconds. Every ``Exception`` is retried.

    Args:
        model: Model identifier forwarded to ``generate_content``.
        contents: Prompt contents forwarded to ``generate_content``.
        config: Generation config forwarded to ``generate_content``.

    Returns:
        The response object returned by ``generate_content``.

    Raises:
        Exception: The exception from the final failed attempt.
    """
    return client.models.generate_content(
        model=model,
        contents=contents,
        config=config
    )

# --- 3. NODES ---

def node_upload_to_store(state: ComplianceState):
    """Create a File Search store and index the policy PDF into it.

    Args:
        state: Graph state; reads ``local_pdf_path`` and ``errors``.

    Returns:
        A partial state update: ``{"store_name": ...}`` on success, or
        ``{"errors": [...]}`` when the file is missing, the upload fails, or
        indexing does not complete in time.
    """
    pdf_path = state['local_pdf_path']
    print(f"\n--- 1. UPLOADING {pdf_path} TO FILE SEARCH ---")

    # Verify file exists before trying to upload
    if not os.path.exists(pdf_path):
        return {"errors": (state.get('errors') or []) + [f"File not found: {pdf_path}"]}

    poll_interval_s = 2
    max_wait_s = 600  # upper bound so a stuck operation cannot hang the graph forever

    store = None
    try:
        # Create Store
        store = client.file_search_stores.create(
            config={'display_name': f"compliance_store_{int(time.time())}"}
        )

        # Upload
        print("‚è≥ Uploading & Indexing (this may take a moment)...")
        upload_op = client.file_search_stores.upload_to_file_search_store(
            file_search_store_name=store.name,
            file=pdf_path,
            config={
                'display_name': 'Compliance_Doc',
                'chunking_config': {
                    'white_space_config': {
                        'max_tokens_per_chunk': 512,
                        'max_overlap_tokens': 100
                    }
                }
            }
        )

        # Wait for completion, but give up once the deadline passes.
        deadline = time.monotonic() + max_wait_s
        while not upload_op.done:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"indexing did not finish within {max_wait_s} seconds"
                )
            time.sleep(poll_interval_s)
            upload_op = client.operations.get(upload_op)

        # A finished operation may still have failed.
        # NOTE(review): assumes the operation object exposes an `error`
        # attribute on failure — confirm against the SDK.
        op_error = getattr(upload_op, "error", None)
        if op_error:
            raise RuntimeError(f"indexing error: {op_error}")

        print(f"‚úÖ Store Ready: {store.name}")
        return {"store_name": store.name}

    except Exception as e:
        # The store name never reaches the graph state on failure, so the
        # cleanup node cannot delete it; remove the half-built store here.
        if store is not None:
            try:
                client.file_search_stores.delete(
                    name=store.name,
                    config={'force': True}
                )
            except Exception as cleanup_error:
                print(f"‚ö†Ô∏è Cleanup Warning: {cleanup_error}")
        return {"errors": (state.get('errors') or []) + [f"Upload failed: {str(e)}"]}

def node_extract_rules(state: ComplianceState):
    """Ask the model to extract compliance rules from the indexed policy PDF.

    Args:
        state: Graph state; reads ``store_name`` and ``errors``.

    Returns:
        A partial state update. On success ``extracted_rules`` holds the
        model's text. On any failure ``extracted_rules`` is a string starting
        with ``"ERROR"`` (the marker the verification node looks for), and,
        for failures that happen in this node, ``errors`` is extended.
    """
    print("\n--- 2. EXTRACTING RULES ---")

    if not state.get("store_name"):
        return {"extracted_rules": "ERROR: No store created."}

    prompt = """
    You are a Senior Compliance Architect.
    Analyze the attached Policy Document in the file search store.

    Extract STRICT rules for:
    1. **Citation Format**: (e.g., IEEE, APA)
    2. **PII Rules**: (What data must be masked?)
    3. **Governance (GRPC)**: (Required terminology or prohibited phrases)
    4. **Formatting**: (Margins, fonts, etc.)

    Output a clean, structured list. Do not summarize generalities.
    """

    try:
        response = call_gemini_with_retry(
            model=MODEL_ID,
            contents=prompt,
            config=types.GenerateContentConfig(
                temperature=0.0,
                tools=[types.Tool(
                    file_search=types.FileSearch(
                        file_search_store_names=[state['store_name']]
                    )
                )]
            )
        )
        rules_text = response.text
        # An empty/None reply is a failure, not an empty rule set.
        if not rules_text or not rules_text.strip():
            raise ValueError("model returned no text")

    except Exception as e:
        error_msg = f"Extraction failed: {str(e)}"
        print(f"‚ùå {error_msg}")
        # Prefix with "ERROR" so the verification node skips instead of
        # treating the failure message as policy rules.
        return {
            "extracted_rules": f"ERROR: {error_msg}",
            "errors": (state.get('errors') or []) + [error_msg],
        }

    print("‚úÖ Rules Extracted.")
    return {"extracted_rules": rules_text}

def node_verify_compliance(state: ComplianceState):
    """Check the user's draft against the extracted policy rules.

    Args:
        state: Graph state; reads ``extracted_rules``, ``user_content`` and
            ``errors``.

    Returns:
        A partial state update: ``{"compliance_report": ...}`` with the
        model's report (or a skip notice when no usable rules exist), or
        ``{"errors": [...]}`` when the model call fails or returns no text.
    """
    print("\n--- 3. VERIFYING COMPLIANCE ---")

    # `or ""` guards against an explicit None stored in the state, which
    # would otherwise raise TypeError and abort the graph before cleanup.
    rules = state.get('extracted_rules') or ""
    rules_head = rules.strip()
    # Only a leading failure marker counts; policy text that merely contains
    # the word "ERROR" must still be verified.
    if not rules_head or rules_head.startswith(("ERROR", "Extraction failed")):
        return {"compliance_report": "‚ö†Ô∏è SKIPPED: Rules could not be extracted."}

    user_input = state['user_content']

    system_instruction = """
    You are a specialized Compliance Engine.

    NEGATIVE CONSTRAINTS:
    1. **NO FACT CHECKING**: Ignore factual errors (e.g., wrong dates, wrong physics) unless they violate a specific rule.
    2. **STRICT RULE ADHERENCE**: Only flag violations of the provided extracted rules.
    """

    user_prompt = f"""
    --- EXTRACTED POLICY RULES ---
    {rules}

    --- USER CONTENT TO CHECK ---
    {user_input}

    --- TASK ---
    Generate a compliance report checking for PIIC, GRPC, and Citation violations.
    """

    try:
        response = call_gemini_with_retry(
            model=MODEL_ID,
            contents=user_prompt,
            config=types.GenerateContentConfig(
                system_instruction=system_instruction,
                temperature=0.1
            )
        )
        report = response.text
        # Treat an empty/None reply as a failure rather than storing it as
        # the report.
        if not report:
            raise ValueError("model returned no text")
        return {"compliance_report": report}

    except Exception as e:
        return {"errors": (state.get('errors') or []) + [f"Verification failed: {str(e)}"]}

def node_cleanup(state: ComplianceState):
    """Delete the File Search store recorded in the state, if there is one.

    Deletion is best-effort: a failure is printed as a warning and never
    raised. Always returns an empty state update.
    """
    print("\n--- 4. CLEANUP ---")

    store_id = state.get('store_name')
    if not store_id:
        # Nothing was created, so there is nothing to delete.
        return {}

    try:
        # Force-delete, since the store still contains the uploaded file.
        delete_options = {'force': True}
        client.file_search_stores.delete(name=store_id, config=delete_options)
        print(f"üßπ Deleted Store: {store_id}")
    except Exception as cleanup_error:
        print(f"‚ö†Ô∏è Cleanup Warning: {cleanup_error}")

    return {}

# --- 4. BUILD GRAPH ---
workflow = StateGraph(ComplianceState)

# Register each pipeline stage under the name the edges refer to.
for _stage_name, _stage_fn in (
    ("upload", node_upload_to_store),
    ("extract", node_extract_rules),
    ("verify", node_verify_compliance),
    ("cleanup", node_cleanup),
):
    workflow.add_node(_stage_name, _stage_fn)

workflow.set_entry_point("upload")

# Strictly linear pipeline: upload -> extract -> verify -> cleanup -> END.
_stage_order = ("upload", "extract", "verify", "cleanup", END)
for _src, _dst in zip(_stage_order, _stage_order[1:]):
    workflow.add_edge(_src, _dst)

app = workflow.compile()

# --- 5. EXECUTION ---

# Name of the policy PDF expected in the Colab working directory.
target_pdf = "complex_compliance_rules.pdf"

# If the expected file is missing, ask for an upload and use the first
# uploaded file instead.
if not os.path.exists(target_pdf):
    print(f"‚ö†Ô∏è '{target_pdf}' not found in Colab files.")
    print("Please upload your PDF now:")
    uploaded = files.upload()
    if not uploaded:
        raise FileNotFoundError("No file uploaded.")
    target_pdf = next(iter(uploaded))
    print(f"‚úÖ Using uploaded file: {target_pdf}")

# DEFINE USER INPUT (The text to check)
user_draft_text = """
IN THE HIGH COURT OF MIRZAPUR (Criminal Appellate Jurisdiction)
Case Title: State of Madhya Pradesh vs. Rohit Sharma Case No.: CRA/1023/2024
The court observes: Since the date of the alleged crime does not exist in 2023...
(Note: Using the text provided in your prompt which contains incorrect facts but valid styling checking needs)
...
We guarantee 100% uptime for all our servers.
The data will be sent to admin@company.com.
"""

print(f"\nüöÄ STARTING ENGINE using PDF: {target_pdf}")

# Run the graph with the three inputs the nodes read; the remaining state
# keys are filled in by the nodes themselves.
final_state = app.invoke({
    "local_pdf_path": target_pdf,
    "user_content": user_draft_text,
    "errors": []
})

# Display Results
print("\n" + "="*50)
print("FINAL COMPLIANCE REPORT")
print("="*50)

# Use a truthiness check rather than key membership: a key that is present
# but None/empty must be reported as "no report", not rendered.
report_text = final_state.get("compliance_report")
if report_text:
    display(Markdown(report_text))
else:
    print("‚ùå No report generated.")

if final_state.get("errors"):
    print(f"\n‚ùå ERRORS: {final_state['errors']}")

‚ö†Ô∏è 'complex_compliance_rules.pdf' not found in Colab files.
Please upload your PDF now:


Saving 0bfb6765d414b79e8bb23d99715fabfe78a9bc78f65a2bdd01547e514feb20cc1765295413.pdf to 0bfb6765d414b79e8bb23d99715fabfe78a9bc78f65a2bdd01547e514feb20cc1765295413 (3).pdf
‚úÖ Using uploaded file: 0bfb6765d414b79e8bb23d99715fabfe78a9bc78f65a2bdd01547e514feb20cc1765295413 (3).pdf

üöÄ STARTING ENGINE using PDF: 0bfb6765d414b79e8bb23d99715fabfe78a9bc78f65a2bdd01547e514feb20cc1765295413 (3).pdf

--- 1. UPLOADING 0bfb6765d414b79e8bb23d99715fabfe78a9bc78f65a2bdd01547e514feb20cc1765295413 (3).pdf TO FILE SEARCH ---
‚è≥ Uploading & Indexing (this may take a moment)...
‚úÖ Store Ready: fileSearchStores/compliancestore1765301752-4fxqcubr9um3

--- 2. EXTRACTING RULES ---
‚úÖ Rules Extracted.

--- 3. VERIFYING COMPLIANCE ---

--- 4. CLEANUP ---
üßπ Deleted Store: fileSearchStores/compliancestore1765301752-4fxqcubr9um3

FINAL COMPLIANCE REPORT


--- COMPLIANCE REPORT ---

**PIIC Violations:** None
**GRPC Violations:** None
**Citation Violations:** None