In [None]:
pip install -U langchain langchain-core


In [5]:
import os
from getpass import getpass

# Ensure OPENAI_API_KEY is set in the process environment: keep a non-empty value
# that is already there, otherwise prompt for it (getpass does not echo the input).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [11]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.runnables import RunnableMap, RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# --- Reuse the same on-disk folder as before ---
CHROMA_DIR = "chroma_eu_laws"

# --- Load the existing vector store ---
# NOTE(review): the embedding model presumably has to match the one used when the
# store was populated - confirm against the notebook that built it.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma(
    embedding_function=embedding,
    persist_directory=CHROMA_DIR,
)

# Retriever that returns the 4 closest chunks for each query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=4))

In [15]:
# Prompt template for the RAG chain. It exposes two placeholders that must be
# supplied when the template is formatted: `{context}` (the retrieved passages)
# and `{question}` (the user's query).
prompt = ChatPromptTemplate.from_template("""
You are a legal expert specialized in European and GDPR law.

Use the following retrieved context to answer the question comprehensively and clearly.

Context:
{context}

Question:
{question}

Answer in a clear, structured format. Highlight key legal principles, relevant EU directives, and practical implications.
""")

In [16]:
from langchain_core.output_parsers import StrOutputParser

# Chat model used to answer from the retrieved context; temperature=0 keeps
# sampling randomness to a minimum.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# RAG pipeline: the incoming question is (a) forwarded unchanged and (b) sent to the
# retriever, whose documents are joined into one text block separated by blank lines.
# Both values fill the prompt, which is passed to the model and reduced to a plain string.
rag_chain = (
    RunnableMap(
        {
            "question": RunnablePassthrough(),
            "context": retriever
            | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)


In [None]:
from langchain_openai import ChatOpenAI
from datetime import datetime
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
import markdown
import re

def _dedupe_compliance_score(markdown_text: str) -> str:
    """Keep the first "Compliance Score ... NN/100" mention and remove later repeats."""
    score_pattern = re.compile(
        r"Compliance Score\s*[:\-]?\s*\d{1,3}\s*/\s*100",
        flags=re.IGNORECASE,
    )
    first = score_pattern.search(markdown_text)
    if first is None:
        return markdown_text
    # Everything up to and including the first mention is kept verbatim;
    # only the remainder is scrubbed of repeated score mentions.
    head = markdown_text[: first.end()]
    tail = score_pattern.sub("", markdown_text[first.end():])
    return head + tail


def _html_to_flowables(html_text: str, section_style, body_style) -> list:
    """Convert the report HTML into ReportLab flowables (section headings and body text)."""
    flowables = []
    # re.split with a single capture group returns body, heading, body, heading, ...
    # so odd positions are always the captured <h2> titles. Relying on the position
    # (instead of guessing from the first character) keeps headings such as
    # "1. Executive Summary" from being rendered as body text.
    for index, part in enumerate(re.split(r"<h2.*?>(.*?)</h2>", html_text)):
        if not part.strip():
            continue
        if index % 2 == 1:
            flowables.append(Paragraph(f"<b>{part.strip()}</b>", section_style))
            continue
        # Flatten list/paragraph tags into line breaks so that each bullet and
        # paragraph is visually separated inside a single Paragraph flowable.
        clean_html = (
            part
            .replace("</li>", "<br/><br/>")
            .replace("<ul>", "")
            .replace("</ul>", "")
            .replace("<p>", "")
            .replace("</p>", "<br/><br/>")
        )
        flowables.append(Paragraph(clean_html, body_style))
        flowables.append(Spacer(1, 6))
    return flowables


def generate_full_compliance_report(question: str, rag_answer: str) -> str:
    """
    Build a formatted PDF compliance report from a RAG answer.

    The RAG answer is sent to the chat model, which is asked to produce a Markdown
    report (executive summary, compliance score, detailed analysis, next steps).
    The Markdown is converted to HTML and laid out as an A4 PDF with a bold title,
    section headings and spaced paragraphs.

    Args:
        question: The user's original question; shown in the report header and
            included in the model prompt.
        rag_answer: The analysis text produced by the RAG chain.

    Returns:
        The name of the generated PDF file (``compliance_report_<timestamp>.pdf``),
        written relative to the current working directory.

    Side effects:
        Calls the OpenAI chat API, writes the PDF file and prints a confirmation line.
    """
    # Local import so the function carries its own dependency for escaping.
    from xml.sax.saxutils import escape

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.4)

    # Prompt asking the model for a structured Markdown report.
    report_prompt = f"""
You are a legal compliance expert specialized in EU law and GDPR.

Using the analysis below, produce a well-structured compliance report with the following sections:
- **Executive Summary**
- **Compliance Score** (one mention, formatted like "75/100")
- **Detailed Analysis**
- **Specific, Actionable Next Steps**

Use clear Markdown formatting:
- Use '## ' for section titles
- Use bold (**...**) for key terms
- Add blank lines between sections and bullet points
- Avoid repeating the Compliance Score

Analysis:
\"\"\"{rag_answer}\"\"\"

Question: "{question}"
"""

    # Model output (Markdown).
    response = llm.invoke(report_prompt)
    markdown_text = response.content.strip()

    # Drop a repeated score mention, if any, while keeping the first one intact.
    markdown_text = _dedupe_compliance_score(markdown_text)

    # Markdown -> HTML.
    html_text = markdown.markdown(markdown_text)

    # Sample the clock once so the filename and the header date always agree.
    generated_at = datetime.now()
    timestamp = generated_at.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"compliance_report_{timestamp}.pdf"

    styles = getSampleStyleSheet()
    normal = ParagraphStyle(
        "Normal",
        parent=styles["Normal"],
        fontName="Helvetica",
        fontSize=10.5,
        leading=15,
        spaceAfter=8,
    )
    title = ParagraphStyle(
        "Title",
        parent=styles["Title"],
        fontSize=16,
        leading=20,
        spaceAfter=12,
        textColor="#1a1a1a",
    )
    section = ParagraphStyle(
        "Section",
        parent=styles["Heading2"],
        fontSize=12,
        leading=16,
        spaceBefore=10,
        spaceAfter=6,
        textColor="#000000",
    )

    doc = SimpleDocTemplate(
        filename,
        pagesize=A4,
        leftMargin=60,
        rightMargin=60,
        topMargin=60,
        bottomMargin=50,
    )

    story = []

    # Header. The question is free text, so it is escaped before being embedded in
    # paragraph markup; otherwise "&" or "<" in it would be parsed as markup.
    story.append(Paragraph("<b>Compliance Analysis Report</b>", title))
    story.append(Paragraph(f"<b>Date:</b> {generated_at.strftime('%d %B %Y, %H:%M')}", normal))
    story.append(Paragraph(f"<b>Question:</b> {escape(question)}", normal))
    story.append(Spacer(1, 12))

    # Report body converted from Markdown -> HTML.
    story.extend(_html_to_flowables(html_text, section, normal))

    # Footer note.
    story.append(Spacer(1, 12))
    story.append(Paragraph("<i>Generated automatically by the Policy Checker (GenAI).</i>", normal))

    doc.build(story)
    print(f"✅ Formatted compliance report saved as {filename}")
    return filename

In [None]:
# Question sent to the RAG chain and printed in the report header.
# (The stray leading ": " was removed: it rendered as "Question: : I'm ..." in the
# PDF and was also passed to the retriever and the model as part of the query.)
question = "I'm building an AI tool that could help to easily check if the project that somebody is doing is compliant with EU norms. What should I be aware of laws regarding the project I want to do?"

# Retrieve context and draft the legal answer.
rag_answer = rag_chain.invoke(question)

# Turn the answer into a PDF; the returned filename is displayed as the cell output.
generate_full_compliance_report(question, rag_answer)


✅ Formatted compliance report saved as compliance_report_2025-11-03_17-43-21.pdf


'compliance_report_2025-11-03_17-43-21.pdf'