<a href="https://colab.research.google.com/github/paulusieto/IA-Mates/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Start from a clean checkout: delete any previous clone, then fetch the repo
# whose src/ directory the cells below read from.
!rm -rf /content/IA-Mates
!git clone https://github.com/paulusieto/IA-Mates.git

Cloning into 'IA-Mates'...
remote: Enumerating objects: 504, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 504 (delta 2), reused 0 (delta 0), pack-reused 496 (from 2)[K
Receiving objects: 100% (504/504), 3.68 MiB | 8.08 MiB/s, done.
Resolving deltas: 100% (216/216), done.


In [None]:
# Installs
# NOTE(review): versions are unpinned, so a fresh run may pull releases that are
# incompatible with the imports below — consider pinning. flask-ngrok is
# installed here but is not imported in any of the cells shown in this notebook.
!pip install langchain langchain-community chromadb sentence-transformers unstructured flask-ngrok pyngrok
!pip install codecarbon


# Imports
from flask import Flask, render_template, request, jsonify, redirect, url_for
from pyngrok import ngrok
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import json, os, re, requests, random

# Codecarbon: start measuring the emissions of this session.
# The matching tracker.stop() call lives in the final "Running" cell.
from codecarbon import EmissionsTracker
tracker = EmissionsTracker(project_name="IA-Mates Chatbot")
tracker.start()


# Config
# Secrets must never be stored in the notebook itself: read them from the
# environment and fall back to a hidden interactive prompt.
# NOTE(review): the credentials that used to be hardcoded here are exposed in
# the repository history and should be revoked and regenerated.
from getpass import getpass

MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY") or getpass("Mistral API key: ")
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)



In [None]:
# Load questions
# Every record of the JSONL dataset is kept in `questions`; records whose type
# is one of the three known types are additionally grouped by their source.
questions = []
qcm_by_source = {}
open_by_source = {}
tf_by_source = {}

# Maps a record's "type" field to the per-source index it belongs to.
_index_by_type = {
    "qcm": qcm_by_source,
    "open": open_by_source,
    "qcm_tf": tf_by_source,
}

with open("/content/IA-Mates/src/merged_dataset.jsonl", encoding="utf-8") as f:
    for line in f:
        # A blank line is not valid JSON and would abort the whole load
        if not line.strip():
            continue
        q = json.loads(line)
        source = q.get("source", "Unknown")
        qtype = q.get("type", "qcm")  # records without a type are treated as QCM
        questions.append(q)
        # Unknown types stay in `questions` but are not indexed by source
        index = _index_by_type.get(qtype)
        if index is not None:
            index.setdefault(source, []).append(q)

# Load RAG index
# Embedding model handed to the vector store as its embedding function.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Open the persisted Chroma collection named "law_chunks".
_chroma_kwargs = {
    "persist_directory": "/content/IA-Mates/src/laws/chroma_laws_index.json",
    "embedding_function": embedding_model,
    "collection_name": "law_chunks",
}
db = Chroma(**_chroma_kwargs)

# RAG helpers
MISTRAL_CHAT_URL = "https://api.mistral.ai/v1/chat/completions"
# Upper bound (seconds) for one completion call, so a stalled connection cannot
# block a request handler forever.
MISTRAL_TIMEOUT_S = 60


def _retrieve_context(query, k=4):
    """Return the text of the `k` most similar indexed chunks, joined by blank lines."""
    return "\n\n".join(doc.page_content for doc in db.similarity_search(query, k=k))


def _mistral_chat(system_prompt, user_prompt):
    """Send one system + user exchange to the Mistral chat API and return the reply text.

    Raises:
        Whatever `requests` raises on a network failure, timeout or non-2xx
        status, and KeyError/IndexError/ValueError when the response body does
        not have the expected shape.
    """
    res = requests.post(
        MISTRAL_CHAT_URL,
        headers={
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json"
        },
        json={
            "model": "mistral-tiny",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        },
        timeout=MISTRAL_TIMEOUT_S,
    )
    # Surface API errors (bad key, rate limit, ...) as an explicit HTTP error
    # rather than as a KeyError on the missing "choices" field.
    res.raise_for_status()
    return res.json()["choices"][0]["message"]["content"].strip()


def rag_answer(question_text, user_input):
    """Ask the LLM to assess a free-text answer against retrieved legal context.

    Args:
        question_text: The open question; also used as the retrieval query.
        user_input: The student's written answer.

    Returns:
        The model's evaluation text, or a fixed fallback sentence when
        retrieval or the API call fails (the error is printed, never raised).
    """
    try:
        # Retrieval is inside the try so an index failure also yields the fallback
        context = _retrieve_context(question_text)
        prompt = f"""
You are a European patent law expert. Below is a student's written answer to an open exam question. You are also provided with relevant legal context from official sources (EPC, PCT, Guidelines, Case Law). Based only on that, assess if the answer is correct and explain why.

Question:
{question_text}

User's Answer:
{user_input}

Relevant Legal Context:
{context}

Please reply with a clear evaluation and explanation.
"""
        return _mistral_chat("You are a patent law examiner.", prompt.strip())
    except Exception as e:
        print("Mistral error:", e)
        return "The system was unable to evaluate this answer using the provided legal context."

def rag_chat(question_text):
    """Answer a free-form question using retrieved legal context.

    Args:
        question_text: The user's question; also used as the retrieval query.

    Returns:
        The model's answer text, or a fixed fallback sentence when retrieval or
        the API call fails (the error is printed, never raised).
    """
    try:
        # Retrieval is inside the try so an index failure also yields the fallback
        context = _retrieve_context(question_text)

        prompt = f"""
You are a helpful AI assistant specialized in European patent law. Use the context below to answer the user’s question.
If the answer is not clearly found in the context, reply: "I’m not sure based on the available legal sources."

Context:
{context}

User's question:
{question_text}

Answer:
""".strip()

        return _mistral_chat(
            "You are a legal assistant helping users with European patent law.",
            prompt,
        )
    except Exception as e:
        print("Mistral error:", e)
        return "The system could not answer your question using the available legal documents."



In [None]:
# Flask application; static assets and templates are served from the cloned repo
app = Flask(
    __name__,
    static_folder="/content/IA-Mates/src/static",
    template_folder="/content/IA-Mates/src/templates",
)

@app.route("/")
def home():
    """Render chat.html, passing it the full list of loaded questions."""
    page = render_template("chat.html", questions=questions)
    return page

def _match_sources(source_key, data_by_source):
    """Collect the questions of every source selected by `source_key`.

    "epac" selects all sources whose name starts with "EPAC_", "pre-exam" all
    sources whose name starts with "PRE"; any other key selects the source(s)
    with that exact name. All comparisons ignore case.
    """
    source_key = source_key.lower()
    if source_key == "epac":
        def selected(name):
            return name.upper().startswith("EPAC_")
    elif source_key == "pre-exam":
        def selected(name):
            return name.upper().startswith("PRE")
    else:
        # The key was lowercased above, so it must be compared with lowercased
        # source names; an exact dict lookup would miss e.g. "Mock" vs "mock".
        def selected(name):
            return name.lower() == source_key

    return [
        q
        for name, qlist in data_by_source.items()
        if selected(str(name))  # str(): a record's "source" may not be a string
        for q in qlist
    ]


@app.route("/menu/<source>/<qtype>")
def open_question_list(source, qtype):
    """Render the list of questions of one type for one source (or source group).

    Args:
        source: Source name from the URL, or the group aliases "epac" / "pre-exam".
        qtype: One of "qcm", "open" or "qcm_tf".

    Returns:
        The rendered questions.html page, or a 400 response for an unknown qtype.
    """
    index_by_type = {
        "qcm": qcm_by_source,
        "open": open_by_source,
        "qcm_tf": tf_by_source,
    }
    data_by_source = index_by_type.get(qtype)
    if data_by_source is None:
        return "Invalid question type", 400

    filtered = _match_sources(source, data_by_source)
    return render_template("questions.html", questions=filtered, qtype=qtype, source=source)


@app.route("/api/ask", methods=["POST"])
def ask():
    """Answer a free-form question posted as JSON: {"question": "..."}.

    Returns:
        {"answer": ...} on success, or a 400 JSON error when the body is not a
        JSON object or the question is empty.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Expected a JSON object"}), 400

    question_text = str(data.get("question") or "").strip()
    if not question_text:
        # Avoid spending a retrieval + LLM call on an empty prompt
        return jsonify({"error": "No question provided"}), 400

    answer = rag_chat(question_text)
    return jsonify({"answer": answer})

@app.route("/api/check", methods=["POST"])
def check_answer():
    """Grade a multiple-choice answer posted as {"chunk_id": ..., "user_answer": ...}.

    If the stored answer text contains "The correct answer is X", the user's
    answer is compared with X directly. Otherwise the question, the answer and
    retrieved legal context are sent to the LLM, which is asked for a JSON verdict.

    Returns:
        JSON with "correct", "correct_letter" and "explanation"; "correct" is
        null and "correct_letter" is "?" when no verdict could be obtained.
        400 when the body is not a JSON object, 404 when the question is unknown.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Expected a JSON object"}), 400
    chunk_id = data.get("chunk_id")
    user_input = str(data.get("user_answer") or "").strip().upper()

    # Find the question (.get: a record without "chunk_id" must not raise)
    question = None
    if chunk_id is not None:
        question = next((q for q in questions if q.get("chunk_id") == chunk_id), None)
    if not question:
        return jsonify({"error": "Question not found"}), 404

    # The stored answer is not guaranteed to be text; only text can be parsed
    stored_answer = question.get("answer")
    explanation = stored_answer.strip() if isinstance(stored_answer, str) else ""

    # Try to extract correct letter from explanation
    match = re.search(r"The correct answer is\s+([A-D1-9])", explanation, re.IGNORECASE)
    if match:
        correct_letter = match.group(1).upper()
        is_correct = user_input == correct_letter

        # Clean old verdict from explanation. Every part is optional so that a
        # leading verdict sentence is removed even when no bare option label
        # ("B. ") follows it; otherwise the stale verdict would be repeated
        # after (and could contradict) the fresh one built below.
        cleaned_expl = re.sub(
            r"^(The answer is (correct|incorrect)\.)?\s*(The correct answer is\s+[A-D1-9]\.?)?\s*([A-D1-9]\.\s*)?",
            "",
            explanation,
            count=1,
            flags=re.IGNORECASE
        ).strip()

        verdict = "The answer is correct." if is_correct else "The answer is incorrect."
        final_expl = f"{verdict} The correct answer is {correct_letter}. {cleaned_expl}".strip()

        return jsonify({
            "correct": is_correct,
            "correct_letter": correct_letter,
            "explanation": final_expl
        })

    # Fallback to RAG if no correct letter can be extracted
    print("⚠️ No correct letter found in explanation, using RAG fallback.")

    try:
        query_text = question.get("question", "")
        # Retrieval is inside the try so an index failure yields the
        # "unable to verify" payload instead of an HTTP 500
        context_chunks = db.similarity_search(query_text, k=3)
        context_text = "\n\n".join(chunk.page_content for chunk in context_chunks)

        prompt = f"""
You are an expert in European patent law.

A student answered the following multiple-choice question:
---
Question:
{query_text}

Student's Answer: {user_input}

Here are excerpts from official legal sources that may help you decide:
---
{context_text}

Your task:
1. Check whether the student's answer is correct.
2. Clearly state in your explanation:
   - If the answer is correct or incorrect
   - What the correct answer is (A, B, C, D, etc.)
   - A brief legal justification

⚠️ The first sentence of your explanation **must be**:
"The answer is correct." or "The answer is incorrect."

⚠️ Then provide a short explanation.

⚠️ Reply only in this strict JSON format:
{{
  "correct": true/false,
  "correct_letter": "A",
  "explanation": "The answer is incorrect. The correct answer is A. ... [explanation]"
}}
""".strip()

        headers = {
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "mistral-tiny",
            "messages": [
                {"role": "system", "content": "You are a legal expert helping with patent law questions."},
                {"role": "user", "content": prompt}
            ]
        }

        res = requests.post(
            "https://api.mistral.ai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # seconds; never let a stalled call block the handler
        )
        res.raise_for_status()
        text = res.json()["choices"][0]["message"]["content"]

        # Defaults guarantee the three keys even if the model omits some
        rag_response = {
            "correct": None,
            "correct_letter": "?",
            "explanation": text.strip()
        }
        # Try to extract clean JSON response
        match = re.search(r"\{.*\}", text, re.DOTALL)
        if match:
            rag_response.update(json.loads(match.group(0)))

        return jsonify(rag_response)

    except Exception as e:
        print("RAG fallback error:", e)
        return jsonify({
            "correct": None,
            "correct_letter": "?",
            "explanation": "Unable to verify answer using legal context."
        })


@app.route("/api/open", methods=["POST"])
def open_check():
    """Evaluate a free-text answer posted as {"chunk_id": ..., "user_answer": ...}.

    Returns:
        {"explanation": ...} with the text produced by rag_answer; 400 when the
        body is not a JSON object, 404 when the question is unknown.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Expected a JSON object"}), 400
    chunk_id = data.get("chunk_id")
    user_answer = str(data.get("user_answer") or "")

    # .get: a record without "chunk_id" must not raise for every lookup
    question = None
    if chunk_id is not None:
        question = next((q for q in questions if q.get("chunk_id") == chunk_id), None)
    if not question:
        return jsonify({"error": "Not found"}), 404

    ai_expl = rag_answer(question.get("question", ""), user_answer)
    return jsonify({"explanation": ai_expl})

@app.route("/api/clarify", methods=["POST"])
def clarify_question():
    """Split a raw MCQ text into its stem and its options using the LLM.

    Expects {"question": "<raw text>"}.

    Returns:
        JSON with "question" and "choices". When the input is empty or the LLM
        call/parsing fails, the raw text is returned with an empty choice list.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw = data.get("question") or ""
    if not isinstance(raw, str):
        raw = str(raw)

    fallback = {"question": raw, "choices": []}
    if not raw.strip():
        # Nothing to structure: skip the API call
        return jsonify(fallback)

    prompt = f"""
You are an assistant that structures MCQ questions **for display**, without modifying or rewriting the text.

From the raw question below, return:

- "question": everything BEFORE the first option (A., B., 1., 2., etc.). Include all context and legal background.
- "choices": a list of all the options (e.g. A. ..., B. ..., etc.)

⚠️ Never shorten, translate, or rewrite any part.
⚠️ Keep all context, dates, names, articles, legal expressions intact.

Raw input:
{raw}

Expected JSON:
{{ "question": "...", "choices": ["A. ...", "B. ...", ...] }}
"""

    headers = {
        "Authorization": f"Bearer {MISTRAL_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "mistral-tiny",
        "messages": [
            {"role": "system", "content": "Tu es un assistant qui structure des QCM pour affichage web sans modifier leur contenu."},
            {"role": "user", "content": prompt.strip()}
        ]
    }
    try:
        res = requests.post(
            "https://api.mistral.ai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # seconds; never let a stalled call block the handler
        )
        res.raise_for_status()
        msg = res.json()["choices"][0]["message"]["content"]
        match = re.search(r"\{.*\}", msg, re.DOTALL)
        if not match:
            return jsonify(fallback)
        # Start from the fallback so both keys exist even if the model omits one
        structured = dict(fallback)
        structured.update(json.loads(match.group(0)))
        return jsonify(structured)
    except Exception as e:
        print(" clarify_question error:", e)
        return jsonify(fallback)

@app.route("/api/check_tf", methods=["POST"])
def check_tf():
    """Grade a multi-part true/false question posted as {"chunk_id": ..., "user_answer": {...}}.

    When the stored answer is a non-empty dict it is returned as the answer
    key. Otherwise the question, the user's answers and retrieved legal context
    are sent to the LLM, which is asked for the answer key as JSON.

    Returns:
        JSON with "correct_answers" and "explanation"; 400 when the body is not
        a JSON object, 404 when the question is unknown.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Expected a JSON object"}), 400
    chunk_id = data.get("chunk_id")
    user_answer = data.get("user_answer", {})

    # .get: a record without "chunk_id" must not raise for every lookup
    question = None
    if chunk_id is not None:
        question = next((q for q in questions if q.get("chunk_id") == chunk_id), None)
    if not question:
        return jsonify({"error": "Not found"}), 404

    correct = question.get("answer")
    explanation = question.get("explanation")

    # Structured answer key available: no model call needed
    if isinstance(correct, dict) and correct:
        return jsonify({
            "correct_answers": correct,
            "explanation": explanation or "Answer feedback complete."
        })

    # RAG fallback if no structured answer provided
    try:
        query_text = question.get("question", "")
        # Retrieval is inside the try so an index failure yields the
        # "unable to verify" payload instead of an HTTP 500
        context_chunks = db.similarity_search(query_text, k=3)
        context_text = "\n\n".join(chunk.page_content for chunk in context_chunks)

        prompt = f"""
You are an expert in European patent law.

A student answered the following true/false multi-part question:
---
{query_text}

Their answers were:
{json.dumps(user_answer, indent=2)}

Here are excerpts from official legal sources:
---
{context_text}

Your task:
1. For each statement, decide if the student's answer is correct.
2. Return a JSON like:
{{
  "correct_answers": {{ "1": "True", "2": "False", ... }},
  "explanation": "Start with 'The answer is correct/incorrect.' then explain briefly."
}}
""".strip()

        headers = {
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "mistral-tiny",
            "messages": [
                {"role": "system", "content": "You are a legal assistant checking exam answers for true/false sub-statements."},
                {"role": "user", "content": prompt}
            ]
        }

        res = requests.post(
            "https://api.mistral.ai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # seconds; never let a stalled call block the handler
        )
        res.raise_for_status()
        msg = res.json()["choices"][0]["message"]["content"]

        # Defaults guarantee both keys even if the model omits one
        rag_data = {
            "correct_answers": {},
            "explanation": "Could not extract AI feedback."
        }
        match = re.search(r"\{.*\}", msg, re.DOTALL)
        if match:
            rag_data.update(json.loads(match.group(0)))
        return jsonify(rag_data)
    except Exception as e:
        print("RAG fallback error:", e)
        return jsonify({
            "correct_answers": {},
            "explanation": "Unable to verify using AI."
        })




@app.route("/api/clarify_tf", methods=["POST"])
def clarify_tf_question():
    """Split a raw multi-part true/false question into its stem and sub-statements.

    Expects {"question": "<raw text>"}.

    Returns:
        JSON with "main" and "sub". When the input is empty or the LLM
        call/parsing fails, the raw text is returned with an empty sub list.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw = data.get("question") or ""
    if not isinstance(raw, str):
        raw = str(raw)

    fallback = {"main": raw, "sub": []}
    if not raw.strip():
        # Nothing to structure: skip the API call
        return jsonify(fallback)

    # The text is labelled "Raw input" (not "Example input"): it is the actual
    # question to split, and calling it an example invites the model to treat
    # it as an illustration instead of processing it.
    prompt = f"""
You are a structuring assistant for legal multiple-true-false MCQs.

From the input below, extract:

- "main": everything before the list of sub-questions.
- "sub": a list of the sub-questions (10.1, 10.2, etc.), one per line.

Raw input:
{raw}

Respond in JSON format:
{{ "main": "...", "sub": ["...", "...", "...", "..."] }}
""".strip()

    headers = {
        "Authorization": f"Bearer {MISTRAL_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "mistral-tiny",
        "messages": [
            {"role": "system", "content": "You extract legal true/false sub-statements from questions."},
            {"role": "user", "content": prompt}
        ]
    }
    try:
        res = requests.post(
            "https://api.mistral.ai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # seconds; never let a stalled call block the handler
        )
        res.raise_for_status()
        msg = res.json()["choices"][0]["message"]["content"]
        match = re.search(r"\{.*\}", msg, re.DOTALL)
        if not match:
            return jsonify(fallback)
        # Start from the fallback so both keys exist even if the model omits one
        structured = dict(fallback)
        structured.update(json.loads(match.group(0)))
        return jsonify(structured)
    except Exception as e:
        print(" clarify_tf error:", e)
        return jsonify(fallback)

@app.route("/api/questions")
def get_questions():
    """Return every loaded question record, serialized as JSON."""
    response = jsonify(questions)
    return response



In [None]:
# Running
port = 5000
public_url = ngrok.connect(port)
print("🔗 Access your app:", public_url)
try:
    # Bind explicitly to the tunnelled port instead of relying on Flask's default
    app.run(port=port)
finally:
    # Always record the emissions measurement, even if the server exits with an error
    emissions = tracker.stop()
    # Shut ngrok down so re-running this cell does not leave stale tunnels open
    ngrok.kill()
# Last expression: shows the value returned by tracker.stop() as the cell output
emissions

🔗 Access your app: NgrokTunnel: "https://dcff-34-106-214-52.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:27] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:28] "GET /static/style.css HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:28] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:31] "GET /menu/epac/qcm HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:32] "[36mGET /static/style.css HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:33] "POST /api/clarify HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:39] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:39] "[36mGET /static/style.css HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:42] "GET /menu/pre-exam/qcm_tf HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Mar/2025 02:13:42] "[36mGET /static/style.css HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 

0.0006935539637099686