1. Setup Qdrant (base vectorielle)
2. Chargement documents GreenPower via Gradio
3. Chunking + Embeddings (sentence-transformers)
4. Stockage dans une collection vectorielle Qdrant
5. RAG pipeline : query → retrieval → Mistral
6. Interface Gradio interactive

In [None]:
# Cell 0 - Test imports AVANT de lancer Gradio
try:
    from langchain.schema import HumanMessage
    print("‚úì langchain.schema OK")
except:
    from langchain_core.messages import HumanMessage
    print("‚úì langchain_core.messages OK")
!pip install langchain-mistralai langchain qdrant-client gradio sentence-transformers pypdf
!pip install -r requirements.txt --force-reinstall
!pip install dotenv-python

In [19]:
import os
from pathlib import Path
import gradio as gr
from langchain_mistralai import ChatMistralAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Fix import
from langchain_community.embeddings import HuggingFaceEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
import uuid
from typing import List
import pypdf
import docx

In [20]:
# Populate os.environ from a local .env file, when one exists.
# Blank lines, '#' comment lines and lines without '=' are ignored; every other
# line is split on its first '=' and both sides are whitespace-trimmed.
env_path = Path('.env')
if env_path.exists():
    with open(env_path) as f:
        for raw_line in f:
            entry = raw_line.strip()
            if not entry or entry.startswith('#') or '=' not in entry:
                continue
            name, sep, setting = entry.partition('=')
            os.environ[name.strip()] = setting.strip()

# Connection settings, read back from the environment.
MISTRAL_API_KEY = os.environ.get('MISTRAL_API_KEY')
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY', None)
# Falls back to the literal placeholder 'URL' when QDRANT_URL is not set.
QDRANT_URL = os.environ.get('QDRANT_URL', 'URL')

In [21]:
# Initialize the shared components used by the upload and search cells.

# Embedding model used for both document chunks and questions.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Chat model that writes the final answer from the retrieved context.
llm = ChatMistralAI(model='mistral-small-latest', mistral_api_key=MISTRAL_API_KEY, temperature=0.7)

# Single Qdrant client and the collection every cell reads from / writes to.
# (A second, mis-indented copy of this assignment was removed: it was redundant
# and its stray indentation made the cell fail to parse.)
qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
COLLECTION_NAME = "greenpower_docs"


In [22]:
# Create the collection only when it is missing.
# An explicit existence check replaces the former bare `except:`, which reported
# every failure (bad URL, rejected API key, network error) as "already exists".
# Real errors now propagate instead of being hidden.
if qdrant_client.collection_exists(COLLECTION_NAME):
    print(f"✓ Collection '{COLLECTION_NAME}' existe déjà")
else:
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        # Vector size 384 with cosine distance.
        # NOTE(review): size must equal the embedding model's output dimension - confirm.
        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    )
    print(f"✓ Collection '{COLLECTION_NAME}' créée")

# Cell 5
def extract_text(file_path: str) -> str:
    """Return the plain text of a PDF, DOCX or TXT file.

    The format is chosen from the file extension, compared case-insensitively.

    Args:
        file_path: Path of the document to read.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines. Any other extension yields an empty string.
    """
    ext = Path(file_path).suffix.lower()
    if ext == '.pdf':
        with open(file_path, 'rb') as f:
            # The file imports `pypdf`; the previous `PyPDF2.PdfReader`
            # reference raised NameError for every PDF.
            reader = pypdf.PdfReader(f)
            # `or ""` keeps the join safe if a page yields no text object.
            return '\n'.join(page.extract_text() or "" for page in reader.pages)
    if ext == '.docx':
        doc = docx.Document(file_path)
        return '\n'.join(p.text for p in doc.paragraphs)
    if ext == '.txt':
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    # Unsupported extension: callers treat an empty string as "nothing to index".
    return ""

✓ Collection 'greenpower_docs' existe déjà


In [None]:
def upload_documents(files):
    """Chunk, embed and store the uploaded documents in Qdrant.

    Args:
        files: Items received from the Gradio file input. Each item is either
            an object whose ``name`` attribute holds the file path, or the
            path itself.

    Returns:
        A status message for the UI: the number of stored chunks on success,
        a notice when no file was given, or the error text on failure.
    """
    if not files:
        return "❌ Aucun fichier"

    try:
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        uploaded_count = 0

        for file in files:
            # Gradio may hand over a file object rather than a bare path.
            file_path = file.name if hasattr(file, 'name') else file
            text = extract_text(file_path)
            if not text:
                continue

            chunks = text_splitter.split_text(text)
            if not chunks:
                # Nothing to index (e.g. whitespace-only text): skip the
                # upsert rather than send an empty batch.
                continue

            source_name = Path(file_path).name
            # One batched embedding call per file instead of one call per chunk.
            vectors = embeddings.embed_documents(chunks)
            points = [
                PointStruct(
                    id=str(uuid.uuid4()),
                    vector=vector,
                    payload={"text": chunk, "source": source_name},
                )
                for chunk, vector in zip(chunks, vectors)
            ]

            qdrant_client.upsert(collection_name=COLLECTION_NAME, points=points)
            uploaded_count += len(points)

        return f"✓ {uploaded_count} chunks uploadés depuis {len(files)} fichiers dans Qdrant"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"❌ Erreur: {str(e)}"

# Cell 7 - Fix search_and_answer
def search_and_answer(question: str, top_k: int = 3) -> str:
    """Answer a question from the chunks stored in Qdrant.

    The question is embedded, the ``top_k`` closest chunks are retrieved, and
    the chat model is asked to answer using only that context.

    Args:
        question: The user's question.
        top_k: Number of chunks to retrieve.

    Returns:
        The model's answer followed by a "Sources" list with scores, or a
        user-facing message when the question is empty, nothing is found, or
        an error occurs.
    """
    if not question or not question.strip():
        return "❌ Veuillez poser une question"

    try:
        query_vector = embeddings.embed_query(question)

        results = qdrant_client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            # The UI control may deliver a float; the limit must be an integer.
            limit=int(top_k),
        )

        if not results:
            return "❌ Aucun document trouvé dans la base"

        context = "\n\n".join(hit.payload["text"] for hit in results)

        prompt = f"""Contexte GreenPower:
{context}

Question: {question}

Réponds en te basant uniquement sur le contexte fourni."""

        # HumanMessage is looked up in two places; only a failed import
        # triggers the fallback.
        try:
            from langchain.schema import HumanMessage
        except ImportError:
            from langchain_core.messages import HumanMessage

        response = llm.invoke([HumanMessage(content=prompt)]).content

        sources = "\n".join(
            f"- {hit.payload['source']} (score: {hit.score:.2f})" for hit in results
        )

        return f"{response}\n\n**Sources:**\n{sources}"

    except Exception as e:
        # UI boundary: show the failure in the answer pane instead of raising.
        return f"❌ Erreur: {str(e)}"

# Cell 8 - Interface Gradio FIXÉE
# Two-tab Gradio app: one tab ingests documents, the other answers questions.
with gr.Blocks(title="GreenPower RAG") as demo:
    gr.Markdown("# 🌱 GreenPower RAG System")

    with gr.Tab("📤 Upload Documents"):
        # Restrict the picker to the extensions extract_text can read.
        file_input = gr.File(
            file_count="multiple",
            label="Documents GreenPower (PDF, DOCX, TXT)",
            file_types=[".pdf", ".docx", ".txt"],
        )
        upload_btn = gr.Button("Upload", variant="primary")
        upload_output = gr.Textbox(label="Status", lines=3)

        upload_btn.click(
            fn=upload_documents,
            inputs=[file_input],
            outputs=[upload_output],
        )

    with gr.Tab("💬 Ask Questions"):
        question_input = gr.Textbox(
            label="Question",
            placeholder="Ex: Quel est le prix du GreenPower Max?",
            lines=2,
        )
        top_k_slider = gr.Slider(
            minimum=1,
            maximum=10,
            value=3,
            step=1,
            label="Nombre de chunks à récupérer",
        )
        ask_btn = gr.Button("Poser la question", variant="primary")
        answer_output = gr.Markdown(label="Réponse")

        ask_btn.click(
            fn=search_and_answer,
            inputs=[question_input, top_k_slider],
            outputs=[answer_output],
        )

# Serve on localhost only, without a public share link. The original call was
# truncated mid-argument and never closed; it is completed here as `debug=True`.
demo.launch(server_name="127.0.0.1", server_port=7860, share=False, debug=True)
    # NOTE(review): orphaned function body - no `def` line for it is present in
    # the file. It reads `files` and returns a status string, so it looks like an
    # earlier draft of upload_documents(files) - confirm, then restore the header
    # or remove this fragment.

    # Splitter configured with chunk_size=500 and chunk_overlap=50.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    uploaded_count = 0
    
    for file in files:
        # Each item is expected to expose its path as `.name`.
        text = extract_text(file.name)
        chunks = text_splitter.split_text(text)
        
        # Build one point per chunk: random UUID, embedding vector, and a payload
        # carrying the chunk text plus the source file's base name.
        points = []
        for chunk in chunks:
            vector = embeddings.embed_query(chunk)
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=vector,
                payload={"text": chunk, "source": Path(file.name).name}
            )
            points.append(point)
        
        # Store this file's points, then add its chunk count to the running total.
        qdrant_client.upsert(collection_name=COLLECTION_NAME, points=points)
        uploaded_count += len(chunks)
    
    # Status message: total chunks stored and number of files received.
    return f"‚úì {uploaded_count} chunks upload√©s depuis {len(files)} fichiers"


In [24]:
# Cell 7
def search_and_answer(question: str, top_k: int = 3) -> str:
    """Answer a question from the chunks stored in Qdrant.

    The question is embedded, the ``top_k`` closest chunks are retrieved, and
    the chat model is asked to answer using only that context.

    Args:
        question: The user's question.
        top_k: Number of chunks to retrieve.

    Returns:
        The model's answer followed by a "Sources" list with scores, or a
        user-facing message when the question is empty, nothing is found, or
        an error occurs.
    """
    # Reject empty input up front instead of embedding and searching for it.
    if not question or not question.strip():
        return "❌ Veuillez poser une question"

    try:
        query_vector = embeddings.embed_query(question)

        results = qdrant_client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            # The UI control may deliver a float; the limit must be an integer.
            limit=int(top_k),
        )

        if not results:
            return "❌ Aucun document trouvé"

        context = "\n\n".join(hit.payload["text"] for hit in results)

        prompt = f"""Contexte GreenPower:
{context}

Question: {question}

Réponds en te basant uniquement sur le contexte fourni."""

        # HumanMessage is looked up in two places so the function works
        # whichever of the two modules is importable.
        try:
            from langchain.schema import HumanMessage
        except ImportError:
            from langchain_core.messages import HumanMessage

        response = llm.invoke([HumanMessage(content=prompt)]).content

        sources = "\n".join(
            f"- {hit.payload['source']} (score: {hit.score:.2f})" for hit in results
        )

        return f"{response}\n\n**Sources:**\n{sources}"

    except Exception as e:
        # UI boundary: show the failure in the answer pane rather than letting
        # the exception escape into the Gradio event handler.
        return f"❌ Erreur: {str(e)}"

In [26]:
# Cell 8
# Gradio Interface
# Two-tab Gradio app: one tab ingests documents, the other answers questions.
with gr.Blocks(title="GreenPower RAG") as demo:
    gr.Markdown("# 🌱 GreenPower RAG System")

    with gr.Tab("📤 Upload Documents"):
        # Restrict the picker to the extensions extract_text can read; any other
        # file would be silently skipped as empty text.
        file_input = gr.File(
            file_count="multiple",
            label="Documents GreenPower (PDF, DOCX, TXT)",
            file_types=[".pdf", ".docx", ".txt"],
        )
        upload_btn = gr.Button("Upload")
        upload_output = gr.Textbox(label="Status")
        upload_btn.click(upload_documents, inputs=file_input, outputs=upload_output)

    with gr.Tab("💬 Ask Questions"):
        question_input = gr.Textbox(label="Question", placeholder="Posez une question sur GreenPower...")
        top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Nombre de chunks à récupérer")
        ask_btn = gr.Button("Ask")
        answer_output = gr.Markdown(label="Réponse")
        ask_btn.click(search_and_answer, inputs=[question_input, top_k_slider], outputs=answer_output)

# Serve on localhost only (port 7990), without a public share link.
demo.launch(server_name="127.0.0.1", server_port=7990, share=False)

* Running on local URL:  http://127.0.0.1:7990
* To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\agaragar\Documents\00 CLIENTS\0000 VALLUP\00 MENSAFLOW\00 CNAM\UseCase\Jupyter\.venv\Lib\site-packages\gradio\queueing.py", line 766, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\agaragar\Documents\00 CLIENTS\0000 VALLUP\00 MENSAFLOW\00 CNAM\UseCase\Jupyter\.venv\Lib\site-packages\gradio\route_utils.py", line 355, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\agaragar\Documents\00 CLIENTS\0000 VALLUP\00 MENSAFLOW\00 CNAM\UseCase\Jupyter\.venv\Lib\site-packages\gradio\blocks.py", line 2152, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\agaragar\Documents\00 CLIENTS\0000 VALLUP\00 MENSAFLOW\00 CNAM\UseCase\Jupyter\.venv\Lib\site-packages\gradio\blocks.py", line 1629, in call_function
    predicti