# 🧪 Test LlamaStack dans OpenShift AI

Ce notebook teste la fonctionnalité LlamaStack avec le modèle Llama-3.2-1B-Instruct déployé sur OpenShift AI.

## 📋 Prérequis
- Namespace: `llama-instruct-32-1b-demo`
- LlamaStackDistribution: `lsd-llama-32-1b-instruct`
- InferenceService: `llama-32-1b-instruct`

## 🔗 Services utilisés
- `lsd-llama-32-1b-instruct-service:8321` (LlamaStack)
- `llama-32-1b-instruct-predictor:80` (vLLM)

In [None]:
# Installation des d√©pendances
!pip install llama-stack-client fire requests

In [None]:
# Import libraries: standard library first, then third-party packages.
import json
import os
import uuid

import requests
from llama_stack_client import Agent, AgentEventLogger, Client

print("✅ Librairies importées")

## 🔍 Test 1: Vérification des services

In [None]:
# Connectivity check: ask the LlamaStack REST API for its model list.
# `llamastack_url` is reused by later cells, so it stays a notebook-level name.
llamastack_url = "http://lsd-llama-32-1b-instruct-service:8321"

print(f"🔗 Test de connexion à LlamaStack: {llamastack_url}")

try:
    response = requests.get(f"{llamastack_url}/v1/models", timeout=10)
    # Only show the success mark when the request actually succeeded.
    status_icon = "✅" if response.status_code == 200 else "❌"
    print(f"{status_icon} Status: {response.status_code}")

    if response.status_code == 200:
        models = response.json()
        # The model list is read from the "data" key; a bare JSON list is
        # accepted as well so an unexpected payload shape does not abort.
        entries = models.get('data', []) if isinstance(models, dict) else models
        print("📋 Modèles disponibles:")
        for model in entries:
            print(f"  - {model.get('identifier', 'N/A')} ({model.get('model_type', 'N/A')})")
    else:
        print(f"❌ Erreur: {response.text}")

except Exception as e:
    # Deliberately broad: this is a diagnostic cell and the notebook should
    # keep running, reporting whatever went wrong.
    print(f"❌ Erreur de connexion: {e}")

In [None]:
# Connectivity check: ask the vLLM predictor service for its model list.
# `vllm_url` is reused by later cells, so it stays a notebook-level name.
vllm_url = "http://llama-32-1b-instruct-predictor:80"

print(f"🔗 Test de connexion à vLLM: {vllm_url}")

try:
    response = requests.get(f"{vllm_url}/v1/models", timeout=10)
    # Only show the success mark when the request actually succeeded.
    status_icon = "✅" if response.status_code == 200 else "❌"
    print(f"{status_icon} Status: {response.status_code}")

    if response.status_code == 200:
        models = response.json()
        # The model list is read from the "data" key; a bare JSON list is
        # accepted as well so an unexpected payload shape does not abort.
        entries = models.get('data', []) if isinstance(models, dict) else models
        print("📋 Modèles vLLM disponibles:")
        for model in entries:
            print(f"  - {model.get('id', 'N/A')}")
    else:
        print(f"❌ Erreur: {response.text}")

except Exception as e:
    # Deliberately broad: this is a diagnostic cell and the notebook should
    # keep running, reporting whatever went wrong.
    print(f"❌ Erreur de connexion: {e}")

## 🤖 Test 2: Client LlamaStack

In [None]:
# Connect through the LlamaStack Python client and list the registered models.
# Reset the shared names first: if this cell fails, later cells must not pick
# up a stale `models` value (such as the raw JSON payload bound by an earlier
# connectivity cell) or a client left over from a previous run.
client = None
models = []

try:
    client = Client(base_url=llamastack_url)
    print("✅ Client LlamaStack connecté")

    # List the models known to the client.
    models = client.models.list()
    print(f"📋 {len(models)} modèles trouvés:")
    for model in models:
        print(f"  - {model.identifier} ({model.model_type})")

except Exception as e:
    # Deliberately broad: report the failure and let the notebook continue.
    print(f"❌ Erreur client LlamaStack: {e}")

In [None]:
# Check which vector databases are registered in LlamaStack.
# NOTE(review): newer llama-stack-client releases may expose this under a
# different resource name than `vector_dbs` — confirm against the installed
# client version.
try:
    vector_dbs = client.vector_dbs.list()
    print(f"🗄️ {len(vector_dbs)} bases vectorielles trouvées:")
    for db in vector_dbs:
        print(f"  - {db.identifier}")

    # An empty list is reported as informational, not as an error.
    if not vector_dbs:
        print("ℹ️ Aucune base vectorielle trouvée (normal si pas encore d'ingestion)")

except Exception as e:
    # Deliberately broad: report the failure and let the notebook continue.
    print(f"❌ Erreur bases vectorielles: {e}")

## 💬 Test 3: Agent LlamaStack

In [None]:
# Create a LlamaStack agent on the first LLM model and run one streamed turn.
try:
    # Query the client directly instead of reusing the notebook-level `models`
    # name: that name is also bound by the connectivity cells (to a raw JSON
    # payload), so it may not hold client model objects if a cell failed.
    llm_model = next(
        (model for model in client.models.list() if model.model_type == "llm"),
        None,
    )

    if llm_model is None:
        print("❌ Aucun modèle LLM trouvé")
    else:
        print(f"🤖 Modèle LLM trouvé: {llm_model.identifier}")

        # Create a simple agent (no RAG for now).
        agent = Agent(
            client,
            model=llm_model.identifier,
            instructions="Tu es un assistant IA utile et amical.",
        )
        print("✅ Agent LlamaStack créé")

        # A random suffix keeps session names distinct across runs.
        session_id = agent.create_session(session_name=f"test_session_{uuid.uuid4().hex[:8]}")
        print(f"📝 Session créée: {session_id}")

        # Ask a simple question and stream the answer.
        response = agent.create_turn(
            messages=[{"role": "user", "content": "Bonjour, comment allez-vous ?"}],
            session_id=session_id,
            stream=True,
        )

        print("💬 Réponse:")
        # Print every streamed log entry that carries non-empty content,
        # without newlines, so the answer appears as continuous text.
        for log in AgentEventLogger().log(response):
            if hasattr(log, 'content') and log.content:
                print(log.content, end='', flush=True)
        print()

except Exception as e:
    # Deliberately broad: report the failure and let the notebook continue.
    print(f"❌ Erreur agent: {e}")

## 🔧 Test 4: Test direct vLLM

In [None]:
# Direct test of the vLLM chat completion endpoint (`/v1/chat/completions`),
# bypassing LlamaStack.
print("🔧 Test direct de l'API vLLM")

try:
    # Chat completion request body.
    chat_payload = {
        "model": "llama-32-1b-instruct",
        "messages": [
            {"role": "user", "content": "Explique-moi ce qu'est l'intelligence artificielle en 2 phrases."}
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }

    response = requests.post(
        f"{vllm_url}/v1/chat/completions",
        json=chat_payload,
        timeout=30,
    )

    if response.status_code == 200:
        result = response.json()
        # Extract the answer before printing the success banner, so a
        # malformed payload is reported as an error only.
        answer = result['choices'][0]['message']['content']
        print("✅ Réponse vLLM:")
        print(answer)
    else:
        print(f"❌ Erreur vLLM: {response.status_code}")
        print(response.text)

except Exception as e:
    # Deliberately broad: report the failure and let the notebook continue.
    print(f"❌ Erreur test vLLM: {e}")

## 📊 Résumé des tests

In [None]:
# Closing summary: next steps, the service URLs exercised above, and pointers
# to the related documentation. Printed unconditionally; it does not inspect
# the results of the earlier cells.
print("🎉 Tests terminés !")
print("\n📋 Prochaines étapes:")
print("1. Si les tests passent, déployer la pipeline d'ingestion RAG")
print("2. Tester la fonctionnalité RAG avec des documents")
print("3. Utiliser le use case assurance")
print("\n🔗 Services testés:")
print(f"  - LlamaStack: {llamastack_url}")
print(f"  - vLLM: {vllm_url}")
print("\n📚 Documentation:")
print("  - README RAG: llamastack/rag/README.md")
print("  - Scripts de déploiement: deploy-rag.sh, deploy-assurance.sh")