# Utilisation d'un LLM et suivi dans MLflow

## Téléchargement d'un fichier gguf pour LlamaCpp

In [1]:
import os
import requests
import sys

# Make sure the local models directory exists (no error if it is already there).
os.makedirs('models', exist_ok=True)

# Download the GGUF file once; later runs reuse the local copy.
if not os.path.exists('models/llama.gguf'):
    url = 'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf?download=true'
    # Write under a temporary name first: an interrupted download must never
    # leave a truncated 'models/llama.gguf' that the existence check above
    # would mistake for a complete model on the next run.
    partial_path = 'models/llama.gguf.part'
    # Stream the body chunk by chunk instead of holding the whole (potentially
    # very large) model file in memory through `r.content`.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail loudly on an HTTP error rather than saving an error page as the model.
        r.raise_for_status()
        with open(partial_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    # Publish the finished file under its final name only once it is complete.
    os.replace(partial_path, 'models/llama.gguf')
    print('downloaded')

## Création d'un Experiment dans MLflow pour suivre les différentes exécutions

In [2]:
import mlflow
# Point the MLflow client at the tracking server used by this notebook.
mlflow.set_tracking_uri("http://mlflow-serveur:8080/")
# Select the experiment that the following runs are recorded under. This is the
# last expression of the cell, so the returned Experiment is shown as its output.
mlflow.set_experiment("my-experiment")

<Experiment: artifact_location='mlflow-artifacts:/995425785044879997', creation_time=1725891041882, experiment_id='995425785044879997', last_update_time=1725891041882, lifecycle_stage='active', name='my-experiment', tags={}>

## Mise en place du LLM

Lors de la première exécution (la variable `api_key` n'étant pas encore définie), c'est LlamaCpp qui est utilisé.

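Une fois la clé saisie (variable `api_key` définie, voir la section « Saisie de la clé pour OpenAI »), relancer cette cellule : OpenAI est alors utilisé.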

In [7]:
# This example demonstrates defining a model directly from code.
# This feature allows for defining model logic within a python script, module, or notebook that is stored
# directly as serialized code, as opposed to object serialization that would otherwise occur when saving
# or logging a model object.
# This script defines the model's logic and specifies which class within the file contains the model code.
# The companion example to this, chain_as_code_driver.py, is the driver code that performs the logging and
# loading of this model definition.

from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_openai import OpenAI
from langchain_community.llms import LlamaCpp

import mlflow

# Turn on MLflow's LangChain autologging before the chain is built and invoked.
mlflow.langchain.autolog()


def extract_user_query_string(chat_messages_array):
    """Return the text of the most recent message.

    The last element of ``chat_messages_array`` is taken and its
    ``"content"`` entry is returned.
    """
    latest_message = chat_messages_array[-1]
    return latest_message["content"]


def extract_chat_history(chat_messages_array):
    """Return the chat history: every message that precedes the last one."""
    earlier_messages = chat_messages_array[:-1]
    return earlier_messages


# Prompt with a single placeholder: only {question} is substituted into the template.
prompt = PromptTemplate(
    input_variables=["question"],
    template="You are a hello world bot. Respond with a reply to the user's question that is fun and interesting to the user.  User's question: {question}",
)

# Choose the LLM backend: OpenAI once a usable key is held in the notebook-level
# `api_key` variable, otherwise the local GGUF model through LlamaCpp.
# The variable is tested for truthiness rather than mere existence: `os.getenv`
# yields None when OPENAI_API_KEY is unset, and an empty prompt answer yields "",
# and neither of those should switch the notebook over to OpenAI.
# NOTE(review): the key is not passed to OpenAI() here; presumably the client
# reads OPENAI_API_KEY from the environment - confirm.
if globals().get('api_key'):
    model = OpenAI(temperature=0.95)
else:
    model = LlamaCpp(
        model_path="models/llama.gguf",  # file written by the download cell
        n_ctx=1024,
        n_batch=1024,
        max_tokens=350,
        temperature=0.1,
        n_gpu_layers=20,
        verbose=True,
    )

# Assemble the chain: derive the prompt inputs from the request, render the
# prompt, call the selected model, then pass the result through StrOutputParser.
chain = (
    {
        # Content of the last entry of the request's "messages" list.
        "question": itemgetter("messages") | RunnableLambda(extract_user_query_string),
        # Every entry before the last one. The prompt template declares only
        # `question` as an input variable, so this value is not referenced by it.
        "chat_history": itemgetter("messages") | RunnableLambda(extract_chat_history),
    }
    | prompt
    | model
    | StrOutputParser()
)


## Question et remontée dans MLflow

In [8]:
# Minimal chat-style request: a single user message.
question = {"messages": [{"role": "user", "content": "what is rag?"}]}
print(chain.invoke(question))

# Required by MLflow's models-from-code workflow: `mlflow.models.set_model()`
# declares which object is the model, so that `mlflow.langchain.load_model`
# can load it back for inference.
mlflow.models.set_model(model=chain)


Rag is actually a type of fabric made from scrap cloth pieces. It's been around for centuries and has been used for everything from cleaning to fashion. But did you know that rag rugs were once so valuable that they were passed down as family heirlooms? Now that's one fancy rag!


## Saisie de la clé pour OpenAI

In [5]:
import os
from getpass import getpass
# Ask for the key interactively (the input is masked in the cell output) and
# store it in this process's environment under OPENAI_API_KEY.
os.environ["OPENAI_API_KEY"] = getpass()

 ········


In [6]:
import os
# Copy the key into the notebook-level `api_key` variable, which the model
# set-up cell looks for when choosing between OpenAI and LlamaCpp.
# `os.getenv` returns None if OPENAI_API_KEY has not been set.
api_key = os.getenv('OPENAI_API_KEY')

Il est maintenant possible de relancer les cellules « Mise en place du LLM » puis « Question et remontée dans MLflow » : la réponse sera cette fois produite par OpenAI.