In [0]:
%pip install --quiet -U transformers==4.41.1 pypdf==4.1.0 langchain-text-splitters==0.2.0 databricks-vectorsearch mlflow tiktoken==0.7.0 torch==2.3.0 llama-index==0.10.43 markdownify cloudpickle==2.2.1

# NOTE(review): databricks-vectorsearch, mlflow and markdownify carry no version pin
# in the install line above, so together with -U each run may pick up a different
# release than the previous one -- consider pinning them like the other packages.

# Restart the Python process so the cells below import the packages installed above.
dbutils.library.restartPython() 

In [0]:
import json
import logging
import platform
from importlib.metadata import version as installed_version

import mlflow
from mlflow.models import infer_signature
import pandas as pd

# Set logging level to DEBUG
logging.basicConfig(level=logging.DEBUG)

# Set the experiment name in the workspace
experiment_name = "/Users/jose.iram@avanade.com/prompts/invoices_processing_prompts"

mlflow.set_experiment(experiment_name)

# Define your prompts
prompts = {
    "document_classification": "Classify this document into one of the following categories: invoice, receipt, ID, contract.",
    "information_extraction": "Extract the following fields from this document: date, total amount, vendor name, transaction ID."
}


class InvoicePromptModel(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper that serves the prompt templates stored in the JSON artifact.

    A pyfunc model needs code that turns its files into something with a
    ``predict`` method; a bare JSON file handed to ``loader_module="mlflow.pyfunc"``
    cannot be loaded back, so the prompts are wrapped in this class instead.
    """

    def load_context(self, context):
        """Read the prompt dictionary from the ``invoices_prompts`` artifact."""
        with open(context.artifacts["invoices_prompts"], encoding="utf-8") as fh:
            self.prompts = json.load(fh)

    def predict(self, context, model_input, params=None):
        """Return one prompt per input row.

        NOTE(review): the original cell never defined how ``context`` maps to
        ``prompt``. Here each ``context`` value is treated as a prompt key
        (e.g. "document_classification") -- confirm this is the intended contract.

        Raises:
            ValueError: if a requested key is not one of the stored prompts.
        """
        requested = list(model_input["context"])
        unknown = sorted(set(requested) - set(self.prompts))
        if unknown:
            raise ValueError(
                f"Unknown prompt key(s) {unknown}; available: {sorted(self.prompts)}"
            )
        return pd.DataFrame({"prompt": [self.prompts[key] for key in requested]})


# Start MLflow run
with mlflow.start_run(run_name="invoices_processing_prompts"):
    # Register prompts as artifacts
    with open("invoices_prompts.json", "w") as f:
        json.dump(prompts, f)

    mlflow.log_artifact("invoices_prompts.json", "invoices_prompts")

    # Log additional metadata
    mlflow.log_params({
        "prompt_version": "2.0",
        "use_case": "invoices_processing",
        "language": "pt-br,en",
        "mlflow_version": mlflow.__version__  # Log the MLflow version
    })

    # Define the conda environment from the interpreter and packages that are
    # actually running. The previous hard-coded pair (python=3.8.5 with
    # mlflow==2.22.0) cannot be solved because MLflow 2.22 requires Python >= 3.9,
    # and a fixed mlflow pin could disagree with the "mlflow_version" param above.
    conda_env = {
        "name": "invoices_processing_env",
        "channels": ["defaults"],
        "dependencies": [
            f"python={platform.python_version()}",
            "pip",
            {
                "pip": [
                    f"mlflow=={mlflow.__version__}",
                    # The python_model is serialized with cloudpickle, so the
                    # serving environment should use the same release.
                    f"cloudpickle=={installed_version('cloudpickle')}",
                    f"pandas=={pd.__version__}",
                ]
            },
        ],
    }

    # Log the prompts model. python_model/artifacts are used instead of
    # loader_module/data_path (the two pairs cannot be combined) so that the
    # logged model can be loaded with mlflow.pyfunc.load_model.
    mlflow.pyfunc.log_model(
        artifact_path="invoices_prompt_model",
        python_model=InvoicePromptModel(),
        artifacts={"invoices_prompts": "invoices_prompts.json"},
        conda_env=conda_env,
        signature=infer_signature(pd.DataFrame({"context": ["example"]}), pd.DataFrame({"prompt": ["example"]})),
    )

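# Backup: the next cell appears to be an earlier variant of the prompt-logging flow above (it logs prompt_version "1.0").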

In [0]:
import json

import mlflow
from mlflow.models import infer_signature
import pandas as pd

# Select the MLflow experiment.
folderVolumePath = "/Users/jose.iram@avanade.com/Databricks/llm-rag-pdf-based"

# set_experiment creates the experiment when it does not exist yet. A separate
# mlflow.create_experiment call is not needed and would raise on every re-run,
# because the experiment already exists after the first execution.
mlflow.set_experiment(experiment_name=folderVolumePath)

# Define your prompts
prompts = {
    "document_classification": "Classify this document into one of the following categories: invoice, receipt, ID, contract.",
    "information_extraction": "Extract the following fields from this document: date, total amount, vendor name, transaction ID."
}


class InvoicePromptModel(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper that serves the prompt templates stored in the JSON artifact.

    log_model rejects a call where both ``python_model`` and ``loader_module``
    are missing, so the prompts are wrapped in this minimal model class.
    """

    def load_context(self, context):
        """Read the prompt dictionary from the ``invoices_prompts`` artifact."""
        with open(context.artifacts["invoices_prompts"], encoding="utf-8") as fh:
            self.prompts = json.load(fh)

    def predict(self, context, model_input, params=None):
        """Return one prompt per input row.

        NOTE(review): the original cell never defined how ``context`` maps to
        ``prompt``. Here each ``context`` value is treated as a prompt key
        (e.g. "document_classification") -- confirm this is the intended contract.

        Raises:
            ValueError: if a requested key is not one of the stored prompts.
        """
        requested = list(model_input["context"])
        unknown = sorted(set(requested) - set(self.prompts))
        if unknown:
            raise ValueError(
                f"Unknown prompt key(s) {unknown}; available: {sorted(self.prompts)}"
            )
        return pd.DataFrame({"prompt": [self.prompts[key] for key in requested]})


# Start MLflow tracking; keep a handle on the run so its id is still available
# after the block ends (mlflow.active_run() returns None once the run is closed).
with mlflow.start_run(run_name="invoices_processing_prompts") as run:
    # Store the prompts as a run artifact
    with open("invoices_prompts.json", "w") as f:
        json.dump(prompts, f)

    mlflow.log_artifact("invoices_prompts.json", "invoices_prompts")

    # Log additional metadata
    mlflow.log_params({
        "prompt_version": "1.0",
        "use_case": "invoices_processing",
        "language": "pt-br,en"
    })

    # Log the prompts model
    mlflow.pyfunc.log_model(
        artifact_path="invoices_prompt_model",
        python_model=InvoicePromptModel(),
        artifacts={"invoices_prompts": "invoices_prompts.json"},
        signature=infer_signature(pd.DataFrame({"context": ["exemplo"]}), pd.DataFrame({"prompt": ["exemplo"]}))
    )

# Register the model in the Model Registry, using the id of the run that logged it
model_name = "invoices_processing_prompts"
model_version = mlflow.register_model(f"runs:/{run.info.run_id}/invoices_prompt_model", model_name)