# RAG Deployment Development

In [1]:
from model_library.cards.architectures import LOCAL_AI_ARCHITECTURE
from model_library.cards.models import LOCAL_AI_MODEL_CARD
from model_library.cards.deployments import LOCAL_AI_DEPLOYMENT_CARD
from model_library.cards.model_deployment_templates import LOCAL_AI_DEPLOYMENT_TEMPLATE_CARD
from model_library.models import *   
from model_library import ARCHITECTURES, MODELS, DEPLOYMENTS, MODEL_DEPLOYMENT_TEMPLATES





In [2]:
# Identity passed to the model deployment card further down.
# NOTE(review): the API key looks like a placeholder — supply the real key out
# of band rather than committing it to the notebook.
user = UserInformation(id="2", API_key="XXXX-XXXX-XXXX-XXXX", namespace="adam")



In [3]:
from uuid import uuid4

import os

# Cards describing the Chat UI application, built bottom-up:
# architecture -> model -> deployment resources -> deployment template.
#
# Endpoints and credentials in the template params can be overridden through
# CHAT_UI_* environment variables so they do not have to be edited into the
# notebook. The previous literals are kept as fallbacks, so a fresh
# "Restart & Run All" behaves exactly as before when nothing is set.
# NOTE(review): drop the credential fallbacks once every environment exports
# CHAT_UI_USERNAME / CHAT_UI_PASSWORD.

# ArchitectureCard for the deployment setup
CHAT_UI_ARCHITECTURE_CARD = ArchitectureCard(
    id=str(uuid4()),  # a new random id is generated on every run of this cell
    # NOTE(review): looks like a placeholder path — confirm before deploying.
    deployment_yaml="path/to/chat_ui_app_deployment.yaml",
    name="Chat UI Deployment",
    description="Deployment architecture for the Chat UI application.",
    tags={},
    health_endpoint="/health",
)

# ModelCard for environment configuration
CHAT_UI_MODEL_CARD = ModelCard(
    id=str(uuid4()),
    architecture=CHAT_UI_ARCHITECTURE_CARD,
    params={},
)

# DeploymentCard for resource and volume specifications
# (requests are set equal to limits for both CPU and memory)
CHAT_UI_DEPLOYMENT_CARD = DeploymentCard(
    id=str(uuid4()),
    params={
        "cpu_limits": "2",
        "memory_limits": "4Gi",
        "cpu_requests": "2",
        "memory_requests": "4Gi",
        "replicas": 1,
    },
)

# ModelDeploymentTemplateCard for overall deployment template
# NOTE(review): "None" / "True" are passed as strings, presumably because they
# are substituted verbatim into the deployment YAML — confirm.
CHAT_UI_DEPLOYMENT_TEMPLATE_CARD = ModelDeploymentTemplateCard(
    id=str(uuid4()),
    name="Chat UI App Deployment",
    description="Deployment template for Chat UI application with Python 3.11-slim container.",
    model_card=CHAT_UI_MODEL_CARD,
    deployment_card=CHAT_UI_DEPLOYMENT_CARD,
    benchmarks=[],
    params={
        "deployment_name": "chat-ui-app",
        "llm_api_key": os.environ.get("CHAT_UI_LLM_API_KEY", "None"),
        "llm_model": "phi-2",
        "llm_base_url": os.environ.get("CHAT_UI_LLM_BASE_URL", "http://178.62.13.8:31095"),
        "retriever_base_url": os.environ.get("CHAT_UI_RETRIEVER_BASE_URL", "http://178.62.13.8:31645"),
        "retriever_api_key": os.environ.get("CHAT_UI_RETRIEVER_API_KEY", "None"),
        "chat_use_auth": "True",
        "username": os.environ.get("CHAT_UI_USERNAME", "Adam"),
        "password": os.environ.get("CHAT_UI_PASSWORD", "kalavai"),
    },
)


ValidationError: 1 validation error for ArchitectureCard
health_endpoint
  Input should be a valid string [type=string_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.5/v/string_type

In [7]:
from model_library.cards.model_deployment_templates import RAG_DEPLOYMENT_TEMPLATE_CARD

# Build the deployment card for the RAG template on behalf of `user`, render
# it to a deployment config string and save that string to disk.
model_deployment = ModelDeploymentCard(
    user_information=user,
    model_deployment_template=RAG_DEPLOYMENT_TEMPLATE_CARD,
)

deployment_config = model_deployment.extract_deployment_config()

file_path = "rag_deployment_example.yaml"  # Replace with your desired file path

# Write with an explicit encoding: without it, open() falls back to the
# platform's locale encoding, so any non-ASCII text in the config could be
# written differently (or fail to encode) depending on the machine.
with open(file_path, "w", encoding="utf-8") as file:
    file.write(deployment_config)

print(f"YAML string written to {file_path}")



YAML string written to rag_deployment_example.yaml


In [None]:
import requests
import json

def deploy_generic_model(
    config,
    url="http://0.0.0.0:8000/v1/deploy_generic_model",
    timeout=60,
):
    """POST a deployment config to the deployment service and print the outcome.

    Args:
        config: Deployment configuration to send. It is wrapped as
            ``{"config": config}`` and must therefore be JSON-serialisable.
        url: Endpoint to post to. Defaults to the address this notebook was
            written against.
        timeout: Seconds to wait for the service before giving up. ``requests``
            waits indefinitely when no timeout is given, which would hang the
            kernel if the service is unreachable. Pass ``None`` to wait
            without a limit.

    Returns:
        None. The result is reported on stdout only.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection error, timeout, ...).
    """
    payload = {"config": config}
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }

    # `json=` lets requests serialise the payload itself instead of the caller
    # pre-encoding it with json.dumps.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)

    # Only an exact 200 is treated as success; anything else is reported with
    # the raw response body to help debugging.
    if response.status_code == 200:
        print("Deployment successful.")
        print(response.json())
    else:
        print(f"Deployment failed with status code {response.status_code}.")
        print(response.text)




In [None]:
# Send the config string produced by extract_deployment_config() to the
# deployment service; the outcome is printed by the helper.
deploy_generic_model(deployment_config)

Deployment successful.
{'successful': ['[[{\'api_version\': \'v1\',\n \'kind\': \'Service\',\n \'metadata\': {\'annotations\': None,\n              \'creation_timestamp\': datetime.datetime(2024, 2, 5, 16, 37, 41, tzinfo=tzutc()),\n              \'deletion_grace_period_seconds\': None,\n              \'deletion_timestamp\': None,\n              \'finalizers\': None,\n              \'generate_name\': None,\n              \'generation\': None,\n              \'labels\': {\'app\': \'rag-tool-app\'},\n              \'managed_fields\': [{\'api_version\': \'v1\',\n                                  \'fields_type\': \'FieldsV1\',\n                                  \'fields_v1\': {\'f:metadata\': {\'f:labels\': {\'.\': {},\n                                                                            \'f:app\': {}}},\n                                                \'f:spec\': {\'f:externalTrafficPolicy\': {},\n                                                           \'f:internalTrafficPolicy\'