## Deploy Your App!
#### Use this notebook after you have populated Pinecone with the relevant vectors for your application and are ready to deploy your app in CML.

In [24]:
import os
import cmlapi
import random
import string
import json

#### Get CML API Client and list the available Runtimes

In [25]:
# Build the CML API client. The "/api/v1" suffix is stripped from CDSW_API_URL
# before the URL is handed to cmlapi.
api_url = os.getenv("CDSW_API_URL")
if api_url is None:
    # Fail with a clear message instead of an AttributeError on None.replace(...)
    raise RuntimeError("CDSW_API_URL is not set; run this notebook inside a CML session.")
client = cmlapi.default_client(url=api_url.replace("/api/v1", ""), cml_api_key=os.getenv("CDSW_APIV2_KEY"))

# List the runtimes that match the kernel / edition / editor this app needs
available_runtimes = client.list_runtimes(search_filter=json.dumps({
    "kernel": "Python 3.10",
    "edition": "Nvidia GPU",
    "editor": "JupyterLab"
}))
print(available_runtimes)


def _runtime_version_key(runtime):
    """Return the runtime's full_version as a tuple of ints for numeric comparison.

    Every run of digits becomes one component, e.g. '2023.08.2-b8' -> (2023, 8, 2, 8),
    so '2023.08.10' sorts after '2023.08.2' (a plain string comparison would not).
    """
    digits_only = "".join(ch if ch.isdigit() else " " for ch in (runtime.full_version or ""))
    return tuple(int(part) for part in digits_only.split())


## Select the latest matching runtime by version rather than by a fixed list position,
## so the cell keeps working when the workspace offers fewer or more runtimes.
## The APP_IMAGE_ML_RUNTIME variable stores the ML Runtime which will be used to launch the application
if not available_runtimes.runtimes:
    raise RuntimeError("No enabled ML Runtime matches Python 3.10 / Nvidia GPU / JupyterLab in this workspace.")
latest_runtime = max(available_runtimes.runtimes, key=_runtime_version_key)
print(latest_runtime)
print(latest_runtime.image_identifier)
APP_IMAGE_ML_RUNTIME = latest_runtime.image_identifier

## Store the ML Runtime in an environment variable so later cells can read it without repeating this step
os.environ['APP_IMAGE_ML_RUNTIME'] = APP_IMAGE_ML_RUNTIME

{'next_page_token': '',
 'runtimes': [{'description': 'Python runtime with CUDA libraries provided by '
                              'Cloudera',
               'edition': 'Nvidia GPU',
               'editor': 'JupyterLab',
               'full_version': '2023.08.1-b6',
               'image_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-cuda:2023.08.1-b6',
               'kernel': 'Python 3.10',
               'status': 'ENABLED'},
              {'description': 'Python runtime with CUDA libraries provided by '
                              'Cloudera',
               'edition': 'Nvidia GPU',
               'editor': 'JupyterLab',
               'full_version': '2023.08.2-b8',
               'image_identifier': 'docker.repository.cloudera.com/cloudera/cdsw/ml-runtime-jupyterlab-python3.10-cuda:2023.08.2-b8',
               'kernel': 'Python 3.10',
               'status': 'ENABLED'}]}
{'description': 'Python runtime with CUDA libraries provi

#### Get the current working project

In [26]:
# Fetch the project identified by the CDSW_PROJECT_ID environment variable.
project = client.get_project(
    project_id=os.environ.get("CDSW_PROJECT_ID"),
)
# Show the project record returned by the API.
print(project)

{'created_at': datetime.datetime(2023, 12, 11, 21, 45, 34, 844743, tzinfo=tzlocal()),
 'creation_status': 'success',
 'creator': {'email': 'ktalbert@cloudera.com',
             'name': 'Kevin Talbert',
             'username': 'ktalbert'},
 'default_engine_type': 'ml_runtime',
 'description': '',
 'environment': '{"CDSW_APP_POLLING_ENDPOINT":"/","PROJECT_OWNER":"ktalbert"}',
 'ephemeral_storage_limit': 10,
 'ephemeral_storage_request': 0,
 'id': 'ven5-648f-q47e-b3n5',
 'name': 'CML-LLM-HOL-Workshop',
 'owner': {'email': 'ktalbert@cloudera.com',
           'name': 'Kevin Talbert',
           'username': 'ktalbert'},
 'permissions': {'admin': True,
                 'business_user': True,
                 'inherit': False,
                 'operator': True,
                 'read': True,
                 'write': True},
 'shared_memory_limit': 0,
 'updated_at': datetime.datetime(2023, 12, 12, 2, 6, 51, 228788, tzinfo=tzlocal()),
 'visibility': 'private'}


#### Create and run the hosted LLM application

In [27]:
# Describe the application to create in the current project. The runtime image
# is read from the APP_IMAGE_ML_RUNTIME environment variable.
application_request = cmlapi.CreateApplicationRequest(
    project_id=project.id,
    name="CML LLM Gradio Interface",
    description="Hosted interface for the CML LLM Gradio UI",
    subdomain="cml-llm-interface",
    script="4_launch_hosted_app/frontend_app.py",
    kernel="python3",
    runtime_identifier=os.environ.get("APP_IMAGE_ML_RUNTIME"),
    cpu=1,
    memory=4,
)

# Submit the request; the API response is kept in `app`.
app = client.create_application(body=application_request, project_id=project.id)