#  Deployment and inference

In [29]:
import os
import sys

# Resolve the kit directory (parent of the current working directory) and the
# repository root (parent of the kit directory), then make the repository root
# importable so the `utils` package imported below can be found
# (presumably it lives at the repository root — confirm against the repo layout).
current_dir = os.getcwd()
kit_dir = os.path.abspath(os.path.join(current_dir, '..'))
repo_dir = os.path.abspath(os.path.join(kit_dir, '..'))
# Guard the append so re-running this cell does not pile up duplicate entries.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

from utils.fine_tuning.src.snsdk_wrapper import SnsdkWrapper

## Step by Step / Manual setting

First, instantiate the SambaStudio client.

In [30]:
# Instantiated with no arguments; per the log output recorded below, the wrapper
# then takes its connection settings from the .snapi config.
sambastudio_client = SnsdkWrapper()

2025-03-31 22:44:27,721 [INFO] Using variables from .snapi config to set up Snsdk.


### Create Project

#### Set Project configs

In [31]:
# Name and description of the SambaStudio project used throughout this notebook.
project = dict(
    project_name='e2e-draft-model-training-project',
    project_description='this project will be used to test the E2E Draft Model Training pipeline implementation',
)

In [32]:
# Create the project through the client. The keys of `project` match the
# keyword arguments of create_project, so the dict is unpacked directly.
# The call's return value (the project id in the recorded run) is the cell output.
sambastudio_client.create_project(**project)

2025-03-31 22:44:38,122 [INFO] Project with name 'e2e-draft-model-training-project' found with id 0b4bf4f5-5e29-409c-8a40-7a2e4183ec95
2025-03-31 22:44:38,124 [INFO] Project with name 'e2e-draft-model-training-project' already exists with id '0b4bf4f5-5e29-409c-8a40-7a2e4183ec95', using it


'0b4bf4f5-5e29-409c-8a40-7a2e4183ec95'

### Create Endpoint

#### Set endpoint config 

In [33]:

# Endpoint settings read by the create_endpoint cell that follows.
# Alternative (currently disabled) values for a Llama3-OpenBioLLM-8B endpoint:
#   endpoint_name        = 'llama3-openbiollm-8b-endpoint'
#   endpoint_description = 'Endpoint for Llama3-OpenBioLLM-8B'
endpoint = dict(
    endpoint_name='llama-3.1-8b-instruct',
    endpoint_description='Endpoint for Llama-3.1-8B-Instruct',
    endpoint_instances=1,
    hyperparams={},
)

In [34]:
# Collect the deployment arguments first, then hand them to the client in one call.
# Project and endpoint values come from the config dicts defined above; the model
# name, model version and RDU architecture are fixed here.
endpoint_request = {
    'project_name': project['project_name'],
    'endpoint_name': endpoint['endpoint_name'],
    'endpoint_description': endpoint['endpoint_description'],
    'model_name': 'Llama-3.1-8B-Instruct',
    'model_version': 1,
    'instances': endpoint['endpoint_instances'],
    'hyperparams': endpoint['hyperparams'],
    'rdu_arch': 'SN40L-8',
}
sambastudio_client.create_endpoint(**endpoint_request)

2025-03-31 22:52:28,479 [INFO] Project with name 'e2e-draft-model-training-project' found with id 0b4bf4f5-5e29-409c-8a40-7a2e4183ec95
2025-03-31 22:52:28,722 [INFO] Model with name 'Llama-3.1-8B-Instruct' found with id b11219b8-84ba-4c5a-833b-edd6ffd5c0d6
2025-03-31 22:52:28,937 [INFO] Endpoint with name 'llama-3.1-8b-instruct' not found in project '0b4bf4f5-5e29-409c-8a40-7a2e4183ec95'
2025-03-31 22:52:29,279 [INFO] Endpoint 'llama-3.1-8b-instruct' created


'54e113d6-495f-44e9-b6b0-01bc43852806'

In [35]:
# Get the endpoint details. In the recorded run the result holds the endpoint
# status, its prediction URL, and a 'langchain_wrapper_env' mapping with the
# full URL and the API key.
# NOTE: the displayed result contains the endpoint API key — clear or redact
# this cell's output before sharing or committing the notebook.
sambastudio_client.get_endpoint_details(
    project_name=project['project_name'],
    endpoint_name=endpoint['endpoint_name'],
)

2025-03-31 22:52:31,742 [INFO] Project with name 'e2e-draft-model-training-project' found with id 0b4bf4f5-5e29-409c-8a40-7a2e4183ec95


{'status': 'SettingUp',
 'url': '/api/predict/generic/0b4bf4f5-5e29-409c-8a40-7a2e4183ec95/54e113d6-495f-44e9-b6b0-01bc43852806',
 'langchain_wrapper_env': {'SAMBASTUDIO_URL': 'https://sjc3-demo2.sambanova.net/api/predict/generic/0b4bf4f5-5e29-409c-8a40-7a2e4183ec95/54e113d6-495f-44e9-b6b0-01bc43852806',
  'SAMBASTUDIO_API_KEY': '<REDACTED>'}}

### Inference

#### Get endpoint details

In [36]:
# Fetch the endpoint details and keep only the 'langchain_wrapper_env' entry,
# i.e. the URL and API key used to build the chat model further down.
endpoint_details = sambastudio_client.get_endpoint_details(
    project_name=project['project_name'],
    endpoint_name=endpoint['endpoint_name'],
)
endpoint_env = endpoint_details['langchain_wrapper_env']

2025-03-31 22:52:35,185 [INFO] Project with name 'e2e-draft-model-training-project' found with id 0b4bf4f5-5e29-409c-8a40-7a2e4183ec95


In [37]:
# Show the stored env values. NOTE: this displays the endpoint API key —
# clear or redact the output before sharing or committing the notebook.
endpoint_env

{'SAMBASTUDIO_URL': 'https://sjc3-demo2.sambanova.net/api/predict/generic/0b4bf4f5-5e29-409c-8a40-7a2e4183ec95/54e113d6-495f-44e9-b6b0-01bc43852806',
 'SAMBASTUDIO_API_KEY': '<REDACTED>'}

In [38]:
# Build a LangChain chat model pointed at the deployed endpoint to test inference.

from langchain_sambanova import ChatSambaStudio

# Generation settings, grouped so they are easy to find and tweak.
generation_params = {
    'temperature': 0.01,
    'max_tokens': 1024,
    'top_p': 0.1,
    'do_sample': False,
}

# URL and API key come from the env mapping stored in `endpoint_env`.
llm = ChatSambaStudio(
    sambastudio_url=endpoint_env.get("SAMBASTUDIO_URL"),
    sambastudio_api_key=endpoint_env.get("SAMBASTUDIO_API_KEY"),
    **generation_params,
)


In [41]:
# Test conversation: one system prompt and one user question.
# The prompt text is intentionally kept verbatim; the adjacent string pieces are
# concatenated by Python into exactly the original single-line strings.
system_prompt = (
    "You are an expert and experienced from the healthcare and biomedical domain "
    "with extensive medical knowledge and practical experience. "
    "Your name is OpenBioLLM, and you were developed by Saama AI Labs. "
    "who's willing to help answer the user's query with explanation. "
    "In your explanation, leverage your deep medical expertise such as relevant "
    "anatomical structures, physiological processes, diagnostic criteria, "
    "treatment guidelines, or other pertinent medical concepts. "
    "Use precise medical terminology while still aiming to make the explanation "
    "clear and accessible to a general audience."
)
user_question = (
    "What are the morphological characteristics of a particular organism that "
    "determine its correct genus classification in the Taxonomy system? "
    "Identify the key features that indicate the proper classification for a "
    "given species and explain how they are used in Taxonomy"
)
messages = [("system", system_prompt), ("human", user_question)]

In [44]:
# Send the test conversation to the endpoint and show only the text of the reply.
llm.invoke(messages).content

'As OpenBioLLM, I\'d be happy to explain the morphological characteristics that determine the correct genus classification in the Taxonomy system.\n\nIn Taxonomy, the classification of organisms is based on their shared characteristics, which are used to group them into hierarchical categories. The genus classification is a critical level in this hierarchy, as it represents a group of related species that share common characteristics.\n\nTo determine the correct genus classification, taxonomists examine the following key morphological characteristics:\n\n1. **Body shape and size**: The overall shape and size of the organism, including its proportions, can be indicative of its genus classification. For example, the shape of the body, the presence of appendages, and the size of the organism can be used to distinguish between different genera.\n2. **Skeletal system**: The structure and composition of the skeletal system, including the presence of bones, cartilage, or other supporting tiss

In [57]:
import json

# Path to the input JSONL file
input_filename = "../data/datasets/bio_train_completions/bio_train_formatted.jsonl"
# Path to the output JSONL file
output_filename = "../data/datasets/bio_train_completions/bio_train_completions.jsonl"


def generate_completions(llm, input_path, output_path):
    """Run every record of a JSONL file through ``llm`` and save the answers as JSONL.

    Args:
        llm: Object exposing ``invoke(messages)`` whose result has a ``content``
            attribute (e.g. the chat model created earlier in this notebook).
        input_path: JSONL file; each non-blank line must be a JSON object with
            the keys ``system_prompt`` and ``instruction``.
        output_path: JSONL file to (over)write; one object per record with the
            keys ``system``, ``prompt`` and ``completion``.

    Returns:
        The list of entries written, in input order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``system_prompt`` or ``instruction``.
    """
    entries = []
    with open(input_path, "r", encoding="utf-8") as infile, \
            open(output_path, "w", encoding="utf-8") as outfile:
        for line in infile:
            # Tolerate blank lines (e.g. a stray empty line at the end of the file)
            # instead of crashing in json.loads.
            if not line.strip():
                continue
            record = json.loads(line)

            system_msg = record["system_prompt"]
            human_msg = record["instruction"]

            # Local name on purpose: this must not overwrite the notebook-level
            # `messages` defined in an earlier cell.
            chat_messages = [("system", system_msg), ("human", human_msg)]
            completion = llm.invoke(chat_messages).content

            entry = {
                "system": system_msg,
                "prompt": human_msg,
                "completion": completion,
            }
            # Write and flush right away so completions already produced survive
            # a failure later in a (potentially long) run.
            outfile.write(json.dumps(entry) + "\n")
            outfile.flush()
            entries.append(entry)
    return entries


output_lines = generate_completions(llm, input_filename, output_filename)

print(f"Successfully processed {len(output_lines)} entries and saved to {output_filename}.")

Successfully processed 3 entries and saved to ../data/datasets/bio_train_completions/bio_train_completions.jsonl.


## Streamlined Execution

Project creation, bundle (composite) model creation, and endpoint deployment can be done in a streamlined way by setting all the project, composite model, and endpoint parameters in a config file, as in the [deploy_config.yaml](../deploy_config.yaml) example, and then executing:

In [None]:
# Streamlined run: the wrapper is built from the YAML config file, so the
# create/get methods below are called without arguments.
config_file = os.path.join(kit_dir, 'deploy_config.yaml')
sambastudio_client = SnsdkWrapper(config_file)
sambastudio_client.create_project()
sambastudio_client.create_composite_model()
sambastudio_client.create_endpoint()
# Keep only the 'langchain_wrapper_env' entry (URL + API key), matching how
# `endpoint_env` is built in the manual section, so that
# endpoint_env.get("SAMBASTUDIO_URL") / .get("SAMBASTUDIO_API_KEY") keep working.
endpoint_env = sambastudio_client.get_endpoint_details()['langchain_wrapper_env']