In [None]:
import getpass
import os

import digitalhub as dh
import requests

In [2]:
# Name of the project every function in this notebook is attached to.
PROJECT = "llm"
# Fetch the project with that name, or create it if it does not exist yet
# (per the SDK call name; exact creation semantics are defined by digitalhub).
project = dh.get_or_create_project(PROJECT)

## Text Classification LLM

In [None]:
# Declare the text-classification function on the project.
# `model_name` is the name the later cells use in the inference URL, and
# `path` points at the Hugging Face model to load.
llm_function = project.new_function(
    "llm_classification",
    kind="huggingfaceserve",
    model_name="mymodel",
    path="huggingface://distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

In [4]:
# Launch the function with the "serve" action.
# NOTE(review): "template-a100" is presumably a platform resource profile
# backed by an A100 GPU — confirm against the deployment configuration.
llm_run = llm_function.run(
    action="serve",
    profile="template-a100",
)


In [5]:
# Re-read the run state and pull the service address out of its status.
# NOTE(review): this assumes the status already carries a "service" entry;
# if the service is still starting the lookup may fail — re-run the cell.
service_info = llm_run.refresh().status.to_dict()["service"]
SERVICE_URL = service_info["url"]

# Must match the `model_name` the function was created with.
MODEL_NAME = "mymodel"

In [6]:
# Classify two sample sentences through the model's v2 `infer` endpoint.
# The two strings are sent as a single BYTES input of shape [2].
with requests.post(
    f'http://{SERVICE_URL}/v2/models/{MODEL_NAME}/infer',
    json={
        "inputs": [
            {
                "name": "input-0",
                "shape": [2],
                "datatype": "BYTES",
                "data": ["Hello, my dog is cute", "I am feeling sad"],
            }
        ]
    },
    # Without a timeout the cell hangs forever if the service is unreachable.
    timeout=60,
) as r:
    # Fail loudly on 4xx/5xx instead of parsing an error page as a prediction.
    r.raise_for_status()
    res = r.json()
print(res)

{'model_name': 'mymodel', 'model_version': None, 'id': '11eb98d9-c0a4-479f-9dfd-9c58792e8d9b', 'parameters': None, 'outputs': [{'name': 'output-0', 'shape': [2], 'datatype': 'INT64', 'parameters': None, 'data': [1, 0]}]}


## Text Generation LLM

In [None]:
# Declare the text-generation function on the project.
# `model_name` is the name the later cells address the model by, and `path`
# points at the Hugging Face model to load.
llm_function = project.new_function(
    "llm_generation",
    kind="huggingfaceserve",
    model_name="mymodel",
    path="huggingface://meta-llama/meta-llama-3-8b-instruct",
)

In [None]:
# Resolve the Hugging Face access token at run time instead of pasting it into
# the notebook (where it would be saved and shared with the file): use the
# HF_TOKEN environment variable when set, otherwise prompt without echoing.
hf_token = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face token: ")

# Launch the function with the "serve" action, handing the token to the
# serving run as its HF_TOKEN environment variable.
# NOTE(review): the token is presumably needed to download the gated
# meta-llama weights — confirm against the model's access requirements.
llm_run = llm_function.run(
    action="serve",
    profile="template-a100",
    env=[{"name": "HF_TOKEN", "value": hf_token}],
)

In [11]:
# Re-read the run state and pull the service address out of its status.
# NOTE(review): this assumes the status already carries a "service" entry;
# if the service is still starting the lookup may fail — re-run the cell.
service_info = llm_run.refresh().status.to_dict()["service"]
SERVICE_URL = service_info["url"]

# Must match the `model_name` the function was created with.
MODEL_NAME = "mymodel"

In [13]:
# Plain text completion through the service's `/openai/v1/completions` route.
with requests.post(
    f'http://{SERVICE_URL}/openai/v1/completions',
    json={
        # Use the shared constant so the request cannot drift from the
        # model name configured above.
        "model": MODEL_NAME,
        "prompt": "Hello! How are you?",
        "stream": False,
        "max_tokens": 30,
    },
    # Without a timeout the cell hangs forever if the service is unreachable.
    timeout=60,
) as r:
    # Fail loudly on 4xx/5xx instead of printing an error body as a result.
    r.raise_for_status()
    res = r.json()
print(res)

{'id': 'cmpl-2826903870a94277978e164880a58e9f', 'choices': [{'finish_reason': 'length', 'index': 0, 'logprobs': None, 'text': " Hope you're having a great day!\n\nHere I'd like to share some news about my new podcast, where I'll be exploring the world of..."}], 'created': 1724843471, 'model': 'mymodel', 'system_fingerprint': None, 'object': 'text_completion', 'usage': {'completion_tokens': 30, 'prompt_tokens': 7, 'total_tokens': 37}}


In [None]:
# Chat-style completion through the service's `/openai/v1/chat/completions`
# route: a system message sets the persona, the user message is the request.
with requests.post(
    f'http://{SERVICE_URL}/openai/v1/chat/completions',
    json={
        # Use the shared constant so the request cannot drift from the
        # model name configured above.
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are an assistant that speaks like Shakespeare."},
            {"role": "user", "content": "Write a poem about colors"},
        ],
        "max_tokens": 30,
        "stream": False,
    },
    # Without a timeout the cell hangs forever if the service is unreachable.
    timeout=60,
) as r:
    # Fail loudly on 4xx/5xx instead of printing an error body as a result.
    r.raise_for_status()
    res = r.json()
print(res)

{'id': 'cmpl-97af9ba1bebd402ea49e7748d54e37f4', 'choices': [{'finish_reason': 'length', 'index': 0, 'message': {'content': 'O, fairest hues that doth adorn our sight,\nA world of wonder, in thy tints and bright!\nThy palette, rich and', 'tool_calls': None, 'role': 'assistant', 'function_call': None}, 'logprobs': None}], 'created': 1724843487, 'model': 'mymodel', 'system_fingerprint': None, 'object': 'chat.completion', 'usage': {'completion_tokens': 30, 'prompt_tokens': 30, 'total_tokens': 60}}
