In [1]:
from getpass import getpass
import requests
import os

In [2]:

hard_reset = False  ## <-- Set to True if you want to reset your NVIDIA_API_KEY

## Keep going until the environment holds something that looks like an NVIDIA key.
while "nvapi-" not in os.environ.get("NVIDIA_API_KEY", "") or hard_reset:
    ## Try to set NVIDIA_API_KEY as part of docker_router routine.
    ##  When running in course container, this helps to save your API key between sessions.
    nvapi_key = None
    if not hard_reset:  ## explicit check (an `assert` would be stripped under `python -O`)
        try:
            router_reply = requests.get("http://docker_router:8070/get_key", timeout=5).json()
            nvapi_key = router_reply.get("nvapi_key")
        except Exception:
            ## Router unreachable, slow, or reply malformed -> ask the user below.
            ##  `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
            nvapi_key = None

    ## Only trust a stored key that has the expected prefix. Otherwise prompt, so a bad
    ##  stored key cannot make this loop spin forever without ever asking the user.
    if not (isinstance(nvapi_key, str) and "nvapi-" in nvapi_key):
        nvapi_key = getpass("NVIDIA API Key: ")

    os.environ["NVIDIA_API_KEY"] = nvapi_key
    hard_reset = False

    if "nvapi-" not in nvapi_key:
        print("[!] API key assignment failed. Make sure it starts with `nvapi-` as generated from the model pages.")
        continue  ## do not persist a key we already know is invalid

    ## Best-effort: remember the key for the next session. Failure here is not fatal,
    ##  the key is already available through os.environ for this session.
    try:
        requests.post("http://docker_router:8070/set_key/", json={'nvapi_key': nvapi_key}, timeout=5)
    except Exception:
        pass

print(f"Retrieved NVIDIA_API_KEY beginning with \"{os.environ.get('NVIDIA_API_KEY')[:9]}...\"")

NVIDIA API Key: ········
Retrieved NVIDIA_API_KEY beginning with "nvapi-ZAj..."


In [7]:
import requests
import json

In [8]:
## Request headers for the endpoint call: bearer auth taken from the environment,
##  a server-sent-event stream as the accepted reply, and a JSON request body.
headers = dict()
headers["Authorization"] = f"Bearer {os.environ.get('NVIDIA_API_KEY')}"
headers["accept"] = "text/event-stream"
headers["content-type"] = "application/json"

In [9]:
## HINT 2: If you're streaming, you can use print(line.decode("utf-8")) for raw responses
##  For more user-friendly responses, you may want to get_stream_token(line):
def get_stream_token(entry: bytes):
    """Utility: Coerces out ['choices'][0]['delta']['content'] from one line of the bytestream.

    Args:
        entry: One raw line of the streamed response (e.g. an item yielded by
            ``response.iter_lines()``). May be empty.

    Returns:
        The ``content`` value of the first choice's delta, or ``""`` whenever the
        line carries no token: empty lines, lines that are not ``data:`` fields,
        payloads that are not JSON objects (such as ``data: [DONE]``), an empty
        ``choices`` list, or a delta without ``content``.
    """
    if not entry:
        return ""
    text = entry.decode('utf-8')
    ## Anything that is not a data field (comments, "event:" lines, ...) has no token.
    if not text.startswith('data:'):
        return ""
    try:
        ## json.loads tolerates the optional space that follows "data:"
        event = json.loads(text[5:])
    except ValueError:
        return ""
    if not isinstance(event, dict):
        return ""
    choices = event.get('choices')
    if not isinstance(choices, list) or not choices or not isinstance(choices[0], dict):
        return ""
    delta = choices[0].get('delta')
    if not isinstance(delta, dict):
        return ""
    ## A missing/None content must not reach print() as the text "None".
    return delta.get('content') or ""

In [10]:
####################################################################################
## TODO: Save the invocation URL for the endpoint here
invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/404"

## TODO: Construct the payload, which will be sent over to the endpoint
payload = {

}

## Use requests.post to send the headers (streaming meta-info) and the payload to the endpoint.
## Make sure streaming is enabled, and expect the response to support iter_lines.
## The `with` block releases the streamed connection even when an error is raised below.
with requests.post(invoke_url, headers=headers, json=payload, stream=True) as response:

    ## If your response is an error message, this will raise an exception in Python
    response.raise_for_status()

    ## If the post request is honored, you should be able to iterate over the streamed lines
    for line in response.iter_lines():
        ## `or ""` keeps a line without a token from being printed as the text "None"
        print(get_stream_token(line) or "", end="")
        # if line: print(line.decode("utf-8"))

HTTPError: 400 Client Error: Bad Request for url: https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/404

## LangChain

In [None]:
%pip install -q langchain-nvidia-ai-endpoints

In [11]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints._common import NVEModel  ## Backend Model

## Using the backbone NVIDIA Endpoints client, which makes the calls as you saw above
## NOTE(review): NVEModel comes from the underscore-prefixed `_common` module, so this
##  import may not be stable across package versions — confirm against the installed release.
## Instantiates the client and displays its `available_models`; in the output below
##  this is a dict of model name -> ID string.
NVEModel().available_models

{'playground_kosmos_2': '0bcd1a8c-451f-4b12-b7f0-64b4781190d1',
 'playground_nvolveqa_40k': '091a03bb-7364-4087-8090-bd71e9277520',
 'playground_gemma_7b': '1361fa56-61d7-4a12-af32-69a3825746fa',
 'playground_mistral_7b': '35ec3354-2681-4d0e-a8dd-80325dcf7c63',
 'playground_yi_34b': '347fa3f3-d675-432c-b844-669ef8ee53df',
 'playground_llama2_code_70b': '2ae529dc-f728-4a46-9b8d-2697213666d8',
 'playground_starcoder2_15b': '6acada03-fe2f-4e4d-9e0a-e711b9fd1b59',
 'playground_mamba_chat': '381be320-4721-4664-bd75-58f8783b43c7',
 'playground_phi2': '6251d6d2-54ee-4486-90f4-2792bf0d3acd',
 'playground_sdxl': '89848fb8-549f-41bb-88cb-95d6597044a4',
 'playground_nv_llama2_rlhf_70b': '7b3e3361-4266-41c8-b312-f5e33c81fc92',
 'playground_neva_22b': '8bf70738-59b9-4e5f-bc87-7ab4203be7a0',
 'playground_cuopt': '8f2fbd00-2633-41ce-ab4e-e5736d74bff7',
 'playground_mixtral_8x7b': '8f4118ba-60a8-4e6b-8574-e38a4067a4a3',
 'playground_steerlm_llama_70b': 'd6fe6881-973a-4279-a0f8-e1d486c9618d',
 'playgro

In [12]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA

## NOTE: "playground_" prefix is optional for our client
## Build a chat client for the 'llama2_13b' model and send it a single message.
##  The last expression is the cell output: whatever `invoke` returns
##  (an AIMessage in the output below).
chat = ChatNVIDIA(model='llama2_13b')
chat.invoke("Hello! How's it going?")

AIMessage(content="Hello! I'm here to help and I'm doing well, thank you for asking! It's great to see you here. Is there something specific you'd like to know or talk about? I'm here to provide helpful and accurate information to the best of my ability. Please feel free to ask me anything, and I'll do my best to assist you.")

In [13]:
## Iterating `chat` yields (field, value) pairs; gather all of them except
##  the 'client' entry into a plain dict for display.
dict(pair for pair in chat if pair[0] != 'client')

{'name': None,
 'cache': None,
 'verbose': False,
 'callbacks': None,
 'callback_manager': None,
 'tags': None,
 'metadata': None,
 'model': 'llama2_13b',
 'temperature': None,
 'max_tokens': None,
 'top_p': None,
 'seed': None,
 'bad': None,
 'stop': None,
 'labels': None}

In [14]:
def _redact_auth(value):
    """Return `value` with every 'Authorization' entry masked, recursing into nested dicts.

    Non-dict values are returned unchanged. Dicts are copied, never mutated, so the
    client's own state is left intact.
    """
    if isinstance(value, dict):
        return {
            key: "**********" if str(key).lower() == "authorization" else _redact_auth(item)
            for key, item in value.items()
        }
    return value

## Show the client's (field, value) pairs except 'headers_tmpl'. `last_inputs` echoes the
##  request headers, including the bearer token, so mask it before it lands in the
##  saved notebook output.
{k: _redact_auth(v) for k, v in chat.client if k != 'headers_tmpl'}

{'fetch_url_format': 'https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/',
 'call_invoke_base': 'https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions',
 'func_list_format': 'https://api.nvcf.nvidia.com/v2/nvcf/functions',
 'get_session_fn': requests.sessions.Session,
 'get_asession_fn': aiohttp.client.ClientSession,
 'nvidia_api_key': SecretStr('**********'),
 'is_staging': False,
 'timeout': 60,
 'interval': 0.02,
 'last_inputs': {'url': 'https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/e0bb7fb9-5333-4a27-8534-c6288f921d3f',
  'headers': {'Accept': 'application/json',
   'Authorization': 'Bearer nvapi-<REDACTED>',
   'User-Agent': 'langchain-nvidia-ai-endpoints'},
  'json': {'messages': [{'role': 'user', 'content': "Hello! How's it going?"}],
   'stream': False},
  'stream': False},
 'payload_fn': <function langchain_nvidia_ai_endpoints._common.NVEModel.<lambda>(d)>,
 'stagify': functools.partial(<function NVEModel._stagify at 0x7