In [None]:
# This is a sample notebook illustrating how to get started with the LLM Access Service API

In [None]:
# Prerequisites
# Download the service key of your proxy service instance to a local file (for example key.json)
# and set KEY_FILE below to the path of that file.

In [None]:
import json
import requests

In [None]:
# Path to the downloaded service key (your key.json); adjust it to match your local file.
KEY_FILE = "llmacces1-key.json"

In [None]:
# Read the service key file and parse its JSON content into `svc_key`

with open(KEY_FILE, "r") as key_file:
    svc_key = json.loads(key_file.read())

In [None]:
# Get Token
#
# Reads the service URL and the OAuth client credentials from the service key,
# then requests an access token from the UAA using the client-credentials grant.

svc_url = svc_key["url"]
client_id = svc_key["uaa"]["clientid"]
client_secret = svc_key["uaa"]["clientsecret"]
uaa_url = svc_key["uaa"]["url"]

# RFC 6749 section 4.4.2: the grant parameters belong in the form-encoded
# request body rather than in the URL query string.
params = {"grant_type": "client_credentials"}
resp = requests.post(f"{uaa_url}/oauth/token",
                     auth=(client_id, client_secret),
                     data=params,
                     timeout=30)  # do not hang forever if the UAA is unreachable

# Surface bad credentials / wrong URL as an HTTP error here instead of a
# confusing KeyError on "access_token" below.
resp.raise_for_status()

token = resp.json()["access_token"]

In [None]:
# text-davinci-003 example: prompt-style completion request

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

# Request body: deployment to call plus the usual completion parameters
data = dict(
    deployment_id="text-davinci-003",
    prompt="Hello",
    max_tokens=500,
    temperature=0.0,
    n=1,
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# gpt-35-turbo example, uses Chat Completions API format, note the use of messages array instead of prompt

data = {
    "deployment_id": "gpt-35-turbo",
    "messages": [
        {"role": "system", "content": "Assistant is an intelligent chatbot designed to help users answer their tax related questions.\n\nInstructions:\n- Only answer questions related to taxes.\n- If you're unsure of an answer, you can say \"I don't know\" or \"I'm not sure\" and recommend users go to the IRS website for more information."},
        {"role": "user", "content": "When are my taxes due?"}
    ],
    "max_tokens": 100,
    "temperature": 0.0,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    # None is serialised as JSON null (no stop sequence). The string "null"
    # would instead be sent as a literal stop sequence.
    "stop": None
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# gpt-4 example, uses Chat Completions API format, note the use of messages array instead of prompt

data = {
    "deployment_id": "gpt-4",
    # Multi-turn conversation: earlier user/assistant turns are passed back as context
    "messages": [
        {"role": "system", "content": "An interaction between a human and a machine"},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hello"},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am an intelligent machine"},
        {"role": "user", "content": "Who created you?"}
    ],
    "max_tokens": 800,
    "temperature": 0.7,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "top_p": 0.95,
    # None is serialised as JSON null (no stop sequence). The string "null"
    # would instead be sent as a literal stop sequence.
    "stop": None
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# BLOOM example: prompt completion, output length set via `result_length`

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="bloom-7b1",
    prompt="A long, long time ago there was",
    result_length=50,
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# GPT-J example: prompt completion, output length set via `result_length`

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="gptj-full",
    prompt="SAP is a",
    result_length=50,
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# AlephAlpha example: note that the length parameter is named `maximum_tokens` here

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="alephalpha",
    prompt="SAP is a",
    maximum_tokens=50,
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# Embeddings example: sends the text in `input` to the embeddings route

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="text-embedding-ada-002-v2",
    input="SAP SE (/ˌɛs.eɪˈpiː/; German pronunciation: [ɛsʔaːˈpeː] (listen)) is a German multinational software company based in Walldorf, Baden-Württemberg. It develops enterprise software to manage business operations and customer relations.[2][3] The company is the world's leading enterprise resource planning (ERP) software vendor.[4][5] SAP is the largest non-American software company by revenue, the world's third-largest publicly traded software company by revenue, and the second-largest German company by market capitalization.[6] Apart from ERP software, the company also sells database software and technology (particularly its own brands), cloud-engineered systems, and other ERP software products, such as human capital management (HCM) software, customer relationship management (CRM) software (also known as customer experience), enterprise performance management (EPM) software, product lifecycle management (PLM) software, supplier relationship management (SRM) software, supply chain management (SCM) software, business technology platform (BTP) software and programming environment SAP AppGyver for business.",
)

# Note the /embeddings route (the text-generation examples use /completions)
response = requests.post(f"{svc_url}/api/v1/embeddings", headers=headers, json=data)
print(response.json())

In [None]:
# Google text-bison@001
# Request body uses the Vertex AI shape: a list of `instances` plus a `parameters` object.

data = {
    "deployment_id": "gcp-text-bison-001",
    "instances": [
        {
            "content": "Provide a very short summary, no more than three sentences, for the following article:\n\nOur quantum computers work by manipulating qubits in an orchestrated fashion that we call quantum algorithms.\nThe challenge is that qubits are so sensitive that even stray light can cause calculation errors — and the problem worsens as quantum computers grow.\nThis has significant consequences, since the best quantum algorithms that we know for running useful applications require the error rates of our qubits to be far lower than we have today.\nTo bridge this gap, we will need quantum error correction.\nQuantum error correction protects information by encoding it across multiple physical qubits to form a “logical qubit,” and is believed to be the only way to produce a large-scale quantum computer with error rates low enough for useful calculations.\nInstead of computing on the individual qubits themselves, we will then compute on logical qubits. By encoding larger numbers of physical qubits on our quantum processor into one logical qubit, we hope to reduce the error rates to enable useful quantum algorithms.\n\nSummary:\n"
        }
    ],
    "parameters": {
        "temperature": 0.2,
        "maxOutputTokens": 256,
        "topP": 0.8,
        "topK": 40
    }
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

# text-bison is a text-generation model, so the request goes to the
# completions route rather than the embeddings route.
response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# AWS Bedrock anthropic-claude-v1
# PLEASE NOTE: refer the prompt format and the correct use here https://docs.anthropic.com/claude/docs/constructing-a-prompt#use-the-correct-format
# The special tokens "\n\nHuman:" and "\n\nAssistant:" are required as it was trained as a conversational agent using these tokens.

data = {
    "deployment_id": "anthropic-claude-v1",
    # Real newlines ("\n", not the escaped "\\n") so the prompt carries the
    # required "\n\nHuman:" / "\n\nAssistant:" turn markers.
    "prompt":"\n\nHuman: 1+1=\n\nAssistant:",
    "max_tokens_to_sample":300,
    "temperature":0.5,
    "top_k":250,
    "top_p":1,
    "stop_sequences":["Human:"]
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# AWS Bedrock anthropic-claude-instant-v1

# See Claude Docs for use cases https://docs.anthropic.com/claude/docs/content-generation
# PLEASE NOTE: refer the prompt format and the correct use here https://docs.anthropic.com/claude/docs/constructing-a-prompt#use-the-correct-format
# The special tokens "\n\nHuman:" and "\n\nAssistant:" are required as it was trained as a conversational agent using these tokens.


data = {
    "deployment_id": "anthropic-claude-instant-v1",
    # Built from explicit pieces so the turn markers are exactly "\n\nHuman:" and
    # "\n\nAssistant:"; an indented triple-quoted string would put source
    # indentation in front of them.
    "prompt": (
        "\n\nHuman: I'd like you to rewrite the following paragraph using the following instructions: \"less detail\".\n\n"
        "\"In 1758, the Swedish botanist and zoologist Carl Linnaeus published in his Systema Naturae, the two-word naming of species (binomial nomenclature). Canis is the Latin word meaning \"dog\", and under this genus, he listed the domestic dog, the wolf, and the golden jackal.\"\n\n"
        "Please put your rewrite in <rewrite></rewrite> tags."
        # The Assistant turn is pre-filled with the opening tag
        "\n\nAssistant: <rewrite>"
    ),
    "max_tokens_to_sample":300,
    "temperature":0.5,
    "top_k":250,
    "top_p":1,
    "stop_sequences":["Human:"]
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# AWS Bedrock ai21-j2-grande-instruct

# See Jurassic-2 Docs for API Reference https://docs.ai21.com/reference/j2-complete-api-ref

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

# The three penalty objects are spelled out in full; each has scale 0 and every applyTo* flag off
data = dict(
    deployment_id="ai21-j2-grande-instruct",
    prompt="These are a few of my favorite",
    numResults=1,
    maxTokens=30,
    temperature=0.7,
    topKReturn=0,
    topP=1,
    countPenalty=dict(scale=0, applyToNumbers=False, applyToPunctuations=False,
                      applyToStopwords=False, applyToWhitespaces=False, applyToEmojis=False),
    frequencyPenalty=dict(scale=0, applyToNumbers=False, applyToPunctuations=False,
                          applyToStopwords=False, applyToWhitespaces=False, applyToEmojis=False),
    presencePenalty=dict(scale=0, applyToNumbers=False, applyToPunctuations=False,
                         applyToStopwords=False, applyToWhitespaces=False, applyToEmojis=False),
    stopSequences=["##"],
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# AWS Bedrock ai21-j2-jumbo-instruct

# See Jurassic-2 Docs for API Reference https://docs.ai21.com/reference/j2-complete-api-ref

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

# The three penalty objects are spelled out in full; each has scale 0 and every applyTo* flag off
data = dict(
    deployment_id="ai21-j2-jumbo-instruct",
    prompt="These are a few of my favorite",
    numResults=1,
    maxTokens=30,
    temperature=0.7,
    topKReturn=0,
    topP=1,
    countPenalty=dict(scale=0, applyToNumbers=False, applyToPunctuations=False,
                      applyToStopwords=False, applyToWhitespaces=False, applyToEmojis=False),
    frequencyPenalty=dict(scale=0, applyToNumbers=False, applyToPunctuations=False,
                          applyToStopwords=False, applyToWhitespaces=False, applyToEmojis=False),
    presencePenalty=dict(scale=0, applyToNumbers=False, applyToPunctuations=False,
                         applyToStopwords=False, applyToWhitespaces=False, applyToEmojis=False),
    stopSequences=["##"],
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# AWS Bedrock amazon-titan-tg1-large
# The prompt goes in `inputText`; generation settings go in `textGenerationConfig`.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="amazon-titan-tg1-large",
    inputText="Write an article explaining Artificial Intelligence.",
    textGenerationConfig=dict(
        maxTokenCount=512,
        stopSequences=[],
        temperature=0,
        topP=0.9,
    ),
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# AWS Bedrock Embeddings - amazon-titan-e1t-medium
# NOTE(review): this is an embeddings model but the request is posted to /completions,
# whereas the text-embedding-ada-002-v2 example posts to /embeddings. Confirm which
# route the service expects for this deployment.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="amazon-titan-e1t-medium",
    inputText="explain black holes to 8th graders",
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# Falcon 7B model - AI Core - falcon-7b
# The prompt sets up a User/Falcon dialogue; generation is cut at "\nUser" or "<|endoftext|>".

data = {
    "deployment_id": "falcon-7b",
    # Plain apostrophes: the shell-style '\'' escape is not needed inside a Python
    # string and would put three apostrophes into the prompt.
    "inputs": "The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.\nUser: Can you give me a short summary of the 'John Carter'?\nFalcon: ",
    "parameters": {
        "do_sample": True,
        "max_new_tokens": 512,
        "repetition_penalty": 10.0,
        "stop": [
            "\nUser",
            "<|endoftext|>"
        ],
        "temperature": 0.8,
        "top_k": 10,
        "top_p": 0.95
    }
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

# SAMPLE RESPONSE FOR FORMAT:
# {
#     'generated_text': "''The John Carter'' is one of the most important ships for humans.\nIt is designed to take large numbers of people off Earth and travel\nto another planet in our solar system, Mars. It was funded by the\nUnited States government.\nUser"
# }

In [None]:
# Falcon 40B Instruct model - AI Core - falcon-40b-instruct
# The prompt sets up a User/Falcon dialogue; generation is cut at "\nUser" or "<|endoftext|>".

data = {
    "deployment_id": "falcon-40b-instruct",
    # Plain apostrophes: the shell-style '\'' escape is not needed inside a Python
    # string and would put three apostrophes into the prompt.
    "inputs": "The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.\nUser: Can you give me a short summary of the 'John Carter'?\nFalcon: ",
    "parameters": {
        "do_sample": True,
        "max_new_tokens": 512,
        "repetition_penalty": 10.0,
        "stop": [
            "\nUser",
            "<|endoftext|>"
        ],
        "temperature": 0.8,
        "top_k": 10,
        "top_p": 0.95
    }
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

# SAMPLE RESPONSE FOR FORMAT:
# {
#     'generated_text': "' John Carpenter'' refers both as musician/composer best known for his soundtracks from horror films such movies like Halloween or Escape From New York.'"
# }

In [None]:
# gpt-35-turbo with functions support
# Sends one user message together with the JSON-schema description of a `search_hotels` function.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="gpt-35-turbo",
    messages=[
        dict(role="user",
             content="Find beachfront hotels in San Diego for less than $300 a month with free breakfast."),
    ],
    functions=[
        dict(
            name="search_hotels",
            description="Retrieves hotels from the search index based on the parameters provided",
            parameters={
                "type": "object",
                "properties": {
                    "location": {"type": "string",
                                 "description": "The location of the hotel (i.e. Seattle, WA)"},
                    "max_price": {"type": "number",
                                  "description": "The maximum price for the hotel"},
                    "features": {"type": "string",
                                 "description": "A comma separated list of features (i.e. beachfront, free wifi, etc.)"},
                },
                # Only the location is mandatory
                "required": ["location"],
            },
        ),
    ],
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# gpt-4 with functions support
# Sends one user message together with the JSON-schema description of a `search_hotels` function.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="gpt-4",
    messages=[
        dict(role="user",
             content="Find beachfront hotels in San Diego for less than $300 a month with free breakfast."),
    ],
    functions=[
        dict(
            name="search_hotels",
            description="Retrieves hotels from the search index based on the parameters provided",
            parameters={
                "type": "object",
                "properties": {
                    "location": {"type": "string",
                                 "description": "The location of the hotel (i.e. Seattle, WA)"},
                    "max_price": {"type": "number",
                                  "description": "The maximum price for the hotel"},
                    "features": {"type": "string",
                                 "description": "A comma separated list of features (i.e. beachfront, free wifi, etc.)"},
                },
                # Only the location is mandatory
                "required": ["location"],
            },
        ),
    ],
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# gpt-4-32k with functions support
# Sends one user message together with the JSON-schema description of a `search_hotels` function.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="gpt-4-32k",
    messages=[
        dict(role="user",
             content="Find beachfront hotels in San Diego for less than $300 a month with free breakfast."),
    ],
    functions=[
        dict(
            name="search_hotels",
            description="Retrieves hotels from the search index based on the parameters provided",
            parameters={
                "type": "object",
                "properties": {
                    "location": {"type": "string",
                                 "description": "The location of the hotel (i.e. Seattle, WA)"},
                    "max_price": {"type": "number",
                                  "description": "The maximum price for the hotel"},
                    "features": {"type": "string",
                                 "description": "A comma separated list of features (i.e. beachfront, free wifi, etc.)"},
                },
                # Only the location is mandatory
                "required": ["location"],
            },
        ),
    ],
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# Llama 2 13b chat hf model - AI Core - llama2-13b-chat-hf
# The prompt goes in `inputs`; sampling settings go in `parameters`.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="llama2-13b-chat-hf",
    inputs="SAP is a leading provider of",
    parameters=dict(
        do_sample=True,
        max_new_tokens=512,
        repetition_penalty=10.0,
        stop=["\nUser", "<|endoftext|>"],
        temperature=0.8,
        top_k=10,
        top_p=0.95,
    ),
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

# SAMPLE RESPONSE FOR FORMAT:
# {
#     "generated_text": "enterprise software solutions, offering innovative technologies that help businesses and organizations improve their operations. With the acquisition by Qualtrics in 2018 (now known as Experience Cloud), they have expanded into experience management capabilities to provide an even more comprehensive platform for customers around globe; integrating data collection tools such surveys/feedback forms with analytical power offered through its ERP system – making it easier than ever before!\nQualTricks offers robust survey creation toolset designed specifically cater towards market research needs including advanced logic branching capability which allows users tailor responses based on previous answers given within same questionnaire or across multiple ones if needed . It also comes equipped powerful reporting features enabling analysts gain valuable insights about customer preferences trends & patterns via detailed visualizations like heat maps , scatter plots etc.. All these functionalities allow teams collect relevant feedback from clients at scale efficiently while maintain substantial depth qualitative analysis required make informed decisions moving forward strategically speaking"
# }

In [None]:
# AWS Bedrock anthropic.claude-v2 model - anthropic-claude-v2
# PLEASE NOTE: refer the prompt format and the correct use here https://docs.anthropic.com/claude/docs/constructing-a-prompt#use-the-correct-format
# The special tokens "\n\nHuman:" and "\n\nAssistant:" are required as it was trained as a conversational agent using these tokens.
data = {
    "deployment_id": "anthropic-claude-v2",
    # Real newlines ("\n", not the escaped "\\n") so the prompt carries the
    # required "\n\nHuman:" / "\n\nAssistant:" turn markers.
    "prompt":"\n\nHuman: 1+1=\n\nAssistant:",
    "max_tokens_to_sample":300,
    "temperature":0.5,
    "top_k":250,
    "top_p":1,
    "stop_sequences":["Human:"]
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# Google Vertex AI chat-bison@001 chat completions model
# Each instance carries a `context`, few-shot `examples` (input/output pairs) and the `messages` so far.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = {
    "deployment_id": "gcp-chat-bison-001",
    "instances": [
        {
            "context": "My name is Ned. You are my personal assistant. My favorite movies are Lord of the Rings and Hobbit.",
            "examples": [
                {
                    "input": {"content": "Who do you work for?"},
                    "output": {"content": "I work for Ned."},
                },
                {
                    "input": {"content": "What do I like?"},
                    "output": {"content": "Ned likes watching movies."},
                },
            ],
            "messages": [
                {"author": "user",
                 "content": "Are my favorite movies based on a book series?"},
                {"author": "bot",
                 "content": "Yes, your favorite movies, The Lord of the Rings and The Hobbit, are based on book series by J.R.R. Tolkien."},
                {"author": "user",
                 "content": "When were these books published?"},
            ],
        },
    ],
    "parameters": {
        "temperature": 0.3,
        "maxOutputTokens": 200,
        "topP": 0.8,
        "topK": 40,
    },
}

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# GCP Vertex AI textembedding-gecko@001 textembeddings model
# NOTE(review): this is an embeddings model but the request is posted to /completions,
# whereas the text-embedding-ada-002-v2 example posts to /embeddings. Confirm which
# route the service expects for this deployment.

headers = {"Authorization":  f"Bearer {token}", "Content-Type": "application/json"}

data = dict(
    deployment_id="gcp-textembedding-gecko-001",
    instances=[dict(content="What is life?")],
)

response = requests.post(f"{svc_url}/api/v1/completions", headers=headers, json=data)
print(response.json())

In [None]:
# gpt-35-turbo-16k example, prompt uses Chat Completions API format
data = {
    "deployment_id": "gpt-35-turbo-16k",
    "messages": [
        {"role": "system", "content": "Assistant is a large language model trained by OpenAI."},
        {"role": "user", "content": "Who were the founders of SAP?"}
    ],
    "max_tokens": 100,
    "temperature": 0.0,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    # None is serialised as JSON null (no stop sequence). The string "null"
    # would instead be sent as a literal stop sequence.
    "stop": None
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())

In [None]:
# Llama 2 70b chat hf model - AI Core - llama2-70b-chat-hf
# The prompt wraps a system message in <<SYS>> ... <</SYS>> inside an [INST] ... [/INST] block.
data = {
    "deployment_id": "llama2-70b-chat-hf",
    # Plain apostrophes in "don't": the shell-style '\'' escape is not needed inside a
    # Python string and would put three apostrophes into the prompt.
    "inputs": "[INST] <<SYS>>\n\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n\nGive me a summary of \"John Carter\" movie.\n[/INST] ",
    "parameters": {
        "best_of": 2,
        "do_sample": True,
        "max_new_tokens": 512,
        "repetition_penalty": 10.0,
        "stop": [],
        "temperature": 0.1,
        "top_k": 10,
        "top_p": 0.95
    }
}

headers = {
    "Authorization":  f"Bearer {token}",
    "Content-Type": "application/json"
}

response = requests.post(f"{svc_url}/api/v1/completions",
                         headers=headers,
                         json=data)
print(response.json())