## Setup

In [6]:
import os
from dotenv import load_dotenv

from oci.config import from_file
from oci.generative_ai import GenerativeAiClient
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
    EmbedTextDetails,
    OnDemandServingMode,
    GenerateTextDetails,
    CohereLlmInferenceRequest,
    SummarizeTextDetails
)

In [6]:
load_dotenv()
config = from_file()

In [7]:
load_dotenv()
config = from_file()

GEN_AI_ENDPOINT = os.getenv('GEN_AI_ENDPOINT')
GEN_AI_INFERENCE_ENDPOINT = os.getenv('GEN_AI_INFERENCE_ENDPOINT')
COMPARTMENT_ID = os.getenv('COMPARTMENT_ID')
gen_ai_client = GenerativeAiClient(config=config, service_endpoint=GEN_AI_ENDPOINT)
gen_ai_inference_client = GenerativeAiInferenceClient(
    config=config,
    service_endpoint=GEN_AI_INFERENCE_ENDPOINT
)

## Generative AI - Manage

In [8]:
# Lists the models in a specific compartment
list_models_response = gen_ai_client.list_models(compartment_id=COMPARTMENT_ID)
list_models_response.data

{
  "items": [
    {
      "base_model_id": null,
      "capabilities": [
        "TEXT_GENERATION"
      ],
      "compartment_id": null,
      "defined_tags": {},
      "display_name": "meta.llama-2-70b-chat",
      "fine_tune_details": null,
      "freeform_tags": {},
      "id": "ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceyai3pxxkeezogygojnayizqu3bgslgcn6yiqvmyu3w75ma",
      "is_long_term_supported": null,
      "lifecycle_details": "Creating Base Model",
      "lifecycle_state": "ACTIVE",
      "model_metrics": null,
      "system_tags": {},
      "time_created": "2024-01-05T02:19:51.103000+00:00",
      "time_deprecated": null,
      "type": "BASE",
      "vendor": "meta",
      "version": "1.0"
    },
    {
      "base_model_id": null,
      "capabilities": [
        "TEXT_GENERATION",
        "FINE_TUNE"
      ],
      "compartment_id": null,
      "defined_tags": {},
      "display_name": "cohere.command",
      "fine_tune_details": null,
      "freeform_tags": {}

## Generative AI - Inference

### Inference setup

In [7]:
EMBEDDINGS_MODEL_OCID = os.getenv('EMBEDDINGS_MODEL_OCID')
SUMMARIZE_MODEL_OCID = os.getenv('SUMMARIZE_MODEL_OCID')
GENERATION_MODEL_OCID = os.getenv('GENERATION_MODEL_OCID')

print(f"embeddings: {EMBEDDINGS_MODEL_OCID}")
print(f"summatize: {SUMMARIZE_MODEL_OCID}")
print(f"generation: {GENERATION_MODEL_OCID}")

embeddings: None
summatize: None
generation: None


### Embeddings

In [10]:
inputs = [
    "Learn about the Employee Stock Purchase Plan",
    "Reassign timecard approvals during leave",
    "View my payslip online",
    "Learn about the Employee Stock Purchase Plan",
    "Reassign timecard approvals during leave",
    "View my payslip online",
    "Enroll in benefits",
    "Change my direct deposit",
    "Have my employment/income verified",
    "Request A Workplace Accommodation",
    "Submit my time card",
    "Report Information Security Incidents",
    "Review the Code of Conduct",
    "Review the Social Media Policy",
    "Review Corporate Information Security Policies",
    "Understand Compliance and Ethics",
    "Understand the Fiscal Year Calendar",
    "Change my email address",
    "Change my personal information",
    "Learn about Analyst Relations",
    "Learn about Business Skills",
    "Learn about Career Development",
    "Learn about Employee Resource Groups",
    "Learn about Information Security",
    "Learn about Leadership skills",
    "Learn about sustainability",
    "Learn about Technical skills",
    "Request a Phone Number",
    "Learn about video conferencing",
    "Learn about Organizational Distribution Lists",
    "Subscribe to Group Mailing Lists",
    "Find a Campus Map",
    "Obtain a security badge",
    "Obtain an office workspace",
    "Submit an ergonomics request",
    "Use printers",
    "Delegate workflows or transaction approvals while out on leave",
    "Tips for working remotely",
    "Volunteer",
    "Reassign workflow approvals while on vacation",
    "How to delegate timecard approvals",
    "Apply for Corporate Credit Card",
    "Book travel",
    "Submit an expense report"
]

embed_text_result = gen_ai_inference_client.embed_text(
    embed_text_details=EmbedTextDetails(
        inputs=inputs,
        serving_mode=OnDemandServingMode(model_id=EMBEDDINGS_MODEL_OCID),
        compartment_id=COMPARTMENT_ID,
        is_echo=True,
        truncate='NONE',
    )
)

embed_text_result.data

{
  "embeddings": [
    [
      0.02078247,
      -0.0014667511,
      -0.03463745,
      -0.06512451,
      -0.020446777,
      -0.02571106,
      -0.01272583,
      0.048583984,
      -0.09436035,
      0.036499023,
      -0.023651123,
      0.03643799,
      -0.024917603,
      -0.021835327,
      0.0473938,
      -0.053955078,
      0.039093018,
      0.009223938,
      0.06463623,
      0.023452759,
      -0.015991211,
      0.08917236,
      0.00078582764,
      -0.094055176,
      0.03050232,
      0.014320374,
      -0.03881836,
      -0.01838684,
      0.04852295,
      0.014663696,
      0.012191772,
      -0.015098572,
      0.034942627,
      -0.029205322,
      0.033569336,
      0.0602417,
      -0.009124756,
      0.0058135986,
      0.020339966,
      -0.01940918,
      0.0043907166,
      -0.028274536,
      0.012771606,
      -0.0037384033,
      -0.026046753,
      0.004337311,
      0.004650116,
      -0.01499176,
      0.011016846,
      0.0049819946,
      -0.0005

### Generate text

In [11]:
prompt = """
Generate a job description for a data visualization expert with the following three qualifications only:
1) At least 5 years of data visualization expert
2) A great eye for details
3) Ability to create original visualizations
"""

generate_text_response = gen_ai_inference_client.generate_text(
    generate_text_details=GenerateTextDetails(
        compartment_id=COMPARTMENT_ID,
        serving_mode=OnDemandServingMode(
            model_id=GENERATION_MODEL_OCID
        ),
        inference_request=CohereLlmInferenceRequest(
            prompt=prompt,
            is_stream=False,
            num_generations=1
        )
    )
)
print(generate_text_response.__dict__)

generate_text_response.data

{'status': 200, 'headers': {'content-type': 'application/json', 'opc-request-id': 'B44A8219CB8F4C0D801F0D2F7699D506/E38B98EC8D040B2D56BD8B5E9CB62C99/2319BA93551DD44AF5EF78F84D8B25BE', 'content-encoding': 'gzip', 'content-length': '331'}, 'data': {
  "inference_response": {
    "generated_texts": [
      {
        "finish_reason": null,
        "id": "892174ac-237b-4782-8545-20035ac9cec9",
        "likelihood": null,
        "text": " We're looking for a talented Data Visualization Expert to join our team! The ideal candidate will have",
        "token_likelihoods": null
      }
    ],
    "prompt": null,
    "runtime_type": "COHERE",
    "time_created": "2024-04-03T12:27:08.271000+00:00"
  },
  "model_id": "ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceyafhwal37hxwylnpbcncidimbwteff4xha77n5xz4m7p6a",
  "model_version": "15.6"
}, 'request': <oci.request.Request object at 0x7fd82038fa90>, 'next_page': None, 'request_id': 'B44A8219CB8F4C0D801F0D2F7699D506/E38B98EC8D040B2D56BD8B5E

{
  "inference_response": {
    "generated_texts": [
      {
        "finish_reason": null,
        "id": "892174ac-237b-4782-8545-20035ac9cec9",
        "likelihood": null,
        "text": " We're looking for a talented Data Visualization Expert to join our team! The ideal candidate will have",
        "token_likelihoods": null
      }
    ],
    "prompt": null,
    "runtime_type": "COHERE",
    "time_created": "2024-04-03T12:27:08.271000+00:00"
  },
  "model_id": "ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceyafhwal37hxwylnpbcncidimbwteff4xha77n5xz4m7p6a",
  "model_version": "15.6"
}

In [1]:
import requests
import json
from oci.auth.signers import InstancePrincipalsSecurityTokenSigner

prompt = "Tell me a so loooooong joke"

signer = InstancePrincipalsSecurityTokenSigner()
body = {
    "compartmentId": COMPARTMENT_ID,
    "servingMode": {
        "servingType": "ON_DEMAND",
        "modelId": GENERATION_MODEL_OCID
    },
    "inferenceRequest": {
        "runtimeType": "COHERE",
        "prompt": prompt,
        "isStream": True,
        "numGenerations": 1,
        "maxTokens": 500
    }
}

path = "/20231130/actions/generateText"

response = requests.post(GEN_AI_INFERENCE_ENDPOINT + path, json=body, auth=signer, stream=True)
print(response)
for chunk in response.iter_lines():
    if chunk.strip():
        content = json.loads(chunk.decode('utf-8').split(': ', 1)[1])
        print(content)

NameError: name 'COMPARTMENT_ID' is not defined

In [17]:
import oci

# Setup basic variables
# Auth Config
# TODO: Please update config profile name and use the compartmentId that has policies grant permissions for using Generative AI Service
compartment_id = "ocid1.compartment.oc1..aaaaaaaamlgyvhoa4qvxzsoguxpdo62juyzpnaxscwq3n5kaxptrha5ihy4a"
CONFIG_PROFILE = "DEFAULT"
config = oci.config.from_file('~/.oci/config', CONFIG_PROFILE)

# Service endpoint
endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"

generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config=config, service_endpoint=endpoint, retry_strategy=oci.retry.NoneRetryStrategy(), timeout=(10,240))
generate_text_detail = oci.generative_ai_inference.models.GenerateTextDetails()
llm_inference_request = oci.generative_ai_inference.models.CohereLlmInferenceRequest()
llm_inference_request.prompt = "Tell me a so loooooong joke"
llm_inference_request.max_tokens = 600
llm_inference_request.temperature = 1
llm_inference_request.frequency_penalty = 0
llm_inference_request.top_p = 0.75

generate_text_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id="ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceyafhwal37hxwylnpbcncidimbwteff4xha77n5xz4m7p6a")
generate_text_detail.inference_request = llm_inference_request
generate_text_detail.compartment_id = compartment_id
generate_text_response = generative_ai_inference_client.generate_text(generate_text_detail)
# Print result
print("**************************Generate Texts Result**************************")
print(generate_text_response.data)


**************************Generate Texts Result**************************
{
  "inference_response": {
    "generated_texts": [
      {
        "finish_reason": null,
        "id": "20d41f18-7240-4738-bbc3-2cede8e9916c",
        "likelihood": null,
        "text": " Okay, let me tell you a long joke about a turtle and a rabbit!\n\nOnce upon a time, in a grassy field beside a sparkling river, a rabbit was hopping about when he spotted a turtle sunning himself on a large rock. The rabbit, who was known for his mischievous nature, decided to have some fun with the slow-moving turtle.\n\nHe approached the turtle and said, \"Hello there, Mr. Turtle! You know, I've got a brilliant idea to have a race. It would be a friendly competition to see who's the fastest among us. What do you say to that?\"\n\nThe turtle, who was quite naive and unaware of the rabbit's intentions, smiled and replied, \"Why, that sounds like a splendid idea, Mr. Rabbit! I've never been in a race before, but I'm game to g

### Summarize text

In [19]:
input = """
Oracle’s strategy is built around the reality that enterprises work with AI through three different modalities: Infrastructure, models and services, and within applications.

First, we provide a robust infrastructure for training and serving models at scale. Through our partnership with NVIDIA, we can give customers superclusters, which are powered by the latest GPUs in the market connected together with an ultra-low-latency RDMA over converged ethernet (RoCE) network. This solution provides a highly performant, cost-effective method for training generative AI models at scale. Many AI startups like Adept and MosaicML are building their products directly on OCI.

Second, we provide easy-to-use cloud services for developers and scientists to utilize in fully managed implementations. We’re enabling new generative AI services and business functions through our partnership with Cohere, a leading generative AI company for enterprise-grade large language models (LLMs). Through our partnership with Cohere, we’re building a new generative AI service. This upcoming AI service, OCI Generative AI, enables OCI customers to add generative AI capabilities to their own applications and workflows through simple APIs.

Third, we embed generative models into the applications and workflows that business users use every day. Oracle plans to embed generative AI from Cohere into its Fusion, NetSuite, and our vertical software-as-a-service (SaaS) portfolio to create solutions that provide organizations with the full power of generative AI immediately. Across industries, Oracle can provide native generative AI-based features to help organizations automate key business functions, improve decision-making, and enhance customer experiences. For example, in healthcare, Oracle Cerner manages billions of electronic health records (EHR). Using anonymized data, Oracle can create generative models adapted to the healthcare domain, such as automatically generating a patient discharge summary or a letter of authorization for medical insurance.

Oracle’s generative AI offerings span applications to infrastructure and provide the highest levels of security, performance, efficiency, and value.
"""

summarize_text_response = gen_ai_inference_client.summarize_text(
    summarize_text_details=SummarizeTextDetails(
        input=input,
        serving_mode=OnDemandServingMode(
            model_id=SUMMARIZE_MODEL_OCID
        ),
        compartment_id=COMPARTMENT_ID,
        is_echo=True,
        temperature=1.0,
        length='SHORT',
        format='AUTO',
        extractiveness='AUTO'
    )
)

summarize_text_response.data

{
  "id": "01354de7-dca3-4bcf-bde9-8e940d78aa70",
  "input": "\nOracle\u2019s strategy is built around the reality that enterprises work with AI through three different modalities: Infrastructure, models and services, and within applications.\n\nFirst, we provide a robust infrastructure for training and serving models at scale. Through our partnership with NVIDIA, we can give customers superclusters, which are powered by the latest GPUs in the market connected together with an ultra-low-latency RDMA over converged ethernet (RoCE) network. This solution provides a highly performant, cost-effective method for training generative AI models at scale. Many AI startups like Adept and MosaicML are building their products directly on OCI.\n\nSecond, we provide easy-to-use cloud services for developers and scientists to utilize in fully managed implementations. We\u2019re enabling new generative AI services and business functions through our partnership with Cohere, a leading generative AI comp

In [1]:
import oracledb

conn = oracledb.connect(user="system", password="123", dsn="localhost:1521/FREEPDB1")
with conn.cursor() as cur:
   cur.execute("SELECT 'Hello World!' FROM dual")
   res = cur.fetchall()
   print(res)

[('Hello World!',)]


In [16]:
import os
SUMMARIZE_MODEL_OCID = os.getenv('SUMMARIZE_MODEL_OCID') 
GEN_AI_INFERENCE_ENDPOINT = os.getenv('GEN_AI_INFERENCE_ENDPOINT') 
COMPARTMENT_ID = os.getenv('COMPARTMENT_ID') 


In [12]:
from langchain_community.embeddings import OCIGenAIEmbeddings

# use default authN method API-key
embeddings = OCIGenAIEmbeddings(
    model_id="ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceya3bqursz5i2eeg5eesvnlrqj4mrdmi3infd4ve3kaqjva",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..aaaaaaaamlgyvhoa4qvxzsoguxpdo62juyzpnaxscwq3n5kaxptrha5ihy4a",
)


query = "This is a query in English."
response = embeddings.embed_query(query)
print(response)

[-0.008422852, -0.030700684, -0.0010662079, -0.006931305, -0.022918701, -0.026565552, -0.01524353, 0.0032043457, 0.013137817, 0.042297363, 0.021484375, -0.02772522, -0.036346436, -0.031402588, -0.033569336, -0.0035419464, 0.04095459, 0.016586304, 0.022827148, 0.0058135986, -0.020401001, 0.037994385, 0.008369446, -0.06951904, 0.06021118, -0.044128418, -0.02079773, 0.012184143, 0.027328491, -0.0036792755, 0.022232056, -0.01411438, 0.027633667, -0.047607422, 0.025680542, 0.0025253296, 0.010017395, -0.043884277, 0.0018978119, 0.012962341, 0.026504517, -0.017456055, -0.093933105, 0.0074310303, -0.09967041, -0.04333496, 0.01322937, 0.03213501, 0.049835205, -0.02494812, 0.0049972534, 0.01360321, -0.005874634, 0.018341064, -0.03060913, 0.020111084, -0.04071045, -0.003572464, 0.015899658, -0.0038452148, -0.0078125, 0.018539429, 0.048095703, 0.031280518, -0.042877197, -0.017181396, 0.045410156, 0.03186035, 0.07788086, -0.066223145, -0.03817749, 0.079956055, -0.014984131, 0.01020813, -0.017456055

In [14]:
documents = ["This is a sample document", "and here is another one"]
response = embeddings.embed_documents(documents)
print(response)

[[-0.0021648407, -0.014160156, -0.011230469, -0.04937744, -0.040802002, -0.010726929, -0.07672119, 0.0024757385, -0.027526855, 0.036132812, -0.038482666, -0.025360107, -0.0015544891, -0.013671875, 0.010269165, -0.054718018, 0.04800415, 0.039886475, 0.04458618, 0.014892578, -0.01448822, 0.041503906, -0.012237549, -0.035308838, 0.028213501, -0.020874023, 0.01171875, -0.0043258667, -0.022964478, -0.0345459, -0.0030231476, 0.03567505, 0.023635864, 0.008491516, 0.005191803, 0.06976318, -0.012748718, 0.021469116, 0.019989014, -0.011756897, 0.003145218, -0.0010910034, -0.032073975, -0.015052795, -0.04510498, -0.034973145, 0.027740479, -0.010231018, -0.0127334595, 0.012237549, 0.024215698, -0.010131836, 0.00919342, 0.046447754, -0.033416748, -0.018325806, -0.0069503784, 0.018127441, 0.019927979, -0.009590149, -0.023330688, 0.0390625, 0.020935059, 0.0040779114, -0.018844604, -0.037597656, -0.0914917, 0.018157959, 0.041870117, -0.042236328, -0.025634766, -0.024673462, -0.018661499, -0.019729614,