## Initialization

Make sure you are using Python 3.10 or above and that your `.env` file is in your root folder.

In [None]:
# Install the python-dotenv package to load environment variables easily
pip install python-dotenv

In [None]:
# Install the generative-ai-hub-sdk package with all optional dependencies
pip install "generative-ai-hub-sdk[all]"

Now restart your kernel so that the newly installed packages are available.


In [35]:
# Modules needed to read the .env file and to create the proxy client
from dotenv import load_dotenv
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client

# Read the proxy configuration from the .env file into the environment
load_dotenv()

# Create the proxy client
proxy_client = get_proxy_client()


## Completions

In [None]:
# Complete a given prompt through the native OpenAI-style completions client
from gen_ai_hub.proxy.native.openai import completions

# Request a short completion; temperature is set to 0 for a deterministic response
response = completions.create(
    model_name="meta--llama3.1-70b-instruct",
    prompt="The Answer to the Ultimate Question of Life, the Universe, and Everything is",
    max_tokens=20,
    temperature=0,
)

# Show the raw completion result
print(response)

In [None]:
# Simulate a chat by sending a prepared conversation to a chat model
from gen_ai_hub.proxy.native.openai import chat

# Conversation so far: system instruction, one answered question, one open question
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
    {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
    {"role": "user", "content": "Do other Azure Cognitive Services support this too?"},
]

# Keyword arguments passed to the chat completion call
kwargs = {"model_name": 'gpt-4o-mini', "messages": messages}

# Send the conversation to the chat completion API
response = chat.completions.create(**kwargs)

# Show the raw chat response
print(response)

## Embeddings

In [36]:
# Embed a piece of text with an embedding model created by init_embedding_model
from gen_ai_hub.proxy.langchain.init_models import init_embedding_model

# Embedding model to query
embeddings = init_embedding_model('text-embedding-ada-002')

# Text to embed
text = 'Every decoding is another encoding.'

# Compute the embedding of the text query and show it
response = embeddings.embed_query(text)
print(response)

[-0.013359896838665009, 0.002867788076400757, 0.021806415170431137, -0.01978559046983719, 0.005431791767477989, -0.006716270465403795, -0.005203953944146633, -0.011187179945409298, -0.005435093771666288, -0.03159354627132416, 0.005824729800224304, 0.026112224906682968, 0.009311643429100513, -0.006016246043145657, -0.015268453396856785, -0.021938495337963104, 0.018953487277030945, 0.010170163586735725, 0.022255487740039825, -0.031646378338336945, -0.031884122639894485, 0.010539988055825233, -0.00817575491964817, -0.032253947108983994, -0.01724965311586857, 0.007819138467311859, 0.00390626722946763, -0.017593061551451683, 0.0015676250914111733, -0.0070596784353256226, 0.008690866641700268, -0.01184757985174656, 0.001669987104833126, -0.027974553406238556, -0.01377594843506813, -0.02159508690237999, -0.01180135179311037, -0.033891741186380386, 0.005501133855432272, -0.004923283588141203, -0.0008733792928978801, -0.005240275990217924, -0.011583419516682625, -0.002118233824148774, -0.022730

## Langchain Integration

### LLM

In [None]:
# Integrate with Langchain for large language models.
# PromptTemplate is imported from langchain.prompts (the same package the chat
# model example imports from): importing it from the langchain root module is
# deprecated and no longer supported by recent Langchain releases.
from langchain.prompts import PromptTemplate

# Import the Langchain class representing the AICore OpenAI models
from gen_ai_hub.proxy.langchain.openai import OpenAI
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client

# Initialize the proxy client
proxy_client = get_proxy_client('gen-ai-hub')
# Specify non-chat model
model_name = "meta--llama3.1-70b-instruct"

# Create an OpenAI object from Langchain using the model and proxy client
llm = OpenAI(proxy_model_name=model_name, proxy_client=proxy_client)  # can be used as usual with Langchain

# Define the prompt template; {question} is filled in when the chain is invoked
template = """Question: {question}

Answer: Let's think step by step."""

# Create the Langchain prompt template and pipe it into the model
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = prompt | llm

# Define a question for the Langchain model
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

# Invoke the chain with the given question and print the response
print(llm_chain.invoke({'question': question}))

### Chat model

In [None]:
# Use a chat model through Langchain with a few-shot style chat prompt
from langchain.prompts.chat import (
    AIMessagePromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

# Chat model class and proxy client factory
from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client

# Proxy client for the chat model
proxy_client = get_proxy_client('gen-ai-hub')

# Chat model bound to the proxy client
chat_llm = ChatOpenAI(proxy_model_name='gpt-4o-mini', proxy_client=proxy_client)

# System instruction for the conversation
template = 'You are a helpful assistant that translates English to pirate.'
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# One example exchange shown to the model before the real input
example_human = HumanMessagePromptTemplate.from_template('Hi')
example_ai = AIMessagePromptTemplate.from_template('Ahoy!')

# The actual user message; {text} is supplied at invocation time
human_template = '{text}'
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# Assemble the full chat prompt from the individual message prompts
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        example_human,
        example_ai,
        human_message_prompt,
    ]
)

# Pipe the chat prompt into the chat model
chain = chat_prompt | chat_llm

# Run the chain on the user input and print the content of the reply
response = chain.invoke({'text': 'I love planking.'})
print(response.content)

### Embeddings

In [37]:
# Embed text with the Langchain OpenAIEmbeddings class, with and without an explicit proxy client
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client

# Proxy client for the first variant
proxy_client = get_proxy_client('gen-ai-hub')

# Variant 1: embedding model created with an explicit proxy client
embedding_model = OpenAIEmbeddings(
    proxy_model_name='text-embedding-ada-002',
    proxy_client=proxy_client,
)

# Embed the text query and show the result
response = embedding_model.embed_query('Every decoding is another encoding.')
print(response)

# Variant 2: embedding model created without passing a proxy client
embedding_model = OpenAIEmbeddings(proxy_model_name='text-embedding-ada-002')

# Embed the same text query again and show the result
response = embedding_model.embed_query('Every decoding is another encoding.')
print(response)

[-0.013359896838665009, 0.002867788076400757, 0.021806415170431137, -0.01978559046983719, 0.005431791767477989, -0.006716270465403795, -0.005203953944146633, -0.011187179945409298, -0.005435093771666288, -0.03159354627132416, 0.005824729800224304, 0.026112224906682968, 0.009311643429100513, -0.006016246043145657, -0.015268453396856785, -0.021938495337963104, 0.018953487277030945, 0.010170163586735725, 0.022255487740039825, -0.031646378338336945, -0.031884122639894485, 0.010539988055825233, -0.00817575491964817, -0.032253947108983994, -0.01724965311586857, 0.007819138467311859, 0.00390626722946763, -0.017593061551451683, 0.0015676250914111733, -0.0070596784353256226, 0.008690866641700268, -0.01184757985174656, 0.001669987104833126, -0.027974553406238556, -0.01377594843506813, -0.02159508690237999, -0.01180135179311037, -0.033891741186380386, 0.005501133855432272, -0.004923283588141203, -0.0008733792928978801, -0.005240275990217924, -0.011583419516682625, -0.002118233824148774, -0.022730

## Streaming

In [None]:
# Define a function to stream responses from OpenAI models via generative AI
from gen_ai_hub.proxy.native.openai import chat

def stream_openai(prompt, model_name='gpt-4o'):
    """Stream a chat completion for `prompt` and print the content as it arrives.

    Args:
        prompt: Text sent as the user message.
        model_name: Name of the chat model to call.

    Returns:
        None. The streamed content is written to standard output without
        a trailing newline.
    """
    # Conversation: fixed system instruction followed by the user's prompt
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    # stream=True requests the response as an iterable of chunks
    chunks = chat.completions.create(
        model_name=model_name,
        messages=conversation,
        stream=True,
    )

    for piece in chunks:
        # Skip chunks that carry no choices
        if not piece.choices:
            continue
        text = piece.choices[0].delta.content
        # Skip empty or missing content
        if not text:
            continue
        print(text, end='')

In [None]:
# Execute the stream_openai function with a given prompt; the answer is printed as it streams in
stream_openai("Why is the sky blue?")

In [None]:
# Define a function to stream responses from Google's Gemini models
from gen_ai_hub.proxy.native.google_vertexai.clients import GenerativeModel
from vertexai.generative_models import GenerationConfig

def stream_gemini(prompt, model_name='gemini-1.5-flash'):
    """Stream generated content for `prompt` and print the text as it arrives.

    Args:
        prompt: Text passed to the model's `generate_content` call.
        model_name: Name of the model to call.

    Returns:
        None. The streamed text is written to standard output without
        a trailing newline.
    """
    # Model configured with an output limit of 500 tokens
    model = GenerativeModel(
        model_name=model_name,
        generation_config=GenerationConfig(max_output_tokens=500),
    )

    # stream=True requests the response as an iterable of chunks
    for piece in model.generate_content(prompt, stream=True):
        print(piece.text, end='')

In [None]:
# Execute the stream_gemini function with a given prompt; the answer is printed as it streams in
stream_gemini("Why is the sky blue?")

In [None]:
# Define a function to stream responses from Anthropic's Claude model via Amazon Bedrock
import json
from gen_ai_hub.proxy.native.amazon.clients import Session

def stream_claude(prompt, model_name='anthropic--claude-3-haiku'):
    """Stream a response for `prompt` and print the text deltas as they arrive.

    Args:
        prompt: Text sent as the single user message.
        model_name: Name of the model to call.

    Returns:
        None. The streamed text is written to standard output without
        a trailing newline.
    """
    # Bedrock client bound to the requested model
    client = Session().client(model_name=model_name)

    # Request payload: token limit, the user message and the API version tag
    payload = {
        "max_tokens": 500,
        "messages": [{"role": "user", "content": prompt}],
        "anthropic_version": "bedrock-2023-05-31",
    }
    response = client.invoke_model_with_response_stream(body=json.dumps(payload))

    for event in response.get("body"):
        data = json.loads(event["chunk"]["bytes"])
        # Only content_block_delta events are printed; all others are skipped
        if data["type"] != "content_block_delta":
            continue
        print(data["delta"].get("text", ""), end="")

In [None]:
# Execute the stream_claude function with a given prompt; the answer is printed as it streams in
stream_claude("Why is the sky blue?")

In [None]:
# Define a function to stream responses from Amazon's Titan models.
# Relies on `json` and `Session` imported in the stream_claude cell above.
def stream_titan(prompt, model_name='amazon--titan-text-lite'):
    """Stream a response for `prompt` and print the text as it arrives.

    Args:
        prompt: Text sent as the request's `inputText`.
        model_name: Name of the model to call.

    Returns:
        None. The streamed text is written to standard output without
        a trailing newline.
    """
    # Initialize Bedrock session client
    bedrock = Session().client(model_name=model_name)
    # Define request body with input text and generation configuration
    body = json.dumps({
        "inputText": prompt,
        "textGenerationConfig": {
            "maxTokenCount": 500
        }
    })

    # Invoke model and get response stream
    response = bedrock.invoke_model_with_response_stream(body=body)
    stream = response.get("body")

    # Iterate over the stream and print response text
    for event in stream:
        chunk = json.loads(event["chunk"]["bytes"])
        if "outputText" in chunk:
            # end='' keeps consecutive chunks on one line, as the other
            # streaming helpers do; a newline per chunk would split the text
            print(chunk["outputText"], end='')

In [None]:
# Execute the stream_titan function with a given prompt; the answer is printed as it streams in
stream_titan("Why is the sky blue?")

In [None]:
# Define a function to stream responses using Langchain
from gen_ai_hub.proxy.langchain import init_llm

def stream_langchain(prompt, model_name):
    """Stream a Langchain model's response to `prompt` and print it as it arrives.

    Args:
        prompt: Input passed to the model's `stream` method.
        model_name: Name of the model handed to `init_llm`.

    Returns:
        None. The `content` of each streamed chunk is written to standard
        output without a trailing newline.
    """
    # Model created with an output limit of 500 tokens
    model = init_llm(model_name=model_name, max_tokens=500)

    for piece in model.stream(prompt):
        print(piece.content, end='')

In [None]:
# Execute the stream_langchain function for the gpt-4o-mini model with a given prompt;
# the answer is printed as it streams in
stream_langchain("How do airplanes stay in the air?", model_name='gpt-4o-mini')

In [None]:
# Execute the stream_langchain function for the gemini-1.5-flash model with a given prompt;
# the answer is printed as it streams in
stream_langchain("How do airplanes stay in the air?", model_name='gemini-1.5-flash')

In [None]:
# Execute the stream_langchain function for Anthropic's Claude model with a given prompt;
# the answer is printed as it streams in
stream_langchain("How do airplanes stay in the air?", model_name='anthropic--claude-3-haiku')

In [None]:
# Execute the stream_langchain function for Amazon's Titan model with a given prompt;
# the answer is printed as it streams in
stream_langchain("How do airplanes stay in the air?", model_name='amazon--titan-text-lite')