In [None]:
import os
from dotenv import load_dotenv

from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
from langfuse.callback import CallbackHandler

テキスト生成 - ストリーミング

In [None]:
# Load variables from a .env file (python-dotenv) into the process environment.
_ = load_dotenv()

# OCI Generative AI connection settings.
compartment_id = os.getenv("COMPARTMENT_ID")
service_endpoint = os.getenv("GENAI_ENDPOINT")

# Langfuse settings; these are intentionally not echoed below.
secret_key = os.getenv("LANGFUSE_SECRET_KEY")
public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
langfuse_host = os.getenv("LANGFUSE_HOST")

# Echo the OCI settings so a missing variable (printed as None) is noticed
# before any client is created.
print("service endpoint: ", service_endpoint)
print("compartment id: ", compartment_id)

In [None]:
# Streaming chat client for the "cohere.command-r-plus" model, using
# INSTANCE_PRINCIPAL authentication and the endpoint/compartment read from
# the environment.
chat = ChatOCIGenAI(
    model_id="cohere.command-r-plus",
    service_endpoint=service_endpoint,
    compartment_id=compartment_id,
    auth_type="INSTANCE_PRINCIPAL",
    is_stream=True,
    # Generation parameters forwarded to the model as-is.
    model_kwargs=dict(
        temperature=0,
        max_tokens=500,
        top_p=0.75,
        top_k=0,
        frequency_penalty=0,
        presence_penalty=0,
    ),
)

In [None]:
# Langfuse callback handler built from the key pair and host read from the
# environment; it is passed to model calls through config["callbacks"].
langfuse_handler = CallbackHandler(
    host=langfuse_host,
    public_key=public_key,
    secret_key=secret_key,
)

In [None]:
# Start a streamed completion, with the Langfuse handler attached as a callback.
chat_stream = chat.stream(
    "OCHaCafeってなんですか？",
    config={"callbacks": [langfuse_handler]},
)

# Print only the text of each chunk so the answer reads as one continuous
# string. (To inspect a chunk's other fields, look at chunk.__dict__ in a
# separate cell instead of interleaving it with the streamed text.)
for chunk in chat_stream:
    print(chunk.content, end="", flush=True)

エンベディング

In [None]:
from langchain_community.embeddings.oci_generative_ai import OCIGenAIEmbeddings

In [None]:
# Embedding client for the "cohere.embed-multilingual-v3.0" model, using the
# same endpoint, compartment and authentication type as the chat client.
embeddings = OCIGenAIEmbeddings(
    model_id="cohere.embed-multilingual-v3.0",
    service_endpoint=service_endpoint,
    compartment_id=compartment_id,
    auth_type="INSTANCE_PRINCIPAL",
)

In [None]:
# Two sample Japanese passages to embed.
docs = [
    "「Oracle Cloud Hangout Cafe」(通称「おちゃかふぇ」/以降、OCHaCafe)は、日本オラクルが主催するコミュニティの1つです。定期的に、開発者・エンジニアに向けたクラウドネイティブな時代に身につけておくべきテクノロジーを深堀する勉強会を開催しています。",
    "日本オラクル株式会社（にほんオラクル、英: Oracle Corporation Japan）は、米国企業オラクルコーポレーション (Oracle Corporation) （1977年設立）が、1985年に日本で設立した法人である。",
]

# Embed the passages in one call; the result is indexed per input document.
res = embeddings.embed_documents(texts=docs)

# Report the length of the first returned vector.
print("Dims: ", len(res[0]))

In [None]:
import streamlit as st
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_milvus import Milvus
from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
from langchain_community.embeddings.oci_generative_ai import OCIGenAIEmbeddings

class OciGenerativeAi:
    """Wrapper around an OCI Generative AI chat model and embedding model.

    Builds a ``ChatOCIGenAI`` client (``cohere.command-r-plus``) and an
    ``OCIGenAIEmbeddings`` client (``cohere.embed-multilingual-v3.0``), both
    with ``INSTANCE_PRINCIPAL`` authentication. When both ``milvus_uri`` and
    ``collection_name`` are supplied, a Milvus vector store and a retriever
    are created as well so that :meth:`chat_with_rag` can be used.
    """

    def __init__(self, compartment_id: str, service_endpoint: str, **kwargs):
        """Create the model clients and, optionally, the Milvus retriever.

        Args:
            compartment_id: Forwarded to both OCI clients.
            service_endpoint: Forwarded to both OCI clients.
            **kwargs: Optional settings, looked up by exact key name; keys not
                listed here are ignored.

                * Chat model: ``streaming`` (defaults to ``False``),
                  ``max_tokens``, ``temperature``, ``top_k``, ``top_p``,
                  ``frequency_penalty``, ``presence_penalty``. Missing values
                  are forwarded as ``None``.
                * Tracing: ``callback_handler``.
                * Retrieval: ``milvus_uri`` and ``collection_name`` (both
                  required to enable retrieval), ``search_type``,
                  ``return_k`` (sent as ``k``), ``score_threshold``,
                  ``fetch_k``, ``lambda_mult``.
        """
        self.compartment_id = compartment_id
        self.service_endpoint = service_endpoint
        self.chat_model = self._initialize_chat_model(
            # Default to a real bool instead of None when the key is omitted.
            is_stream=bool(kwargs.get("streaming", False)),
            max_tokens=kwargs.get("max_tokens"),
            temperature=kwargs.get("temperature"),
            top_k=kwargs.get("top_k"),
            top_p=kwargs.get("top_p"),
            frequency_penalty=kwargs.get("frequency_penalty"),
            presence_penalty=kwargs.get("presence_penalty"),
        )
        self.embeddings_model = self._initialize_embedding_model()
        self.callback_handler = kwargs.get("callback_handler")

        # Always define these attributes so chat_with_rag() can report a clear
        # error instead of failing with AttributeError.
        self.milvus = None
        self.retriever = None
        if "milvus_uri" in kwargs and "collection_name" in kwargs:
            self.milvus = Milvus(
                embedding_function=self.embeddings_model,
                collection_name=kwargs.get("collection_name"),
                connection_args={"uri": kwargs.get("milvus_uri")}
            )
            # Only forward options the caller actually supplied; a None value
            # would otherwise override the retriever's own defaults.
            requested_search_kwargs = {
                "k": kwargs.get("return_k"),
                "score_threshold": kwargs.get("score_threshold"),
                "fetch_k": kwargs.get("fetch_k"),
                "lambda_mult": kwargs.get("lambda_mult"),
            }
            retriever_options = {
                "search_kwargs": {
                    key: value
                    for key, value in requested_search_kwargs.items()
                    if value is not None
                }
            }
            if kwargs.get("search_type") is not None:
                retriever_options["search_type"] = kwargs["search_type"]
            self.retriever = self.milvus.as_retriever(**retriever_options)

    def chat(self, input: str, streaming: bool):
        """Send ``input`` to the chat model.

        Args:
            input: The prompt passed to the model.
            streaming: When truthy, stream the answer; otherwise make a single
                blocking call.

        Returns:
            A generator yielding the ``content`` of each streamed chunk when
            ``streaming`` is truthy, otherwise the object returned by the
            chat model's ``invoke``.
        """
        # The generator lives in a helper: a function that contains ``yield``
        # is always a generator, so a ``return response`` next to it would
        # never reach the caller.
        if streaming:
            return self._stream_chat(input)
        return self.chat_model.invoke(input, config=self._run_config())

    def chat_with_rag(self, input: str, streaming: bool):
        """Answer ``input`` using documents fetched by the Milvus retriever.

        The prompt text is read from the Langfuse client stored in
        ``st.session_state["langfuse"]`` (prompt name ``demo-user-prompt``)
        and rendered as a jinja2 template with ``context`` and ``question``.

        Args:
            input: The user question; used both as the retrieval query and as
                the ``question`` template variable.
            streaming: When truthy, stream the answer; otherwise make a single
                blocking call.

        Returns:
            An iterator of text chunks when ``streaming`` is truthy, otherwise
            the complete answer produced by the chain.

        Raises:
            RuntimeError: If the instance was created without both
                ``milvus_uri`` and ``collection_name``.
        """
        if self.retriever is None:
            raise RuntimeError(
                "chat_with_rag requires a retriever; pass both 'milvus_uri' "
                "and 'collection_name' when creating OciGenerativeAi."
            )
        langfuse = st.session_state["langfuse"]
        prompt_template = PromptTemplate.from_template(
            langfuse.get_prompt(name="demo-user-prompt", type="text").prompt,
            template_format="jinja2"
        )
        chain = (
            {"context": self.retriever, "question": RunnablePassthrough()}
            | prompt_template
            | self.chat_model
            | StrOutputParser()
        )
        if streaming:
            # The chain ends with StrOutputParser, so the streamed items are
            # already plain strings; there is no ``.content`` to unwrap.
            return chain.stream(input, config=self._run_config())
        return chain.invoke(input, config=self._run_config())

    def _stream_chat(self, input: str):
        """Yield the ``content`` of every chunk streamed by the chat model."""
        for chunk in self.chat_model.stream(input, config=self._run_config()):
            yield chunk.content

    def _run_config(self) -> dict:
        """Build the ``config`` dict shared by every model/chain call.

        The callback list contains the configured handler, or is empty when
        none was given. ``configurable.session_id`` is added only when the
        Streamlit session state holds a ``session_id``.
        """
        config = {
            "callbacks": [] if self.callback_handler is None else [self.callback_handler]
        }
        # NOTE(review): a missing session id is treated as "no session" rather
        # than an error so the class can be used outside a Streamlit app;
        # confirm nothing downstream requires it.
        session_id = st.session_state.get("session_id")
        if session_id is not None:
            config["configurable"] = {"session_id": session_id}
        return config

    def _initialize_chat_model(self, is_stream: bool, max_tokens: int, temperature: float, top_k: int, top_p: float, frequency_penalty: float, presence_penalty: float) -> "ChatOCIGenAI":
        """Create the ``cohere.command-r-plus`` chat client.

        The generation parameters are sent to the model through
        ``model_kwargs`` and mirrored, together with ``is_stream``, in the
        client's ``metadata`` under ``model_parameters``.
        """
        return ChatOCIGenAI(
            auth_type="INSTANCE_PRINCIPAL",
            model_id="cohere.command-r-plus",
            compartment_id=self.compartment_id,
            service_endpoint=self.service_endpoint,
            is_stream=is_stream,
            model_kwargs={
                "max_tokens": max_tokens,
                "temperature": temperature,
                "top_k": top_k,
                "top_p": top_p,
                "frequency_penalty": frequency_penalty,
                "presence_penalty": presence_penalty,
            },
            metadata={
                "model_name": "cohere.command-r-plus",
                "model_parameters": {
                    "is_stream": is_stream,
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                    "frequency_penalty": frequency_penalty,
                    "presence_penalty": presence_penalty,
                }
            }
        )

    def _initialize_embedding_model(self) -> "OCIGenAIEmbeddings":
        """Create the ``cohere.embed-multilingual-v3.0`` embedding client."""
        return OCIGenAIEmbeddings(
            auth_type="INSTANCE_PRINCIPAL",
            compartment_id=self.compartment_id,
            service_endpoint=self.service_endpoint,
            model_id="cohere.embed-multilingual-v3.0"
        )


In [None]:
# OciGenerativeAi looks its options up by exact key name. The sampling options
# are read as "top_k" / "top_p"; other spellings such as "k" / "p" are ignored
# and the corresponding model parameters end up as None.
cohere = OciGenerativeAi(
    compartment_id=compartment_id,
    service_endpoint=service_endpoint,
    streaming=True,
    max_tokens=500,
    temperature=0,
    top_k=0,
    top_p=0.75,
    frequency_penalty=0,
    presence_penalty=0,
    # Trace calls with the Langfuse handler created earlier in the notebook.
    callback_handler=langfuse_handler,
)

In [None]:
# Ask the wrapper for a streamed answer.
response = cohere.chat(input="OCHaCafeってなんですか？", streaming=True)

# Echo each piece as it arrives while accumulating the full answer text.
message = ""
for content in response:
    print(content, end="")
    message = message + content