In [1]:
import os
import re
import base64
from typing import List
import pandas as pd
from dotenv import find_dotenv, load_dotenv

from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import BaseRetriever
from langchain.chat_models import init_chat_model
from langchain_core.documents import Document
from langchain.prompts.chat import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.chat_models import ChatOCIGenAI
from langchain_community.embeddings import OCIGenAIEmbeddings
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langgraph_supervisor import create_supervisor
from langchain.tools import tool

from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
    EmbedTextDetails,
    OnDemandServingMode,
)
import oracledb

# Load variables from the nearest .env file into the process environment;
# the return value is deliberately discarded.
_ = load_dotenv(find_dotenv())
# Initialise the Oracle Client libraries before any connection is opened
# (python-oracledb documents this call as enabling Thick mode).
oracledb.init_oracle_client()

# Database user, password and connect string passed to oracledb.connect() below,
# plus the OCI compartment used by every Generative AI client in this notebook.
# Each value is None when the corresponding environment variable is unset.
UN = os.getenv("UN")
PW = os.getenv("PW")
DSN = os.getenv("DSN")
OCI_COMPARTMENT_ID = os.getenv("OCI_COMPARTMENT_ID")

### Utils

In [2]:
# Chat model handed to create_react_agent() further down in the notebook.
model = ChatOCIGenAI(
    compartment_id=OCI_COMPARTMENT_ID,
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    model_id="cohere.command-a-03-2025",
    model_kwargs=dict(temperature=1.0, max_tokens=500),
)

In [3]:
def get_embedding(text: str) -> list:
    """Embed ``text`` with the OCI Generative AI embedding model.

    A new ``OCIGenAIEmbeddings`` client is built on every call and used for a
    single ``embed_query`` request.

    Args:
        text: The string to embed.

    Returns:
        Whatever ``OCIGenAIEmbeddings.embed_query`` returns for ``text``.
    """
    embedder = OCIGenAIEmbeddings(
        compartment_id=OCI_COMPARTMENT_ID,
        service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
        model_id="cohere.embed-multilingual-image-v3.0",
    )
    vector = embedder.embed_query(text)
    return vector

### Markdown Retriever

In [4]:
class CustomMarkdownRetriever(BaseRetriever):
    """
    Retriever over the ``docs_contents`` table.

    The query is embedded with ``get_embedding`` and the three rows whose
    ``embedding`` column is closest (cosine ``VECTOR_DISTANCE``) are returned.
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """
        Run the vector search and wrap every hit in a ``Document``.

        Args:
            query: Free-text query to embed and search with.
            run_manager: Callback manager supplied by LangChain (not used here).

        Returns:
            Up to three documents in distance order. ``page_content`` is the
            ``markdown`` column; ``metadata`` carries ``file_id`` and a
            zero-based ``vector_index`` rank.

        Raises:
            oracledb.DatabaseError: Printed, then re-raised. Any other exception
                is printed and the documents collected so far are returned.
        """
        docs: List[Document] = []
        # The embedding travels as its string form; to_vector() converts it in SQL.
        embed_query = str(get_embedding(query))
        try:
            # Both context managers release their resources on exit, so no
            # explicit connection.close() is needed.
            with oracledb.connect(user=UN, password=PW, dsn=DSN) as connection:
                with connection.cursor() as cursor:
                    cursor.setinputsizes(oracledb.DB_TYPE_VECTOR)
                    select_sql = """
                        SELECT
                            file_id,
                            markdown
                        FROM
                            docs_contents
                        ORDER BY VECTOR_DISTANCE(embedding, to_vector(:1, 1024, FLOAT32), COSINE)
                        FETCH FIRST 3 ROWS ONLY
                    """
                    cursor.execute(select_sql, [embed_query])
                    # Build the documents straight from the cursor instead of
                    # growing a DataFrame row by row and iterating it again.
                    for rank, (file_id, markdown) in enumerate(cursor):
                        # LOB values must be read while the connection is open;
                        # plain strings are passed through unchanged.
                        if hasattr(markdown, "read"):
                            markdown = markdown.read()
                        docs.append(
                            Document(
                                page_content=markdown,
                                metadata={'file_id': file_id, 'vector_index': rank},
                            )
                        )
        except oracledb.DatabaseError as e:
            print(f"Database error: {e}")
            raise
        except Exception as e:
            print("Error Vector Search:", e)

        return docs

### Image Retriever

In [5]:
class CustomImageRetriever(BaseRetriever):
    """
    Retriever over the ``image_contents`` table.

    The query is embedded with ``get_embedding`` and the three rows whose
    ``embedding`` column is closest (cosine ``VECTOR_DISTANCE``) are returned.
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """
        Run the vector search and wrap every hit in a ``Document``.

        Args:
            query: Free-text query to embed and search with.
            run_manager: Callback manager supplied by LangChain (not used here).

        Returns:
            Up to three documents in distance order. ``page_content`` is the
            ``summary`` column; ``metadata`` carries ``file_id``, ``file_path``
            (the ``image_path`` column) and a one-based ``vector_index`` rank.

        Raises:
            oracledb.DatabaseError: Printed, then re-raised. Any other exception
                is printed and the documents collected so far are returned.
        """
        docs: List[Document] = []
        # The embedding travels as its string form; to_vector() converts it in SQL.
        embed_query = str(get_embedding(query))
        try:
            # Both context managers release their resources on exit, so no
            # explicit connection.close() is needed.
            with oracledb.connect(user=UN, password=PW, dsn=DSN) as connection:
                with connection.cursor() as cursor:
                    cursor.setinputsizes(oracledb.DB_TYPE_VECTOR)
                    select_sql = """
                        SELECT
                            file_id,
                            image_path,
                            summary
                        FROM
                            image_contents
                        ORDER BY VECTOR_DISTANCE(embedding, to_vector(:1, 1024, FLOAT32), COSINE)
                        FETCH FIRST 3 ROWS ONLY
                    """
                    cursor.execute(select_sql, [embed_query])
                    for rank, (file_id, image_path, summary) in enumerate(cursor, start=1):
                        # Tolerate a LOB-typed summary: Document requires a string.
                        if hasattr(summary, "read"):
                            summary = summary.read()
                        docs.append(
                            Document(
                                page_content=summary,
                                metadata={
                                    'file_id': file_id,
                                    'file_path': image_path,
                                    'vector_index': rank,
                                },
                            )
                        )
        except oracledb.DatabaseError as e:
            print(f"Database error: {e}")
            raise
        except Exception as e:
            print("Error Vector Search:", e)

        return docs

### Tools
* Get Text with Markdown
* Get Text with Image

In [6]:
@tool
def get_text_with_markdown(query: str) -> str:
    """
    Get the value of department expenses, sales, and operating income by text with markdown retriever
    Args:
        query (str): The query to ask the model. YYYY/MM format is required.
    Returns:
        str: The answer from the model.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the agent model as the tool description.

    def join_page_contents(docs: List[Document]) -> str:
        # Feed the prompt the Markdown text itself; formatting the raw
        # List[Document] would insert its Python repr (escaped newlines,
        # metadata dicts) into the context.
        return "\n\n".join(doc.page_content for doc in docs)

    llm = ChatOCIGenAI(
        model_id="cohere.command-a-03-2025",
        service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
        compartment_id=OCI_COMPARTMENT_ID,
        )
    
    prompt = ChatPromptTemplate([
        ("system", "あなたは質疑応答のAIアシスタントです。必ず日本語で答えてください。"),
        ("human", """
         以下のMarkdownのコンテキストに基づいて質問に答えてください。
         回答は数字だけを回答してください。
         ** 質問 **
          {query} 
          
        ** コンテキスト **
        {context}
        """),
    ])

    retriever = CustomMarkdownRetriever()
    # The same input string is used as the question and as the retrieval query.
    chain = (
        {'query': RunnablePassthrough(), 'context': retriever | join_page_contents}
        | prompt
        | llm
        | StrOutputParser()
    )

    result = chain.invoke(query)
    return result

In [7]:
def get_text_by_image(query: str):
    """
    Find the image that best matches ``query`` and save it under ``../images/``.

    The top hit from ``CustomImageRetriever`` is selected, its ``image_blob`` is
    read from ``image_contents`` in 64 KiB chunks and written to
    ``../images/<basename of file_path>``.

    Args:
        query: Free-text search query.

    Returns:
        The top-ranked ``Document`` returned by the image retriever.

    Raises:
        IndexError: If the retriever returns no documents.
        oracledb.DatabaseError: Printed, then re-raised. Any other error raised
            while downloading is printed and the document is still returned.
    """

    retriever = CustomImageRetriever()
    result_images = retriever.invoke(query)
    if not result_images:
        # Same exception type as indexing an empty list, but with a usable message.
        raise IndexError(f"No image found for query: {query!r}")

    top_hit = result_images[0]
    file_id = top_hit.metadata['file_id']
    file_name = os.path.basename(top_hit.metadata['file_path'])
    image_dir = "../images/"
    try:
        # The output directory may not exist yet on a fresh checkout.
        os.makedirs(image_dir, exist_ok=True)
        # The context managers close the cursor and connection on exit.
        with oracledb.connect(user=UN, password=PW, dsn=DSN) as connection:
            with connection.cursor() as cursor:
                # No setinputsizes() here: the only bind is file_id, not a vector.
                # NOTE(review): the row is selected by file_id alone; if several
                # images can share a file_id this may fetch a different image
                # than the retrieved one -- confirm against the table schema.
                select_sql = """
                    SELECT
                        image_blob
                    FROM
                        image_contents
                    WHERE file_id = :1
                """
                cursor.execute(select_sql, [file_id])
                row = cursor.fetchone()
                if row is None:
                    raise LookupError(f"No image_blob row for file_id={file_id!r}")
                blob, = row
                offset = 1  # LOB.read() offsets start at 1
                bufsize = 65536

                with open(image_dir + file_name, 'wb') as f:
                    while True:
                        data = blob.read(offset, bufsize)
                        if data:
                            f.write(data)
                        # A short (or empty) chunk means the end of the LOB.
                        if len(data) < bufsize:
                            break
                        offset += bufsize

    except oracledb.DatabaseError as e:
        print(f"Database error: {e}")
        raise
    except Exception as e:
        print("Error Vector Search:", e)

    return top_hit

In [8]:
def chat_with_image(image_path: str, prompt: str, system_prompt: str = None) -> str:
    """
    Ask the vision model a question about a local image file.

    The file is read, base64-encoded and sent as a data URL together with
    ``prompt``.

    Args:
        image_path: Path of the image file to send.
        prompt: Text of the human message that accompanies the image.
        system_prompt: Optional system message. When ``None`` no system message
            is sent (building ``SystemMessage(content=None)`` is invalid).

    Returns:
        The ``content`` of the model's reply.
    """
    import mimetypes  # stdlib; local import keeps the notebook's import cell untouched

    with open(image_path, "rb") as img_file:
        image_data = base64.b64encode(img_file.read()).decode("utf-8")

    # Derive the MIME type from the file name instead of always claiming PNG;
    # fall back to the previous hard-coded value when it cannot be guessed.
    mime_type = mimetypes.guess_type(image_path)[0]
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    prompt_with_image = []
    if system_prompt is not None:
        prompt_with_image.append(SystemMessage(content=system_prompt))
    prompt_with_image.append(
        HumanMessage(
            content=[
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "data:" + mime_type + ";base64," + image_data,
                    },
                },
            ]
        )
    )

    llm = ChatOCIGenAI(
        model_id="meta.llama-3.2-90b-vision-instruct",
        service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
        compartment_id=OCI_COMPARTMENT_ID,
    )
    result = llm.invoke(prompt_with_image)
    print(f"Result: {result}") 
    return result.content

In [9]:
@tool
def get_text_with_image(question: str) -> str:
    """
    Get Revenue or net income for the past years by image.
    Args:
        question (str): The question to ask the model.
    Returns:
        str: The answer from the model.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the agent model as the tool description.

    prompt = ChatPromptTemplate([
        ("system", "あなたは言語翻訳のAIアシスタントです。日本語を英語に翻訳してください。"),
        ("human", "{input} "),
    ])
    
    llm = ChatOCIGenAI(
        model_id="cohere.command-a-03-2025",
        service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
        compartment_id=OCI_COMPARTMENT_ID,
    )
    # The prompt is fed the {"input": ...} mapping directly. Wrapping it in
    # {'input': RunnablePassthrough()} as well would nest the dict, so the
    # model would be asked to translate "{'input': '...'}" instead of the question.
    chain = prompt | llm | StrOutputParser()
    question_en = chain.invoke({"input": question})
    
    # Retrieval uses the original question; the vision model gets the translation.
    result_images = get_text_by_image(question)
    image_path = "../images/" + os.path.basename(result_images.metadata['file_path'])
    res = chat_with_image(
        image_path=image_path,
        prompt=question_en,
        system_prompt=(
            "You are a AI assistant. Please answer the question based on the image.\n"
            # Newlines keep the heading, the question and the next heading
            # from running together in the concatenated literal.
            "** Question **\n"
            f"{question}\n"
            "** Output **\n"
            "- FYXX: 1234\n"
            "- FYXX: 5678\n"
        )
    )
    return res

### Agent 

In [10]:
# ReAct agent that routes each question to one of the two tools defined above.
# debug=True makes every invoke() print the step-by-step graph state.
agent = create_react_agent(
    model=model,
    tools=[get_text_with_markdown, get_text_with_image],
    prompt=(
        "You are a chat agent. You can call two tools:\n"
        "- a get_text_with_markdown. Assign getting department expenses, sales, and operating income to this tool\n"
        "- a get_text_with_image. Assign getting revenue or net income for the past years tasks to this tool\n"
        # Trailing newline added: adjacent literals are concatenated, so this
        # sentence previously ran straight into the next one.
        "Do not do any work yourself.\n"
        "If you need to use a tool, please use the tool and return the result.\n"
        "You must answer in Japanese.\n"
    ),
    debug=True,
)

In [15]:
# Department-expense question; the agent prompt assigns this kind of request
# to get_text_with_markdown.
result_markdown = agent.invoke(
    dict(
        messages=[
            dict(role="user", content="2024/5のQ4のFacilityの経費を教えてください。"),
        ]
    )
)

[36;1m[1;3m[-1:checkpoint][0m [1mState at the end of step -1:
[0m{'messages': []}
[36;1m[1;3m[0:tasks][0m [1mStarting 1 task for step 0:
[0m- [32;1m[1;3m__start__[0m -> {'messages': [{'content': '2024/5のQ4のFacilityの経費を教えてください。', 'role': 'user'}]}
[36;1m[1;3m[0:writes][0m [1mFinished step 0 with writes to 1 channel:
[0m- [33;1m[1;3mmessages[0m -> [{'content': '2024/5のQ4のFacilityの経費を教えてください。', 'role': 'user'}]
[36;1m[1;3m[0:checkpoint][0m [1mState at the end of step 0:
[0m{'messages': [HumanMessage(content='2024/5のQ4のFacilityの経費を教えてください。', additional_kwargs={}, response_metadata={}, id='2eeed847-2f3a-4779-95c3-736bf98a7203')]}
[36;1m[1;3m[1:tasks][0m [1mStarting 1 task for step 1:
[0m- [32;1m[1;3magent[0m -> {'is_last_step': False,
 'messages': [HumanMessage(content='2024/5のQ4のFacilityの経費を教えてください。', additional_kwargs={}, response_metadata={}, id='2eeed847-2f3a-4779-95c3-736bf98a7203')],
 'remaining_steps': 24}
[36;1m[1;3m[1:writes][0m [1mFinished ste

In [16]:
# Show only the text of the last message in the returned state.
final_markdown_message = result_markdown["messages"][-1]
print(final_markdown_message.content)

2024年5月の第4四半期のFacilityの経費は、431です。


In [13]:
# Multi-year sales question; the agent prompt assigns revenue / net income
# history to get_text_with_image.
result_image = agent.invoke(
    dict(
        messages=[
            dict(role="user", content="5年間の売り上げの推移を教えてください。"),
        ]
    )
)

[36;1m[1;3m[-1:checkpoint][0m [1mState at the end of step -1:
[0m{'messages': []}
[36;1m[1;3m[0:tasks][0m [1mStarting 1 task for step 0:
[0m- [32;1m[1;3m__start__[0m -> {'messages': [{'content': '5年間の売り上げの推移を教えてください。', 'role': 'user'}]}
[36;1m[1;3m[0:writes][0m [1mFinished step 0 with writes to 1 channel:
[0m- [33;1m[1;3mmessages[0m -> [{'content': '5年間の売り上げの推移を教えてください。', 'role': 'user'}]
[36;1m[1;3m[0:checkpoint][0m [1mState at the end of step 0:
[0m{'messages': [HumanMessage(content='5年間の売り上げの推移を教えてください。', additional_kwargs={}, response_metadata={}, id='a51d2834-ca2f-4c66-85f7-2fe64f67667e')]}
[36;1m[1;3m[1:tasks][0m [1mStarting 1 task for step 1:
[0m- [32;1m[1;3magent[0m -> {'is_last_step': False,
 'messages': [HumanMessage(content='5年間の売り上げの推移を教えてください。', additional_kwargs={}, response_metadata={}, id='a51d2834-ca2f-4c66-85f7-2fe64f67667e')],
 'remaining_steps': 24}
[36;1m[1;3m[1:writes][0m [1mFinished step 1 with writes to 1 channel:
[0m- [33

In [14]:
# Show only the text of the last message in the returned state.
final_image_message = result_image["messages"][-1]
print(final_image_message.content)

5年間の売り上げの推移は以下の通りです。

- FY20：211,357（前年度比＋57,176）
- FY21：208,523（前年度比＋53,042）
- FY22：214,691（前年度比＋6,168）
- FY23：226,911（前年度比＋12,220）
- FY24：244,542（前年度比＋17,631）

5年間の売り上げの推移は右肩上がりで、売上高は継続して増加しています。
