# References

https://aws.amazon.com/blogs/machine-learning/build-an-intelligent-search-solution-with-automated-content-enrichment/


https://aws.amazon.com/blogs/machine-learning/augment-search-with-metadata-by-chaining-amazon-textract-amazon-comprehend-and-amazon-kendra/



In [None]:
import boto3

# AWS region handed to boto3 when the Kendra client is created.
KENDRA_REGION: str = "ap-southeast-2"
# ID of the Kendra index this notebook queries (sent as IndexId).
KENDRA_INDEX: str = "0ee3a3ab-e4eb-4820-bd79-50ba88d48c21"

In [47]:
# Natural-language question; sent to Kendra first and to the chat model later.
query = "Tell me some Star Wars character names please"

# Kendra client bound to the configured region.
kendra_client = boto3.client("kendra", region_name=KENDRA_REGION)

In [48]:
# result: dict = kendra_client.retrieve(
#     QueryText=query,
#     IndexId=KENDRA_INDEX,
#     PageSize=10,
#     PageNumber=1,
# )

# chunks: list[str] = [retrieve_result["Content"] for retrieve_result in result["ResultItems"]]
# chunks


In [49]:
def _question_text(item: dict) -> str:
    """Return the ``QuestionText`` attribute of a Kendra FAQ result item.

    The attribute is located by its ``Key`` instead of by list position, so
    the result does not depend on the order of ``AdditionalAttributes``.

    Args:
        item: One entry of the ``ResultItems`` list of a Kendra query response.

    Returns:
        The question text, or an empty string when the item carries no
        ``QuestionText`` attribute.
    """
    for attribute in item.get("AdditionalAttributes", []):
        if attribute.get("Key") == "QuestionText":
            return attribute["Value"]["TextWithHighlightsValue"]["Text"]
    return ""


# Ask Kendra for FAQ-style (question/answer) matches only, first page of 10.
result: dict = kendra_client.query(
    QueryText=query,
    IndexId=KENDRA_INDEX,
    PageSize=10,
    PageNumber=1,
    QueryResultTypeFilter="QUESTION_ANSWER",
)

# One (answer, question, source URL) tuple per FAQ match.
chunks: list[tuple[str, str, str]] = [
    (
        item["DocumentExcerpt"]["Text"],
        _question_text(item),
        item["DocumentURI"],
    )
    for item in result["ResultItems"]
    if item["Type"] == "QUESTION_ANSWER"
]
chunks

[("['Qui-Gon Jinn']",
  "what was liam neeson's character in star wars?",
  'https://wqr001053.co.jp'),
 ("['Padmé Amidala']",
  'what character did natalie portman play in star wars?',
  'https://wqr000001.co.jp'),
 ("['James Earl Jones']",
  'who did the voice of darth vader in star wars?',
  'https://wqr001305.co.jp'),
 ("['Carrie Fisher']",
  'who played princess leia from star wars?',
  'https://wqr001181.co.jp')]

In [50]:
# Print every FAQ match as a Question / Answer / URL triple.
for r in result['ResultItems']:
    if r['Type'] != 'QUESTION_ANSWER':
        continue  # only FAQ-type results are shown
    # The first additional attribute is read as the question text.
    answer, question, url = (
        r['DocumentExcerpt']['Text'],
        r['AdditionalAttributes'][0]['Value']['TextWithHighlightsValue']['Text'],
        r['DocumentURI'],
    )
    print(f"Question: {question}", f"Answer: {answer}", f"URL: {url}", sep="\n")

Question: what was liam neeson's character in star wars?
Answer: ['Qui-Gon Jinn']
URL: https://wqr001053.co.jp
Question: what character did natalie portman play in star wars?
Answer: ['Padmé Amidala']
URL: https://wqr000001.co.jp
Question: who did the voice of darth vader in star wars?
Answer: ['James Earl Jones']
URL: https://wqr001305.co.jp
Question: who played princess leia from star wars?
Answer: ['Carrie Fisher']
URL: https://wqr001181.co.jp


In [51]:
# Dump the raw dict of each FAQ match to inspect its full structure.
faq_items = (
    item for item in result['ResultItems'] if item['Type'] == 'QUESTION_ANSWER'
)
for r in faq_items:
    print(r)

{'Id': 'ff4caf74-5308-41f8-b840-824ca8319086-5acec553-aea0-48bb-9b9e-af5ddc3d88fa', 'Type': 'QUESTION_ANSWER', 'Format': 'TEXT', 'AdditionalAttributes': [{'Key': 'QuestionText', 'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE', 'Value': {'TextWithHighlightsValue': {'Text': "what was liam neeson's character in star wars?", 'Highlights': [{'BeginOffset': 23, 'EndOffset': 32, 'TopAnswer': False, 'Type': 'STANDARD'}, {'BeginOffset': 36, 'EndOffset': 40, 'TopAnswer': False, 'Type': 'STANDARD'}, {'BeginOffset': 41, 'EndOffset': 45, 'TopAnswer': False, 'Type': 'STANDARD'}]}}}, {'Key': 'AnswerText', 'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE', 'Value': {'TextWithHighlightsValue': {'Text': "['Qui-Gon Jinn']", 'Highlights': []}}}], 'DocumentId': 'cef5f603eb9154ada5e97aa2b589214c493db718d1fa70274ff4014b71b4131c43440d99-e5e9-4c33-b28e-391090f846dd', 'DocumentTitle': {'Text': ''}, 'DocumentExcerpt': {'Text': "['Qui-Gon Jinn']", 'Highlights': [{'BeginOffset': 0, 'EndOffset': 16, 'TopAnswer': False, 'Type': '

In [52]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
import openai

# from langchain.vectorstores.faiss import FAISS
import os

from dotenv import load_dotenv
load_dotenv()


True

In [53]:
# API key taken from the process environment (None when the variable is unset).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Model wrapper built with the key above and temperature pinned to 0.
qa_model = OpenAI(api_key=OPENAI_API_KEY, temperature=0)


In [54]:
# Grounding material for the model: the (answer, question, source URL) tuples
# collected from Kendra, embedded below via the list's repr.
contexts = chunks

# System prompt. The second sentence previously read "Use the following
# information <context> tags." (words missing); it now states that the
# information is enclosed in the <context> tags that follow.
prompt = f"""
You are a helpful AI assistant that provides answers.
Use the following information enclosed in <context> tags.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{contexts}
</context>
"""

# prompt = f"""
# Human: You are a financial advisor AI system, and provides answers to questions by using fact based and statistical information when possible. 
# Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags. 
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
# <context>
# {contexts}
# </context>

# <question>
# {query}
# </question>

# The response should be specific and use statistics or numbers when possible.

# Assistant:"""

In [55]:
import os
from openai import OpenAI

# OpenAI API client. Reading the key from OPENAI_API_KEY is the client's
# default, so the explicit api_key argument could be omitted.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# The system message carries the instructions plus the <context> block;
# the user message carries the original question.
chat_messages = [
    {"role": "system", "content": prompt},
    {"role": "user", "content": query},
]

chat_completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=chat_messages,
)

In [56]:
# Show the message text of the first returned choice.
first_choice = chat_completion.choices[0]
print(first_choice.message.content)

Here are some Star Wars character names:

1. Qui-Gon Jinn
2. Padmé Amidala
3. Darth Vader (voiced by James Earl Jones)
4. Princess Leia (played by Carrie Fisher)


In [57]:
# Display the raw context list that was interpolated into the system prompt.
contexts

[("['Qui-Gon Jinn']",
  "what was liam neeson's character in star wars?",
  'https://wqr001053.co.jp'),
 ("['Padmé Amidala']",
  'what character did natalie portman play in star wars?',
  'https://wqr000001.co.jp'),
 ("['James Earl Jones']",
  'who did the voice of darth vader in star wars?',
  'https://wqr001305.co.jp'),
 ("['Carrie Fisher']",
  'who played princess leia from star wars?',
  'https://wqr001181.co.jp')]

In [None]:
# Scratch cell: a bare string literal holding an example XML-structured prompt.
# It is not assigned to any name, so no other cell in view uses it.
"""
<prompt>
  <instructions>Summarize the following text.</instructions>
  <context>
    <text>
      OpenAI's GPT-4 is a state-of-the-art language model that can generate human-like text based on the input it receives. It is used in a variety of applications, from chatbots to content creation.
    </text>
  </context>
  <formatting>Bullet points</formatting>
</prompt>
"""