In [None]:
# Standard library
import json
import os

# Third-party
import openai
from azure.storage.blob import BlobServiceClient
from dotenv import load_dotenv
from IPython.display import Image

# Local helper module
import utils

# Load settings from the local .env file before any os.environ lookups below.
load_dotenv(".env")

### SET VARIABLES

In [2]:
# Set this flag to True if you are using Azure Active Directory
use_azure_active_directory = False

if use_azure_active_directory:
    # Only API-key authentication is wired up in this notebook. Without this guard the
    # cell would finish silently and later cells would fail with a NameError on `client`.
    raise NotImplementedError(
        "Azure Active Directory authentication is not configured in this notebook; "
        "set use_azure_active_directory = False to use API-key authentication."
    )

# Azure OpenAI connection settings (all three environment variables are required).
aoai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
aoai_api_key = os.environ["AZURE_OPENAI_API_KEY"]
api_version = os.environ["AZURE_OPENAI_API_VERSION"]

client = openai.AzureOpenAI(
    azure_endpoint=aoai_endpoint,
    api_key=aoai_api_key,
    api_version=api_version,
)

# Model names passed to the utils helpers below.
embedding_model: str = "text-embedding-ada-002"
chat_model: str = "gpt-4o"

# Blob storage settings.
connection_string = os.environ["STORAGE_CONN_STR"]
# Temporarily set as identity access fails
storage_sas_token = os.environ["STORAGE_SAS_TOKEN"]
# Separator placed between a page name and its two-digit number suffix.
separator_word = os.environ["SEPARATOR_WORD"]

### PERFORM A HYBRID SEARCH

## Error Handling for Partial Content

In [None]:
# `json` is already imported in the first cell, so it is not re-imported here.
# User question sent to the search helper.
query = "ペットボトルの投棄方法は 1 から 9 番のどれですか？"
# NOTE(review): presumably a semantic search with a fallback path, judging by the
# helper's name — confirm against utils.
answer_context = utils.search_index_semanticAndFallBack(query, client, embedding_model)
answer_context

### RAG Query to GPT-4o

In [4]:
# Read the system prompt used for the first chat request.
with open("./system_message_0c.txt", mode="r", encoding="utf-8") as prompt_file:
    system_message = prompt_file.read()

In [5]:
# The user turn carries the question and the retrieved context as the string form of a dict.
content = {"question": query, "context": answer_context}

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": str(content)},
]

In [None]:
# Send the conversation to the chat model; the reply text is decoded as JSON.
response = utils.gpt4_turbo_query(messages, client, chat_model)
raw_reply = response.choices[0].message.content

answer = json.loads(raw_reply)
print(answer)

In [None]:
# Print each answer with its page, stopping at the first entry that starts with "Sorry".
for idx, entry in enumerate(answer["answers"]):
    text = entry["answer"]
    if text.startswith("Sorry"):
        break
    print(f"answer[{idx}]: ", text)
    print(f"  page[{idx}]: ", entry["page"])

### Get URL of the 'title' image

In [None]:
container_name = "manual-test"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# Look up the blob title/URL entry for each answered page; stop at the first
# entry whose answer starts with "Sorry".
image_urls = []
for item in answer["answers"]:
    if item["answer"].startswith("Sorry"):
        break
    blob_name = item["page"]
    matched = utils.list_blobs_titles_and_urls(blob_service_client, container_name, blob_name)
    if not matched:
        continue  # nothing found for this page
    image_urls.append(matched)

image_urls

## Sending a new message to GPT-4o

In [9]:
# Read the system prompt used for the second chat request.
with open("./system_message_01.txt", mode="r", encoding="utf-8") as prompt_file:
    system_message = prompt_file.read()

In [None]:
# Serialize every answer that does not start with "Sorry" and join them into one string.
answered_items = [item for item in answer["answers"] if not item["answer"].startswith("Sorry")]
answer_string = ', '.join(json.dumps(item, ensure_ascii=False) for item in answered_items)
answer_string

In [11]:
def _page_key(title: str, separator: str) -> str:
    """Return the page identifier for a blob title.

    The file extension is stripped. If the remaining stem does not already end with
    ``separator`` followed by two digits, ``separator + '01'`` is appended.

    Unlike indexing ``stem[-3]``, this works for separators longer than one character
    and for stems shorter than three characters (no IndexError).
    """
    stem = os.path.splitext(title)[0]
    has_number_suffix = (
        bool(separator)
        and stem[-2:].isdigit()
        and stem[:-2].endswith(separator)
    )
    return stem if has_number_suffix else stem + separator + '01'


messages = [{"role": "system", "content": system_message}]

content = [
    {"type": "text", "text": "query: " + query},
    {"type": "text", "text":"answer from gpt-4: " + answer_string},
]

# Attach only the images whose page identifier matches a page cited in the answers.
for url in image_urls:
    page_key = _page_key(url['title'], separator_word)
    if any(page_key == item['page'] for item in answer["answers"]):
        content.append({"type": "text", "text": page_key})
        # The SAS token is appended to the blob URL.
        content.append({"type": "image_url", "image_url": {"url": url['url']+storage_sas_token}})

messages.append({"role": "user","content":content})

In [None]:
# Inspect the request payload before sending it.
# NOTE(review): image URLs in this payload have the storage SAS token appended, so the
# rendered output can expose it — clear this cell's output before sharing the notebook.
messages

### Ask gpt-4o

In [None]:
# Send the text + image conversation to the chat model; the reply text is decoded as JSON.
response = utils.gpt4_turbo_query(messages, client, chat_model)
raw_reply = response.choices[0].message.content

answer = json.loads(raw_reply)
print(answer)

In [None]:
# Show the final answer followed by its citation.
for field in ("answer", "citation"):
    print(answer[field])

### Download and show the image

In [None]:
# Download each blob listed in image_urls and render it inline (no filtering is applied here).
for blob_info in image_urls:
    storagepath = blob_info['title']
    utils.list_blobs_download(blob_service_client, container_name, storagepath)
    print("\n",storagepath)
    # NOTE(review): presumably the helper saves the file under its title relative to
    # the working directory — confirm against utils.
    display(Image(filename=storagepath))

## Maintain the message history

In [None]:
# Record the model's reply in the conversation so the follow-up query has context.
# NOTE(review): presumably this mutates `messages` in place — confirm against utils.
utils.append_conversation_history(messages, response, role="assistant")
# NOTE(review): image URLs in `messages` have the storage SAS token appended, so this
# printout can expose it — clear the output before sharing the notebook.
print(messages)

### Send the consecutive query

In [None]:
# Add a follow-up user turn to the running conversation.
follow_up = {
    "role": "user",
    "content": [
        {"type": "text", "text": "ponta ポイントサービスについて教えてください。"},
    ],
}
messages.append(follow_up)

# NOTE(review): earlier cells call utils.gpt4_turbo_query(messages, client, chat_model);
# this one uses a different helper that takes the endpoint and key directly — confirm intended.
response = utils.gpt4v_query(messages, aoai_endpoint, aoai_api_key, api_version)
print(response.json()["choices"][0]["message"]["content"])

## Backup script

Download all the images that include a page relevant to the questions

In [None]:
# Download and display every blob listed in image_urls.
for blob_info in image_urls:
    title = blob_info['title']
    utils.list_blobs_download(blob_service_client, container_name, title)
    display(Image(filename=title))

Download Images

In [None]:
# Download the images for the last `blob_name` set by the URL-collection loop.
# NOTE(review): `split_word` is not defined anywhere in the visible notebook (the config
# cell defines `separator_word`), and other cells call this helper with three arguments —
# confirm the intended name and signature before running.
utils.list_blobs_download(blob_service_client, container_name, blob_name, split_word)

List the URLs of the images

In [None]:
# NOTE(review): this overwrites the `image_urls` list built earlier, and `split_word` is
# not defined anywhere in the visible notebook — confirm before running.
image_urls = utils.list_blobs_urls(blob_service_client, container_name, blob_name, split_word)
print(image_urls)

List the URLs relevant to the query

In [None]:
# Gather the image URLs for every search result into one flat list.
# NOTE(review): `results` and `split_word` are not defined anywhere in the visible
# notebook — confirm where they should come from before running.
image_urls_list = []
for result in results:
    image_urls_list.extend(
        utils.list_blobs_urls(blob_service_client, container_name, result['title'], split_word)
    )

print(image_urls_list)

### END OF SCRIPT