In [None]:
import os, json
from IPython.display import Image
import openai
from azure.storage.blob import BlobServiceClient

import utils
from dotenv import load_dotenv
# Load settings from the local ".env" file; the configuration cell below reads
# its endpoints, keys and connection strings from os.environ.
load_dotenv(".env")

### SET VARIABLES

In [None]:
# Set this flag to True if you are using Azure Active Directory
use_azure_active_directory = False
if use_azure_active_directory:
    # Only API-key authentication is implemented in this notebook. Stop here
    # with a clear message instead of leaving `client` and the endpoint
    # variables undefined, which would only surface later as a NameError.
    raise NotImplementedError(
        "Azure Active Directory authentication is not implemented in this notebook; "
        "set use_azure_active_directory = False and provide the API keys in .env."
    )

# Settings for the Azure OpenAI client that is passed to the search and chat helpers.
aoai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
aoai_api_key = os.environ["AZURE_OPENAI_API_KEY"]
api_version = os.environ["AZURE_OPENAI_API_VERSION"]

# Separate endpoint / key / API version used for the GPT-4V requests.
aoai_endpoint_4v = os.environ["AZURE_OPENAI_ENDPOINT_4V"]
aoai_api_key_4v = os.environ["AZURE_OPENAI_API_KEY_4V"]
api_version_4v = os.environ["AZURE_OPENAI_API_VERSION_4V"]

client = openai.AzureOpenAI(
    azure_endpoint=aoai_endpoint,
    api_key=aoai_api_key,
    api_version=api_version,
)

# Names handed to utils.search_index and utils.gpt4_turbo_query respectively.
embedding_model: str = "text-embedding-ada-002"
chat_model: str = "gpt-4-0125"

connection_string = os.environ["STORAGE_CONN_STR"]
# Temporary workaround: a SAS token is appended to image URLs because
# identity-based access to the storage account fails.
storage_sas_token = os.environ["STORAGE_SAS_TOKEN"]


### PERFORM A HYBRID SEARCH

In [None]:
# Semantic Hybrid Search
#query = "What is the disposal of PET bottle?"
query = "ご契約者が冬の円貨定期預金金利優遇キャンペーンを知る機会は何になりますか"
#query = "受電で対応できる来店予約は何ですか"

# Run the search and keep only the title and content of each hit; this list is
# the context handed to the chat model in the RAG step.
# (The reranker score is available as hit["@search.reranker_score"] if needed.)
results = utils.search_index(query, client, embedding_model)
answer_context = [
    {"title": hit["title"], "content": hit["content"]}
    for hit in results
]

#print(json.dumps(answer_context, indent=4, ensure_ascii=False))

### RAG Query to GPT-4-turbo

In [None]:
# Read the system prompt used for the RAG request to the chat model.
with open("./system_message_02.txt", mode="r", encoding="utf-8") as prompt_file:
    system_message = prompt_file.read()

In [None]:
# The user turn carries the question together with the retrieved context,
# serialized with str() into a single string.
content = {"question": query, "context": answer_context}

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": str(content)},
]

#print(json.dumps(messages, indent=4, ensure_ascii=False))

In [None]:
# Send the RAG messages to the chat model; the reply text is parsed as JSON
# (later cells read its "answer" and "page" keys).
response = utils.gpt4_turbo_query(messages, client, chat_model)
answer = json.loads(response.choices[0].message.content)

print(answer)

In [None]:
# Show only the answer text from the parsed JSON reply.
print(answer["answer"])

### Get URL of the 'title' image

In [None]:
# Look up the images stored for the page that the chat model named in its reply.
container_name = "manual-test"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
blob_name = answer["page"]
split_word = "_"  # separator placed before the numeric suffix in image names

image_urls = utils.list_blobs_titles_and_urls_ns(
    blob_service_client,
    container_name,
    blob_name,
    split_word,
)
image_urls


## Sending a new message to GPT-4v

### Construct a new message with a single image

In [None]:
# Read the system prompt used for the GPT-4V request (replaces the RAG prompt
# held in `system_message`).
with open("./system_message_01.txt", mode="r", encoding="utf-8") as prompt_file:
    system_message = prompt_file.read()

In [None]:
# Start a new conversation for GPT-4V, using the typed-content message format.
messages = [
    {"role": "system", "content": [{"type": "text", "text": system_message}]},
]

# User turn: the original question plus the text answer from the RAG step.
content = [
    {"type": "text", "text": "query: " + query},
    {"type": "text", "text": "answer from gpt-4: " + answer["answer"]},
]

# Pick the image whose normalized name equals the page named in the RAG answer.
for url in image_urls:
    storagepath = url['title']
    storagepath_stem = os.path.splitext(storagepath)[0]
    # Slicing instead of indexing with [-3] keeps names shorter than three
    # characters from raising IndexError; they simply count as "no suffix".
    has_number_suffix = (
        storagepath_stem[-3:-2] == split_word and storagepath_stem[-2:].isdigit()
    )
    if not has_number_suffix:
        storagepath_stem += split_word + '01'  # storagepath doesn't have a number suffix
    if storagepath_stem == answer['page']:
        content.append({"type": "text", "text": storagepath})
        # assumes storage_sas_token already starts with "?" — TODO confirm
        content.append({"type": "image_url", "image_url": url['url'] + storage_sas_token})
        break
else:
    # The loop finished without a match (or image_urls was empty): make the
    # otherwise silent "no image attached" case visible.
    print(f"WARNING: no image matches page {answer['page']!r}; the message is sent without an image.")

messages.append({"role": "user", "content": content})

In [None]:
# Inspect the assembled GPT-4V messages before sending them.
# NOTE: the image URL shown here has the storage SAS token appended, so clear
# this cell's output before sharing or committing the notebook.
messages

### Ask gpt-4v

In [None]:
# Send the conversation (text plus the selected image) with the GPT-4V settings.
response = utils.gpt4v_query(messages, aoai_endpoint_4v, aoai_api_key_4v, api_version_4v)
first_choice = response.json()["choices"][0]

# The result is kept under its own name. Rebinding `answer` (the parsed RAG
# reply, a dict) to this string would break a re-run of the message-building
# cell, which reads answer["answer"] and answer["page"].
#answer_4v = first_choice["message"]["content"]
answer_4v = first_choice["enhancements"]["grounding"]["lines"][0]["text"]
print(answer_4v)

In [None]:
# Inspect the whole first choice of the raw GPT-4V response (not just the
# extracted text printed by the previous cell).
response.json()["choices"][0]

### Download and show the image

In [None]:
# Download the image chosen while building the GPT-4V message and show it inline.
# NOTE: `storagepath` is the loop variable left over from that cell; if no image
# matched the page, it holds the last blob examined rather than a matched one.
# presumably the helper saves the blob under the same local file name, since
# that path is opened right after — confirm in utils.
utils.list_blobs_download(blob_service_client, container_name, storagepath)
display(Image(filename=storagepath))

## Maintain the message history

In [None]:
# Record the GPT-4V response as an assistant turn so the follow-up query carries
# the conversation so far.
# presumably the helper appends to `messages` in place (its return value is not
# used here) — confirm in utils; if so, re-running this cell adds the turn again.
utils.append_conversation_history(messages, response, role="assistant")
# NOTE: the printed image URL has the storage SAS token appended; clear this
# output before sharing the notebook.
print(messages)

### Send a follow-up query

In [None]:
# Add the follow-up question as a new user turn on the existing conversation.
# Re-running this cell appends the question again.
messages.append({"role": "user", "content": [
    {"type": "text", "text": "ponta ポイントサービスについて教えてください。"},
]})

# The history was built for GPT-4V and can contain an image part, so it is sent
# with the same GPT-4V endpoint, key and API version as the first GPT-4V
# request, not with the settings of the text model.
response = utils.gpt4v_query(messages, aoai_endpoint_4v, aoai_api_key_4v, api_version_4v)
print(response.json()["choices"][0]["message"]["content"])

## Backup script

Download all the images that include a page relevant to the questions

In [None]:
# Fetch every listed image and render each one inline right after its download.
for blob_info in image_urls:
    image_name = blob_info['title']
    utils.list_blobs_download(blob_service_client, container_name, image_name)
    display(Image(filename=image_name))

Download Images

In [None]:
# Download the images for `blob_name` (the page named in the RAG answer).
# Unlike the other download calls in this notebook, split_word is passed as a
# fourth argument here; how the helper uses it is not visible — check utils.
utils.list_blobs_download(blob_service_client, container_name, blob_name, split_word)

List the urls of the images

In [None]:
# NOTE: this rebinds `image_urls`, which earlier cells iterate as items with
# 'title' and 'url' keys. The shape returned by list_blobs_urls is not visible
# here — if it differs, re-run the image-listing cell before re-running those cells.
image_urls = utils.list_blobs_urls(blob_service_client, container_name, blob_name, split_word)
print(image_urls)

List the urls relevant to the query

In [None]:
# Gather the image URLs for the title of every search hit into one flat list.
image_urls_list = []
for hit in results:
    image_urls_list.extend(
        utils.list_blobs_urls(blob_service_client, container_name, hit['title'], split_word)
    )

print(image_urls_list)

### END OF SCRIPT