In [None]:
#start with importing the required libraries 
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
import chromadb
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient
from langchain.prompts import PromptTemplate
from IPython.display import Image

#global configuration shared by the cells below
#secrets are read from the environment when it provides them, so real keys never have to be saved in the notebook;
#the placeholders remain as fallbacks and must be replaced (or the variables exported) before running
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "<OPENAI_KEY>")
LANG_ENDPOINT = os.environ.get("AZURE_LANGUAGE_ENDPOINT", "<LANGUAGE_SERVICES_ENDPOINT>")
LANG_KEY = os.environ.get("AZURE_LANGUAGE_KEY", "<LANGUAGE_SERVICES_KEY>")
#numeric department identifier; it is appended to the local storage path below
deptID = 10
root_path = "<LOCAL_STORAGE_PATH>"+str(deptID)
#department name
deptName = "Egyptian_Art"

In [None]:
#set up the OpenAI-backed objects used to run queries against the MET collection
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

#open the persisted Chroma store and call heartbeat() on the client
client = chromadb.PersistentClient(path="<PATH_TO_CHROMADB>")
client.heartbeat()

#vector store bound to the department's collection
db = Chroma(
    collection_name=deptName,
    embedding_function=embeddings,
    client=client,
)

#metadata fields described to the self-query retriever
metadata_field_info = [
    AttributeInfo(
        type="string",
        name="part_index",
        description="The partition for where the image is stored",
    ),
]
document_content_description = "Brief summary of an artifact at the MET in the Egyptian Art Department"

#language model plus the self-query retriever built on top of the vector store
llm = OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=db,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

#retrieval question-answering chain that uses the retriever above
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [None]:
#create a Named Entity Recognition call given the specified articles 
def NER(articles):
    """Run Azure Language Services entity recognition over ``articles``.

    Returns a list holding the ``entities`` value of every per-document result
    that is not flagged as an error. Errored results are left out, so the
    returned list can be shorter than ``articles``.
    """
    #client built from the module-level endpoint and key
    azure_client = TextAnalyticsClient(
        endpoint=LANG_ENDPOINT,
        credential=AzureKeyCredential(LANG_KEY),
    )
    responses = azure_client.recognize_entities(articles)

    #keep only the entity collections of the documents that succeeded
    return [response.entities for response in responses if not response.is_error]

In [None]:
#second vector store, retriever description and retriever: these target the "ucl" collection
db_deu = Chroma(
    collection_name="ucl",
    embedding_function=embeddings,
    client=client,
)

#metadata fields described to the self-query retriever for this collection
metadata_field_info_deu = [
    AttributeInfo(
        type="string or list[string]",
        name="id",
        description="The ID of the object in the collection",
    ),
]

document_content_description_deu = "Info about Anceint Egypt from the UCL"

#self-query retriever over the UCL store; reuses the llm created for the MET collection
retriever_deu = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=db_deu,
    document_contents=document_content_description_deu,
    metadata_field_info=metadata_field_info_deu,
    verbose=True,
)

In [None]:
#prompt text that fixes the answering rules and the tone of the agent
template_deu = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

#wrap the text in a prompt template and plug it into a QA chain over the UCL store
QA_CHAIN_PROMPT_deu = PromptTemplate.from_template(template=template_deu)
qa_chain_deu = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT_deu),
    return_source_documents=True,
    retriever=db_deu.as_retriever(),
)

In [None]:
#read the visitor's question from standard input
question = input(
    'Hello, Please post a question for the MET Museum QnA bot: '
)

#acknowledge the question, then fetch the matching documents through the self-query retriever
print(
    "Thanks for your question. I am currently processing the information ... \n \n"
)
result_direct_retreival = retriever.get_relevant_documents(query=question)

In [None]:
#raw dump of the retrieved documents
print(result_direct_retreival)

#print the results and related images
for item in result_direct_retreival:
    metadata = item.metadata

    #both keys are needed to build the image path; skip the result (instead of crashing on
    #a missing part_index) and say which key was absent
    missing_keys = [key for key in ("part_index", "id") if key not in metadata]
    if missing_keys:
        print("Skipping a result without the metadata needed to locate its image: " + ", ".join(missing_keys))
        continue

    path = f"{root_path}/Images/P{metadata['part_index']}/{metadata['id']}.jpg"
    try:
        display(Image(filename=path))
        print(item.page_content)
        print("\n")
    except Exception as e:
        #best effort: carry on with the next result, but report which image failed and why
        print(f"Could not display the image at {path}: {e}")

In [None]:
#request the question from the user
question = input('Hello, Please post a question for the MET Museum QnA bot: ')

print("Thanks for your question. I am currently processing the information ... \n \n")

#"refine" chain over the MET collection - run for an initial verbose response to the question
qa_chain_re = RetrievalQA.from_chain_type(
    llm,
    retriever=db.as_retriever(),
    chain_type="refine"
)

#tracks whether the MET attribution line has already been printed
printed = False

#print the result
result_refine_met = qa_chain_re.invoke({"query": question})
if result_refine_met is not None:
    print("The following results are powered by the MET's API, which can be found at this reference https://metmuseum.github.io")
    print(result_refine_met["result"])
    printed = True

#run a direct retrieval for the question from the MET collection
result_direct_retreival = retriever.get_relevant_documents(question)

#print the results and related images
for item in result_direct_retreival:
    if not printed:
        print("The following results are powered by the MET's API, which can be found here: https://metmuseum.github.io")
        printed = True

    metadata = item.metadata
    #both keys are needed to build the image path; skip the result (instead of crashing on
    #a missing part_index) and say which key was absent
    missing_keys = [key for key in ("part_index", "id") if key not in metadata]
    if missing_keys:
        print("Skipping a result without the metadata needed to locate its image: " + ", ".join(missing_keys))
        continue

    path = f"{root_path}/Images/P{metadata['part_index']}/{metadata['id']}.jpg"
    try:
        display(Image(filename=path))
        print(item.page_content)
        print("\n")
    except Exception as e:
        #best effort: carry on with the next result, but report which image failed and why
        print(f"Could not display the image at {path}: {e}")

print("\n\n")
print("I am processing additional information about the topics in your question...")
print("\n\n")

#Run NER to capture the relevant entities in the question
result_ner = NER([question])
#NER() leaves out documents that came back as errors, so the list can be empty
entities = result_ner[0] if result_ner else []

#attribution line for the UCL answers; "printed" now tracks whether it has been shown
DEU_ATTRIBUTION = "The following additional information is powered by the DEU Project from UCL, which can be found here: https://www.ucl.ac.uk/museums-static/digitalegypt/alphabet.html \n"
printed = False
entities_string = []

#"refine" chain over the UCL collection; it does not depend on the entity, so it is built
#once and shared by the per-entity queries and the combined query (this also keeps
#qa_chain_re pointing at the MET chain)
qa_chain_re_updated = RetrievalQA.from_chain_type(
    llm,
    retriever=db_deu.as_retriever(),
    chain_type="refine"
)

#iterate over each entity and run one refine query per entity using the UCL collection
for entity in entities:
    #an entity without text would produce an empty query, so skip it
    if not entity.text:
        continue
    entities_string.append(entity.text)
    construct_query = "Provide me info about " + entity.text
    result_refine_deu = qa_chain_re_updated.invoke({"query": construct_query})

    #print the results
    if result_refine_deu is not None:
        if not printed:
            print(DEU_ATTRIBUTION)
            printed = True
        print(result_refine_deu["result"])

#join all the entities and query the UCL collection for all of them in one call;
#with a single entity this would repeat the per-entity query above, so it only runs for two or more
if len(entities_string) > 1:
    construct_query_updated = "Provide me info about " + ", ".join(entities_string)
    #send the combined query (not the last per-entity one)
    result_refine_deu_updated = qa_chain_re_updated.invoke({"query": construct_query_updated})

    #print the results; only the attribution line depends on "printed", the answer is always shown
    if result_refine_deu_updated is not None:
        if not printed:
            print(DEU_ATTRIBUTION)
            printed = True
        print(result_refine_deu_updated["result"])