## Setup

In [98]:
from st_helper import *
import os

In [99]:
from dotenv import load_dotenv

# Location of the credentials file. Set the ESIA_ENV_FILE environment variable
# to point at a different file so the notebook can run on other machines; the
# original absolute path is kept as the fallback so existing behavior is unchanged.
env_file = os.environ.get(
    "ESIA_ENV_FILE",
    r"C:\Users\ITLS104415\Desktop\GitHub Repositories\ESIA Assistant\credentials.env",
)

# Left as the last expression so the cell still displays load_dotenv's boolean result.
load_dotenv(env_file)

True

In [100]:
# All settings are read from environment variables; a missing variable raises
# KeyError at the first lookup that fails.

# --- OpenAI / embedding settings ---
azure_endpoint = os.environ["AZURE_ENDPOINT"]
openai_api_key = os.environ["OPENAI_API_KEY"]

openai_deployment_name = os.environ["GPT_DEPLOYMENT_NAME"]
openai_api_version = os.environ["OPENAI_API_VERSION"]
embedding_model = os.environ["EMBEDDING_MODEL"]
embedding_deployment_name = os.environ["EMBEDDING_DEPLOYMENT_NAME"]

# --- Search service settings ---
search_endpoint = os.environ["SEARCH_ENDPOINT"]
search_api_key = os.environ["SEARCH_API_KEY"]
search_api_version = os.environ["SEARCH_API_VERSION"]
search_service_name = os.environ["SEARCH_SERVICE_NAME"]

# Credential object built from the search API key.
search_credential = AzureKeyCredential(search_api_key)

# --- Storage settings ---
connection_string = os.environ["CONNECTION_STRINGS"]
container_name = os.environ["CONTAINER_NAME"]
storage_name = os.environ["STORAGE_NAME"]

# --- Parameters ---
max_tokens = 4096
dimensionality = 1536

In [101]:
def compute_tokens(project_name: str = "ESIA Agent", run_name: str = "RunnableSequence"):
    """Print the prompt and completion token counts of one traced run.

    Takes the first run yielded by ``client.list_runs`` for ``project_name``
    whose name equals ``run_name`` and prints its ``prompt_tokens`` and
    ``completion_tokens``.

    The run is fetched once so that both figures describe the same run;
    issuing a separate query per figure could pair numbers from two different
    runs if a new run is recorded in between.

    Args:
        project_name: Project whose runs are listed.
        run_name: Run name used in the ``eq(name, ...)`` filter.

    Returns:
        None. The counts are printed; if no run matches, a message is printed
        instead.

    Note:
        ``client`` is expected to already exist in the notebook namespace; it
        is not defined in this cell.
    """
    matching_runs = client.list_runs(
        project_name=project_name, filter=f'eq(name, "{run_name}")')

    # next(..., None) avoids an opaque StopIteration when nothing matches.
    run = next(matching_runs, None)
    if run is None:
        print(f'No "{run_name}" runs found in project "{project_name}"')
        return

    print(f"Input tokens: {run.prompt_tokens}")
    print(f"Output tokens: {run.completion_tokens}")

## Test

### Classes

In [102]:
# Name of the search index used in this notebook, plus the list form that the
# retriever and tool take as their `indexes` argument.
index_name = "climate-city-contracts"
indexes = [index_name]

# Chat model handed to the agent; configured from the environment-derived
# settings above, with temperature 0.
llm = AzureChatOpenAI(
    deployment_name=openai_deployment_name,
    openai_api_version=openai_api_version,
    openai_api_key=openai_api_key,
    azure_endpoint=azure_endpoint,
    temperature=0,
)


In [103]:
question = "Is there any mention of 'carbon capture and storage' in this climate city contract?"

In [104]:
def get_search_results(query: str,
                       filters: str,
                       indexes: list,
                       k: int,
                       reranker_threshold: int = 1,
                       sas_token: str = "",
                       ) -> OrderedDict:
    """Run a hybrid (text + vector) search on every index and merge the hits.

    The same payload is POSTed to ``<search_endpoint>indexes/<index>/docs/search``
    for each entry of ``indexes``. Hits from all indexes are pooled by ``id``
    (a later hit with the same id replaces an earlier one), sorted by
    ``@search.score`` in descending order and truncated to ``k`` entries.

    Args:
        query: Text used both as the keyword query and as the text of the
            vector query against the ``embedding`` field.
        filters: Filter expression sent as the payload's ``filter`` value.
        indexes: Names of the indexes to query.
        k: Number of hits requested per index and maximum number returned.
        reranker_threshold: Currently has no effect, see the note in the body.
        sas_token: Currently unused; kept for interface compatibility.

    Returns:
        OrderedDict mapping hit id to a dict with the keys ``chunk``,
        ``location``, ``city``, ``score`` and ``index``, best score first.

    Raises:
        requests.HTTPError: If the search service answers with a 4xx/5xx status.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": search_api_key,
    }
    params = {'api-version': search_api_version}

    search_payload = {
        "search": query,
        "select": "id, doc_path, city, chunk",
        "filter": filters,
        "vectorQueries": [{"kind": "text", "k": k, "fields": "embedding", "text": query}],
        "count": "true",
        "top": k
    }

    # Query each requested index (previously only the notebook-global
    # `index_name` was queried and the `indexes` argument was ignored).
    agg_search_results = dict()
    for index in indexes:
        response = requests.post(search_endpoint + "indexes/" + index + "/docs/search",
                                 data=json.dumps(search_payload), headers=headers, params=params)
        # Fail with the HTTP error instead of a later KeyError on 'value'.
        response.raise_for_status()
        agg_search_results[index] = response.json()

    # NOTE(review): the caller-supplied `reranker_threshold` is overridden here,
    # as in the original code. The filter below compares `@search.score`, and the
    # scores shown in this notebook's outputs are around 0.03, so honoring the
    # default of 1 would discard every hit. Confirm the intended score field and
    # threshold before removing this override.
    reranker_threshold = 0

    content = dict()
    for index, search_results in agg_search_results.items():
        for result in search_results['value']:
            # Keep only hits whose search score exceeds the threshold.
            if result['@search.score'] > reranker_threshold:
                content[result['id']] = {
                    "chunk": result['chunk'],
                    "location": result['doc_path'],
                    "city": result['city'],
                    "score": result['@search.score'],
                    "index": index
                }

    # Best-scoring hits first, capped at k entries (none when k <= 0).
    ranked_ids = sorted(content, key=lambda doc_id: content[doc_id]["score"], reverse=True)
    ordered_content = OrderedDict()
    for doc_id in ranked_ids[:max(k, 0)]:
        ordered_content[doc_id] = content[doc_id]

    return ordered_content

In [105]:
class CustomAzureSearchRetriever(BaseRetriever):
    """Retriever that turns ``get_search_results`` hits into ``Document`` objects."""

    # Index names forwarded to get_search_results.
    indexes: List
    # Filter expression forwarded to get_search_results.
    filters: str
    # Forwarded as `k` (number of hits to request/keep).
    topK: int
    # Forwarded as `reranker_threshold`.
    reranker_threshold: int
    # Forwarded as `sas_token`.
    sas_token: str = ""

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search for ``query`` and return one ``Document`` per hit.

        Each document's ``page_content`` is the hit's ``chunk``; its metadata
        holds ``source`` (the hit's ``location``, or ``""`` when that is None)
        and ``score``. The order of the search results is preserved.
        """
        ordered_results = get_search_results(
            query,
            indexes=self.indexes,
            filters=self.filters,
            k=self.topK,
            reranker_threshold=self.reranker_threshold,
            sas_token=self.sas_token,
        )

        top_docs = []
        for doc_id, value in ordered_results.items():
            location = value["location"] if value["location"] is not None else ""
            try:
                top_docs.append(Document(page_content=value["chunk"], metadata={
                    "source": location, "score": value["score"]}))
            except Exception as exc:
                # Best-effort: skip a hit that cannot be converted, but say which
                # one and why. `Exception` (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                print(f"An exception occurred while building a Document for result {doc_id!r}: {exc!r}")

        return top_docs
    

In [106]:
class GetDocSearchResults_Tool(BaseTool):
    """Agent tool that retrieves source documents for a query.

    On each run it builds a ``CustomAzureSearchRetriever`` from the tool's
    settings and returns whatever the retriever's ``invoke`` returns.
    """

    # Field overrides carry explicit annotations, matching the annotated
    # `args_schema` override below (pydantic-v2-based BaseTool versions reject
    # un-annotated field overrides).
    name: str = "docsearch"
    description: str = "Tool to search for sources to be used to answer questions"
    args_schema: Type[BaseModel] = SearchInput

    # Settings forwarded to CustomAzureSearchRetriever.
    indexes: List[str] = []
    filters: str
    k: int = 20
    reranker_th: int = 1
    sas_token: str = ""

    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> List[Document]:
        """Run the retriever for ``query`` and return its documents."""
        retriever = CustomAzureSearchRetriever(indexes=self.indexes, filters=self.filters, topK=self.k, reranker_threshold=self.reranker_th,
                                               sas_token=self.sas_token, callback_manager=self.callbacks)
        tool_results = retriever.invoke(input=query)

        return tool_results

In [107]:
cities = ["Parma", "Barcelona", "Stockholm"]
lower_cities = [city.lower() for city in cities]

# Build a filter that matches any of the selected cities:
#   city eq 'a' or city eq 'b' or ...
# A single join handles 0, 1 or many cities (an empty list yields "" instead of
# leaving `fltrs` undefined or stale). Single quotes inside a name are doubled
# so the value remains a valid quoted string literal in the filter.
fltrs = " or ".join(
    "city eq '{}'".format(city.replace("'", "''")) for city in lower_cities
)

print(fltrs)

city eq 'parma' or city eq 'barcelona' or city eq 'stockholm'


In [110]:
fltrs

"city eq 'parma' or city eq 'barcelona' or city eq 'stockholm'"

In [136]:
question = "Where is Parma?"

prompt = AGENT_DOCSEARCH_PROMPT

k = 20
rth = 0

cities = ["Parma", "Barcelona", "Stockholm"]
lower_cities = [city.lower() for city in cities]

# Filter matching any selected city: city eq 'a' or city eq 'b' or ...
# One join covers 0, 1 or many cities; single quotes inside a name are doubled
# so the value remains a valid quoted string literal in the filter.
fltrs = " or ".join(
    "city eq '{}'".format(city.replace("'", "''")) for city in lower_cities
)

# Retrieval

tools = [GetDocSearchResults_Tool(
    indexes=indexes, filters=fltrs, k=k, reranker_th=rth, sas_token='na')]

agent = create_openai_tools_agent(llm, tools, prompt)

agent_executor = AgentExecutor(
    agent=agent, tools=tools, handle_parsing_errors=True, verbose=False)

# Wrap the executor so the conversation history for `session_id` is supplied
# under the "history" key and the user input is read from "question".
with_message_history = RunnableWithMessageHistory(
    agent_executor,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history"
)

session_id = 124

response = with_message_history.invoke(
    {"question": question},
    config={"configurable": {"session_id": session_id}}
)

# update_history is defined outside the visible cells; presumably it records
# this turn and returns the session's history (TODO confirm). Only the last
# three entries are kept.
history = update_history(session_id, question, response["output"], indexes)
history = history[-3:]

full_response = {
    "question": question,
    "output": response["output"],
    "history": history
}

response_text = full_response['output']
final_response = f"{response_text}"

# Sources

# Re-run the search directly for the question to list the candidate sources.
retriever = CustomAzureSearchRetriever(indexes=indexes, filters=fltrs, topK=k, reranker_threshold=rth,
                                        sas_token="")
# `invoke` is used here as in the other retriever calls in this notebook,
# replacing the deprecated `get_relevant_documents`.
tool_results = retriever.invoke(input=question)

In [137]:
final_response

'Parma is a city located in the Emilia-Romagna region of northern Italy. It is situated in the Po Valley and is known for its rich cultural heritage, historical architecture, and culinary traditions. The city of Parma is actively involved in climate neutrality commitments and has implemented various initiatives and projects to achieve its climate goals.\n\nReference:\n- Climate City Contract. City of Parma'

In [128]:
tool_results

[Document(metadata={'source': 'C:\\Users\\ITLS104415\\Desktop\\Progetti\\CCC\\barcelona_nzc_ccc_ok.pdf', 'score': 0.02878289483487606}, page_content='Climate City Contract. City of Barcelona           164 \uf0b7 Blue carbon" programme, promoting marine carbon sinks (Posidonia) on the \ncoast of Barcelona (Garraf and Maresme). This is the only action programme \nthat goes well beyond the municipal area. '),
 Document(metadata={'source': 'C:\\Users\\ITLS104415\\Desktop\\Progetti\\CCC\\barcelona_nzc_ccc_ok.pdf', 'score': 0.027071520686149597}, page_content='Climate City Contract. City of Barcelona           5 Annex 1: Climate Action Plan for the ci ty of \nBarcelona  \n \nIntroduction  \n \nCity Context  \nThe City of Barcelona has an area of 101.7 square kilometers and a population of \n1.66 million (as of January 1, 2023), which represents a population density of 13,327 \ninhabitants per square kilometer. Considerin g the floating population (students and \nother non -registered inhabit

In [138]:
history

[{'question': 'Where is Barcelona?',
  'output': 'Barcelona is a city located in the northeastern part of Spain, specifically in the region of Catalonia. It has an area of 101.7 square kilometers and a population of 1.66 million (as of January 1, 2023), with a population density of 13,327 inhabitants per square kilometer. The city is known for its rich cultural heritage, vibrant architecture, and beautiful coastline along the Mediterranean Sea.\n\nReference:\n- Climate City Contract. City of Barcelona',
  'indexes': ['climate-city-contracts']},
 {'question': 'Where is Barcelona?',
  'output': 'Barcelona is a city located in the northeastern part of Spain, specifically in the region of Catalonia. It has an area of 101.7 square kilometers and a population of 1.66 million (as of January 1, 2023), with a population density of 13,327 inhabitants per square kilometer. Considerin g the floating population (students and other non-registered inhabitants), the real population was estimated in 20

In [131]:
history[-3:]

[{'question': "Is there any mention of 'carbon capture and storage' in this climate city contract?",
  'output': 'Yes, the Climate City Contract mentions "Bio Energy Carbon Capture and Storage (BECCS)" as a technique for permanently removing biogenic CO2 from the atmosphere. The document specifically states that Stockholm\'s district energy provider, Stockholm Exergi, is developing a large-scale BECCS project to create a world-class, full-scale BECCS facility at Stockholm Exergi’s existing heat and power biomass plant (KVV8) in Stockholm. This project aims to capture and permanently store large quantities of biogenic CO2, resulting in carbon removal from the atmosphere and the creation of so-called “negative emissions” (source: Stockholm NZC Climate City Contract).\n\nThe document also mentions that the BECCS Stockholm project has the potential to remove around 800,000 tons of CO2 annually and is an important early adopter that has the potential to lead many other CCS projects to follo

In [132]:
response

{'question': 'Where is Barcelona?',
 'history': [HumanMessage(content="Is there any mention of 'carbon capture and storage' in this climate city contract?"),
  AIMessage(content='Yes, the Climate City Contract mentions "Bio Energy Carbon Capture and Storage (BECCS)" as a technique for permanently removing biogenic CO2 from the atmosphere. The document specifically states that Stockholm\'s district energy provider, Stockholm Exergi, is developing a large-scale BECCS project to create a world-class, full-scale BECCS facility at Stockholm Exergi’s existing heat and power biomass plant (KVV8) in Stockholm. This project aims to capture and permanently store large quantities of biogenic CO2, resulting in carbon removal from the atmosphere and the creation of so-called “negative emissions” (source: Stockholm NZC Climate City Contract).\n\nThe document also mentions that the BECCS Stockholm project has the potential to remove around 800,000 tons of CO2 annually and is an important early adopte

### Functions

In [33]:
query = "Is there any mention of 'carbon capture and storage' in this climate city contract?"

In [35]:
# GET_SEARCH_RESULTS
# Inline, step-by-step version of the get_search_results() logic, so that the
# intermediate values can be inspected.
# NOTE(review): `filters` is not assigned in any visible cell of this notebook;
# it must already exist in the kernel namespace for this cell to run.

# Define the request headers
headers = {
    "Content-Type": "application/json",
    "api-key": search_api_key
}

params = {'api-version': search_api_version}

agg_search_results = dict()

# Define the request payload
search_payload = {
    "search": query,
    "select": "id, doc_path, city, chunk",
    "filter": filters,
    "vectorQueries": [{"kind": "text", "k": k, "fields": "embedding", "text": query}],
    "count": "true",
    "top": k
}

response = requests.post(search_endpoint + "indexes/" + index_name + "/docs/search",
                        data=json.dumps(search_payload), headers=headers, params=params)
# Fail with the HTTP error instead of a later KeyError on 'value'.
response.raise_for_status()

search_results = response.json()
agg_search_results[index_name] = search_results

reranker_threshold = 0

content = dict()
ordered_content = OrderedDict()

# Loop variables are named so they do not clobber notebook-level names
# (`search_results`) or the builtin `id` for the rest of the kernel session.
for searched_index, index_results in agg_search_results.items():
    for hit in index_results['value']:
        # Keep only hits whose search score exceeds the threshold.
        if hit['@search.score'] > reranker_threshold:
            content[hit['id']] = {
                "chunk": hit['chunk'],
                "location": hit['doc_path'],
                "city": hit['city'],
                "score": hit['@search.score'],
                "index": searched_index
            }

topk = k

# Best-scoring hits first, capped at topk entries (none when topk <= 0).
ranked_ids = sorted(content, key=lambda doc_id: content[doc_id]["score"], reverse=True)
for doc_id in ranked_ids[:max(topk, 0)]:
    ordered_content[doc_id] = content[doc_id]

ordered_content

OrderedDict([('MzU5LUM6XFVzZXJzXElUTFMxMDQ0MTVcRGVza3RvcFxQcm9nZXR0aVxDQ0NcYmFyY2Vsb25hX256Y19jY2Nfb2sucGRm',
              {'chunk': 'Climate City Contract. City of Barcelona           164 \uf0b7 Blue carbon" programme, promoting marine carbon sinks (Posidonia) on the \ncoast of Barcelona (Garraf and Maresme). This is the only action programme \nthat goes well beyond the municipal area. ',
               'location': 'C:\\Users\\ITLS104415\\Desktop\\Progetti\\CCC\\barcelona_nzc_ccc_ok.pdf',
               'city': 'barcelona',
               'score': 0.03306011110544205,
               'index': 'climate-city-contracts'}),
             ('NzUtQzpcVXNlcnNcSVRMUzEwNDQxNVxEZXNrdG9wXFByb2dldHRpXENDQ1xiYXJjZWxvbmFfbnpjX2NjY19vay5wZGY=',
              {'chunk': 'Climate City Contract. City of Barcelona           27 C1 Sustainable, safe and connected mobility shock plan in urban and metropolitan environments, C2 Housing rehabilitation and urban regeneration plan, C6 Sustainable, safe and connect

In [38]:
# NOTE(review): this cell re-declares the class that is already defined under
# "### Classes" earlier in the notebook; whichever cell runs last wins.
# Consider keeping a single definition.
class CustomAzureSearchRetriever(BaseRetriever):
    """Retriever that turns ``get_search_results`` hits into ``Document`` objects."""

    # Index names forwarded to get_search_results.
    indexes: List
    # Filter expression forwarded to get_search_results.
    filters: str
    # Forwarded as `k` (number of hits to request/keep).
    topK: int
    # Forwarded as `reranker_threshold`.
    reranker_threshold: int
    # Forwarded as `sas_token`.
    sas_token: str = ""

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search for ``query`` and return one ``Document`` per hit.

        Each document's ``page_content`` is the hit's ``chunk``; its metadata
        holds ``source`` (the hit's ``location``, or ``""`` when that is None)
        and ``score``. The order of the search results is preserved.
        """
        ordered_results = get_search_results(
            query,
            indexes=self.indexes,
            filters=self.filters,
            k=self.topK,
            reranker_threshold=self.reranker_threshold,
            sas_token=self.sas_token,
        )

        top_docs = []
        for doc_id, value in ordered_results.items():
            location = value["location"] if value["location"] is not None else ""
            try:
                top_docs.append(Document(page_content=value["chunk"], metadata={
                    "source": location, "score": value["score"]}))
            except Exception as exc:
                # Best-effort: skip a hit that cannot be converted, but say which
                # one and why. `Exception` (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                print(f"An exception occurred while building a Document for result {doc_id!r}: {exc!r}")

        return top_docs

In [45]:
# Exercise the retriever on its own: threshold 0, empty SAS token.
# `filters` must already be present in the kernel namespace.
retriever = CustomAzureSearchRetriever(
    indexes=indexes,
    filters=filters,
    topK=k,
    reranker_threshold=0,
    sas_token="",
)
tool_results = retriever.invoke(input=query)
tool_results

[Document(metadata={'source': 'C:\\Users\\ITLS104415\\Desktop\\Progetti\\CCC\\barcelona_nzc_ccc_ok.pdf', 'score': 0.03306011110544205}, page_content='Climate City Contract. City of Barcelona           164 \uf0b7 Blue carbon" programme, promoting marine carbon sinks (Posidonia) on the \ncoast of Barcelona (Garraf and Maresme). This is the only action programme \nthat goes well beyond the municipal area. '),
 Document(metadata={'source': 'C:\\Users\\ITLS104415\\Desktop\\Progetti\\CCC\\barcelona_nzc_ccc_ok.pdf', 'score': 0.03067915514111519}, page_content='Climate City Contract. City of Barcelona           27 C1 Sustainable, safe and connected mobility shock plan in urban and metropolitan environments, C2 Housing rehabilitation and urban regeneration plan, C6 Sustainable, safe and connected mobility, C7 Deployment and integration of renewable energies, C8 Electricity infrastructures, promotion of smart grids and deployment of flexibility and storage, C9 Renewable hydrogen roadmap and its 

In [48]:
# Build the search tool by itself and display it to inspect its configuration.
output = GetDocSearchResults_Tool(
    indexes=indexes,
    filters=fltrs,
    k=k,
    reranker_th=rth,
    sas_token='na',
)
output

GetDocSearchResults_Tool(indexes=['climate-city-contracts'], filters="city eq 'barcelona'", reranker_th=0, sas_token='na')

In [49]:
# Single document-search tool made available to the agent.
docsearch_tool = GetDocSearchResults_Tool(
    indexes=indexes,
    filters=fltrs,
    k=k,
    reranker_th=rth,
    sas_token='na',
)
tools = [docsearch_tool]

agent = create_openai_tools_agent(llm, tools, prompt)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,
    verbose=False,
)

# The wrapped executor reads the user input from "question" and receives the
# session's past messages under "history".
with_message_history = RunnableWithMessageHistory(
    agent_executor,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
)

session_id = 123

run_config = {"configurable": {"session_id": session_id}}
response = with_message_history.invoke({"question": question}, config=run_config)

In [50]:
response

{'question': "Is there any mention of 'carbon capture and storage' in this climate city contract?",
 'history': [HumanMessage(content="Is there any mention of 'carbon capture and storage' in this climate city contract?"),
  AIMessage(content='Yes, the Climate City Contract for Stockholm includes a mention of "Bio Energy Carbon Capture and Storage (BECCS)" as a technique for permanently removing biogenic CO2 from the atmosphere. The document states that Stockholm\'s district energy provider, Stockholm Exergi, is developing a large-scale BECCS project to create a world-class, full-scale BECCS facility at Stockholm Exergi’s existing heat and power biomass plant (KVV8) in Stockholm. This project aims to capture and permanently store large quantities of biogenic CO2, resulting in carbon removal from the atmosphere and the creation of "negative emissions." The document also highlights that the BECCS Stockholm project has the potential to lead many other CCS projects to follow and contribute 