# Setup

In [1]:
# Mount Google Drive at /content/drive; later cells read and write paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Dependencies

In [2]:
!pip -q install langchain huggingface_hub openai google-search-results tiktoken wikipedia

In [3]:
!pip show langchain

Name: langchain
Version: 0.0.348
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: aiohttp, async-timeout, dataclasses-json, jsonpatch, langchain-core, langsmith, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 


In [4]:
# !pip install openai==1.3.5 google-generativeai transformers
!pip install openai==0.27.8 google-generativeai transformers



In [5]:
!pip install langchainhub



In [6]:
!pip install gitpython



In [7]:
!pip install flatten-json



In [8]:
!pip install bpemb sentence_transformers numpy



In [9]:
!pip install pinecone-client==2.0.11 jq



## Libraries

In [10]:
import getpass
import json
import os
import shutil
import sqlite3
import time

import git
import langchain
import openai

## Home

In [11]:
class GitHome():
    """Publish the Git/GitHub settings of this session as environment variables.

    Constructing an instance is done purely for its side effect: the six
    arguments are written verbatim to ``os.environ`` and three derived values
    (``GIT_REPO``, ``REPO_DIR``, ``CLONE_FROM``) are composed from them.
    The instance itself keeps no attributes.
    """

    def __init__(self,
                 work_dir,
                 branch_name,
                 repo_name,
                 repo_home,
                 user_name,
                 user_token):
        """Store the raw settings, then the values composed from them.

        Args:
            work_dir: directory that will contain the checkout.
            branch_name: branch name, stored as ``BRANCH_NAME``.
            repo_name: repository name, also used as the checkout folder name.
            repo_home: URL prefix the repository name is appended to.
            user_name: GitHub account name used in the clone URL.
            user_token: GitHub Personal Access Token used in the clone URL.
        """
        # Raw settings, stored exactly as received.
        os.environ.update({
            "WORK_DIR": work_dir,
            "BRANCH_NAME": branch_name,
            "REPO_NAME": repo_name,
            "GIT_HOME": repo_home,
            "USER_NAME": user_name,
            "USER_TOKEN": user_token,  # Github Personal Access Token
        })
        # Derived settings.
        os.environ["GIT_REPO"] = f"{repo_home}{repo_name}"
        os.environ["REPO_DIR"] = f"{work_dir}/{repo_name}"
        # NOTE: this URL carries the token in clear text; avoid printing it.
        os.environ["CLONE_FROM"] = (
            f"https://{user_name}:{user_token}@github.com/{user_name}/{repo_name}.git"
        )

In [12]:
# SECURITY: the GitHub Personal Access Token must never be stored in the
# notebook. It is read from the GITHUB_TOKEN environment variable, with an
# interactive (non-echoing) prompt as fallback. The token that was previously
# embedded in this cell is exposed in the notebook history and should be revoked.
github_token = os.environ.get("GITHUB_TOKEN") or getpass.getpass("GitHub personal access token: ")

GitHome(work_dir="/content/drive/MyDrive/StanfordLLM/thought-distillation",
        repo_name="thought-distillation",
        repo_home="https://github.com/pablo-tech/",
        branch_name="main",
        user_name="pablo-tech",
        user_token=github_token)

<__main__.GitHome at 0x7f6bff64b850>

## Git

In [13]:
os.environ["WORK_DIR"]

'/content/drive/MyDrive/StanfordLLM/thought-distillation'

In [14]:
# Move into the working directory. This stays best-effort (the directory may
# not exist yet on a fresh Drive), but only the expected failures are caught
# and they are reported instead of being silently swallowed.
try:
    os.chdir(os.environ["WORK_DIR"])
except (KeyError, OSError) as err:
    print(f"Could not change to WORK_DIR: {err}")

# Remove any previous checkout so the clone starts from a clean directory.
# shutil.rmtree avoids shell word-splitting of an unquoted $REPO_DIR, and the
# guard skips the removal when the variable is unset or empty.
repo_dir = os.environ.get("REPO_DIR")
if repo_dir:
    shutil.rmtree(repo_dir, ignore_errors=True)


In [15]:
# Clone the configured branch (BRANCH_NAME was previously set but never used).
# CLONE_FROM embeds the access token, so it must not be printed or logged.
git.Repo.clone_from(os.environ["CLONE_FROM"],
                    os.environ["REPO_DIR"],
                    branch=os.environ["BRANCH_NAME"])

<git.repo.base.Repo '/content/drive/MyDrive/StanfordLLM/thought-distillation/thought-distillation/.git'>

In [16]:
# Work from the checkout's Python source folder so the project modules
# imported in the next cell (model_base, model_bot, tool_*) resolve.
os.chdir(os.environ["REPO_DIR"] + "/source/main/py")

# Project modules

In [17]:
from model_base import AzureBase
from model_bot import ChatBot
from tool_wikipedia import EncyclopediaToolFactory
from tool_search import SearchToolFactory
from tool_sql import SqlToolFactory

# LLM

In [18]:
# Build the two model handles used throughout the notebook from the project's
# AzureBase helper: a completion-style model (its invoke() returns a plain
# string below) and a chat model (its invoke() returns an AIMessage below).
open_ai = AzureBase()

inference_llm_35 = open_ai.inference_llm_35()
chat_llm_40 = open_ai.chat_llm_40()

                engine was transferred to model_kwargs.
                Please confirm that engine is what you intended.
                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [19]:
inference_llm_35.invoke("hi, i am bob")

'\n\nHi Bob! How are you?'

In [20]:
chat_llm_40.invoke("hi, i am bob")

AIMessage(content='Hello Bob! How can I assist you today?')

# Chat bot

### Simple Test

In [21]:
agent_llm = inference_llm_35
tool_llm = inference_llm_35

In [22]:
# Two-turn smoke test: introduce a name, then ask for it back, printing the
# short answer followed by the full run detail of each turn.
bot = ChatBot(
    agent_llm=agent_llm,
    agent_tools=EncyclopediaToolFactory(tool_llm).get_tools(),
    is_verbose=True,
)

queries = [
    "hi, my name is Bob",
    "what is my name?",
]

for q in queries:
    response = bot.invoke(q)
    print(f"user: {q!s}")
    print(f"agent: {response.get_answer()!s}")
    print(response)




EXECUTOR_RUN_DETAIL=>
 - RUN_ANSWER: Hi Bob! How can I help you?
 - RUN_NORMAL: True
 - RUN_JOURNEY: 
Thought: Hi Bob! How can I help you?
Action: Finish[Hi Bob! How can I help you?]
Observation: Hi Bob! How can I help you?
 - RUN_MEASURE => 
	 iteration_count: 1
	 hallucination_count: 0
	 min_input_len: 3872
	 max_input_len: 3872
	 total_input_len: 3872
	 min_output_len: 27
	 max_output_len: 27
	 total_output_len: 27
	 min_model_time: 0.417
	 max_model_time: 0.417
	 total_model_time: 0.417
 - RUN_EXCEPTION =>


user: hi, my name is Bob
agent: Hi Bob! How can I help you?
EXECUTOR_RUN_DETAIL=>
 - RUN_ANSWER: Hi Bob! How can I help you?
 - RUN_NORMAL: True
 - RUN_JOURNEY: 
Thought: Hi Bob! How can I help you?
Action: Finish[Hi Bob! How can I help you?]
Observation: Hi Bob! How can I help you?
 - RUN_MEASURE => 
	 iteration_count: 1
	 hallucination_count: 0
	 min_input_len: 3872
	 max_input_len: 3872
	 total_input_len: 3872
	 min_output_len: 27
	 max_output_len: 27
	 total_output_len: 2



  lis = BeautifulSoup(html).find_all('li')


TOOL_WIKIPEDIA_RUN_DETAIL=>
 - RUN_ANSWER: ['A name is a term used for identification by an external observer. They can identify a class or category of things, or a single thing, either uniquely, or within a given context. The entity identified by a name is called its referent. A personal name identifies, not necessarily uniquely, a specific individual human. The name of a specific entity is sometimes called a proper name (although that term has a philosophical meaning as well) and is, when consisting of only one word, a proper noun.', 'North American English (NAmE, NAE) is the most generalized variety of the English language as spoken in the United States and Canada. Because of their related histories and cultures, plus the similarities between the pronunciations (accents), vocabulary, and grammar of American English and Canadian English, the two spoken varieties are often grouped together under a single category. Canadians are generally tolerant of both British and American spellings

### Discrete Information Retrieval

##### Tool

In [23]:
agent_llm = inference_llm_35
discretize_llm = inference_llm_35
parsing_llm = inference_llm_35

In [24]:
# Build the SQL-backed product tools. The factory receives two LLM handles
# (named for discretizing and parsing — confirm their exact roles in tool_sql).
# Construction logs one "<file>=<count>" line per product file, per the output below.
tool_factory = SqlToolFactory(discretize_llm, parsing_llm)

product_tools = tool_factory.get_tools()

backpacks-men.json=540
backpacks-women.json=95
bedsheets-unisex.json=1649
candle_holders-unisex.json=162
candles-unisex.json=241
chocolates-unisex.json=236
clutches-women.json=118
dinner_sets-unisex.json=17
drinking_glass-unisex.json=158
dryfruits-unisex.json=80
fragrances-men.json=382
fragrances-women.json=418
gaming-unisex.json=148
handbags-women.json=437
headphones_earphones-unisex.json=930
home_fragrances-unisex.json=51
instant_camera-unisex.json=16
mobiles-unisex.json=721
product_joined.json=540
silver_artifacts-unisex.json=14
silver_bullion-unisex.json=53
speaker_mediaplayer-unisex.json=380
sweets-unisex.json=238
tab_ereader-unisex.json=33
tea_sets-unisex.json=21
wallets-men.json=732
wallets-women.json=671
watch-kids.json=181
watch-men.json=1611
watch-unisex.json=776
watch-women.json=1095
DATASET_SIZE=12744
backpacks-men.json=540
DATASET_SIZE=540
reading... backpacks-men.json
backpacks-women.json=95
DATASET_SIZE=95
reading... backpacks-women.json
bedsheets-unisex.json=1649
DATASE

In [25]:
# Call the first product tool directly (no agent), restricting the query to the
# backpacks-men.json sub-domain through query_filter.
product_tools[0].run(tool_input="what non-black 15 liter under $400 bags do you have",
                     query_filter=['backpacks-men.json']).get_answer()

---> subdomain_name=backpacks-men.json prompt=7625 query_sql=SELECT context.id, context.price, context.title, inference.closure_type, inference.collection_name, inference.height, inference.id, inference.is_for_dad, inference.length, inference.material_type, inference.product_brand, inference.product_capacity, inference.product_collection, inference.product_color, inference.product_height, inference.product_length, inference.product_material, inference.product_size, inference.product_type, inference.product_width, inference.strap_type, inference.sub_domain, inference.width FROM  CLIQ_CONTEXT AS context JOIN CLIQ_INFERENCE_BACKPACKS_MEN_JSON AS inference  ON context.id = inference.id  WHERE product_capacity = '15 ltrs' AND product_color != 'black' AND price < 400 LIMIT 1;


[{"product_capacity = '15 ltrs' AND product_color != 'black' AND price < 400"},
 [{'id': 'TATACLIQ-PRIMARY-BPZEUSHBLU-Aristocrat',
   'price': 324.0,
   'title': 'Aristocrat Zeus 15 Ltrs Blue Small Backpack',
   'closure_type': 'zip',
   'height': '48',
   'length': '31',
   'material_type': 'polyester',
   'product_brand': 'aristocrat',
   'product_capacity': '15 ltrs',
   'product_collection': 'zeus',
   'product_color': 'blue',
   'product_size': 'small',
   'product_type': 'backpack',
   'strap_type': 'adjustable',
   'sub_domain': 'backpacks-men.json',
   'width': '15.5'}]]

##### Bot

In [26]:
# Same product question as the direct tool call, now routed through a chat-model
# agent that can use the product tools. NOTE: this rebinds the notebook-level
# names `bot` and `answer`.
bot = ChatBot(agent_llm=chat_llm_40,
              agent_tools=product_tools,
              is_verbose=True)

answer = bot.invoke("what non-black 15 liter under $400 bags do you have")

print(answer)

---> subdomain_name=backpacks-men.json prompt=7636 query_sql=SELECT context.id, context.price, context.title, inference.closure_type, inference.collection_name, inference.height, inference.id, inference.is_for_dad, inference.length, inference.material_type, inference.product_brand, inference.product_capacity, inference.product_collection, inference.product_color, inference.product_height, inference.product_length, inference.product_material, inference.product_size, inference.product_type, inference.product_width, inference.strap_type, inference.sub_domain, inference.width FROM  CLIQ_CONTEXT AS context JOIN CLIQ_INFERENCE_BACKPACKS_MEN_JSON AS inference  ON context.id = inference.id  WHERE product_capacity = '15 ltrs' AND product_color != 'black' AND price < 400 LIMIT 1;
TOOL_CLIQ_RUN_DETAIL=>
 - RUN_ANSWER: [{"product_capacity = '15 ltrs' AND product_color != 'black' AND price < 400"}, [{'id': 'TATACLIQ-PRIMARY-BPZEUSHBLU-Aristocrat', 'price': 324.0, 'title': 'Aristocrat Zeus 15 Ltrs B

In [27]:
answer.get_answer()

'Aristocrat Zeus 15 Ltrs Blue Small Backpack'

# Evaluation

### Product

##### Gift loader

In [28]:
# Load the backpacks-men corpus and turn each stored value into a Python object.
from domain_knowledge import GiftDataset2

gift2 = GiftDataset2(["backpacks-men.json"])

# Here product_data is a mapping: .values() is called on it below.
product_data = gift2.get_corpus("backpacks-men.json")

# NOTE(review): eval() executes arbitrary code contained in the corpus values.
# If the values are plain Python literals, ast.literal_eval (or json.loads for
# JSON text) is the safe replacement — confirm the stored format before switching.
# NOTE(review): product_data is rebound from a mapping to a list here; the
# export cell that follows consumes the list form.
product_data = [eval(p) for p in product_data.values()]

In [29]:
# eval(list(product_data.values())[0])

In [30]:
# Persist the parsed products as one JSON array; the Pinecone loader below
# reads this same product_joined.json file back from the gift2_qa folder.
# NOTE(review): absolute Drive path is hard-coded; consider a shared DATA_DIR constant.
out_file = "/content/drive/MyDrive/StanfordLLM/qa_data/gift2_qa/product_joined.json"

with open(out_file, "w") as outfile:
    json.dump(product_data, outfile)

##### Pinecone loader

In [31]:
# from langchain.document_loaders import TextLoader
# from langchain.document_loaders.csv_loader import CSVLoader
# from langchain.document_loaders import JSONLoader
# from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

# from vector_embed import BytePairEmbedding

# import pinecone
# from langchain.vectorstores import Pinecone

# from uuid import uuid4


# class VectorDb():

#     def __init__(self,
#                 chunk_size=1000,
#                 chunk_overlap=100):
#         self.embedding_model = BytePairEmbedding
#         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size = chunk_size,
#                                                             chunk_overlap  = chunk_overlap,
#                                                             length_function = len,
#                                                             separators = ["\n\n", "\n", " ", ""],
#                                                             is_separator_regex = False)
#         self.embed_dimension = len(self.get_vector("x"))

#     def read_files(self, file_names,
#                   directory_path='/content/drive/MyDrive/StanfordLLM/qa_data/legal_qa/'):
#         file_names = [directory_path+name for name in file_names]
#         return self.text_documents(file_names)

#     def read_faq(self, file_names):
#         return self.csv_documents(file_names)

#     def read_products(self,
#                       file_names,
#                       directory_path='/content/drive/MyDrive/StanfordLLM/qa_data/faq_qa/'):
#         file_names = [directory_path+name for name in file_names]
#         return self.json_documents(file_names)

#     def text_documents(self, file_names):
#         split_documents = []
#         for file_name in file_names:
#           whole_document = TextLoader(file_name).load()
#           split_documents += self.text_splitter.split_documents(whole_document)
#         return split_documents

#     def csv_documents(self, file_names):
#         split_documents = []
#         for file_name in file_names:
#           whole_document = CSVLoader(file_name).load()
#           split_documents += self.text_splitter.split_documents(whole_document)
#         return split_documents

#     def product_metadata_func(self, record: dict, metadata: dict) -> dict:
#         # can't do price,
#         metadata["brand"] = record.get("brand")
#         metadata["store"] = record.get("store")
#         metadata["gender"] = record.get("gender")
#         metadata["category"] = record.get("category")
#         metadata["product_id"] = record.get("product_id")
#         return metadata

#     def json_documents(self, file_names):
#         # https://python.langchain.com/docs/modules/data_connection/document_loaders/json
#         split_documents = []
#         for file_name in file_names:
#           loader = JSONLoader(
#                 file_path=file_name,
#                 jq_schema=".[]", # ".[].description", # ".[]", #
#                 content_key="description",
#                 metadata_func=self.product_metadata_func)
#           whole_documents = loader.load()
#           split_documents += whole_documents
#           # split_documents += self.text_splitter.split_documents(whole_document)
#         return split_documents

#     def get_vector(self, text):
#         return self.embedding_model.embed_query(text)

#     def calc_embeds(self, texts):
#         return [self.get_list_vector(text) for text in texts]

#     def get_list_vector(self, text):
#         return self.get_vector(text).tolist()

#     def new_ids(self, items):
#         return [str(uuid4()) for _ in range(len(items))]


# class PineconeEnv(VectorDb):

#     def __init__(self,
#                 api_key=os.environ["PINECONE_API_KEY"],  # never hardcode the key; the one formerly written here should be rotated
#                 environment="gcp-starter"):
#         super().__init__()
#         self.api_key = api_key
#         self.environment = environment


# class PineconeCore(PineconeEnv):

#     def __init__(self,
#                  index_name,
#                  is_create,
#                  similarity_metric='cosine', # "euclidean"
#                  shard_count=1):
#         super().__init__()
#         self.index_name = index_name
#         self.is_create = is_create
#         self.similarity_metric = similarity_metric
#         self.shard_count = shard_count
#         self.db_init()
#         self.db_index = pinecone.Index(self.index_name,
#                                        pool_threads=1)

#     def db_init(self):
#         pinecone.init(api_key=self.api_key, environment=self.environment)
#         if self.is_create:
#           try:
#             pinecone.delete_index(self.index_name)
#             # index = pinecone.GRPCIndex(index_name)
#           except:
#             pass
#           pinecone.create_index(self.index_name,
#                                 dimension=self.embed_dimension,
#                                 metric=self.similarity_metric,
#                                 shards=self.shard_count)

#     def get_index(self):
#         return self.db_index

#     def __str__(self):
#         return f"""
#   {pinecone.list_indexes()}
#   {self.get_index().describe_index_stats()}
#   """


# class PineconeIO(PineconeCore):

#     def __init__(self,
#                  index_name,
#                  is_create):
#         super().__init__(index_name, is_create)
#         self.CHUNK_COL = "chunk"
#         self.TEXT_COL = "text"

#     def join_upsert(self, ids, embeds, metadatas):
#         insertable = zip(ids, embeds, metadatas)
#         # print("000" + str(list(insertable)[0][2]))
#         self.get_index().upsert(vectors=insertable,
#                                 async_req=False)

#     def batch_upsert(self, items, metadatas,
#                      upsert_func, batch_size=100):
#         i = 0
#         while i < len(items):
#           j = i+batch_size
#           if j > len(items):
#             j = len(items)
#           upsert_func(items[i:j], metadatas[i:j])
#           i+=batch_size

#     def search(self, search_txt, k, search_filter={}):
#         return self.select_by_text(search_txt, k, search_filter)

#     def select_by_text(self,
#                        search_txt,
#                        k,
#                        search_filter,
#                        include_metadata=True,
#                        include_vectors=False):
#         return self.select_by_vector(search_vec=self.get_list_vector(search_txt),
#                                      k=k,
#                                      search_filter=search_filter,
#                                      include_metadata=include_metadata,
#                                      include_vectors=include_vectors)

#     def select_by_vector(self,
#                          search_vec,
#                          k,
#                          search_filter,
#                          include_metadata,
#                          include_vectors):
#         print("search_filter="+str(search_filter))
#         results_with_scores = self.get_index().query(vector=search_vec,
#                                                      top_k=k,
#                                                      filter=search_filter,
#                                                      include_metadata=include_metadata,
#                                                      include_values=include_vectors)
#         return results_with_scores

#     def fetch_by_id(self, ids):
#         return self.get_index().fetch(ids)

#     def delete_by_id(self, ids):
#         return self.get_index().delete(ids)



# class PineconeDb(PineconeIO):

#     def __init__(self, index_name, is_create=False):
#         super().__init__(index_name,
#                          is_create)

#     def load_docs(self, items, metas):
#         if len(metas) == 0:
#             metas=[{} for _ in range(len(items))]
#         self.batch_upsert(items, metas,
#                           upsert_func=self.doc_upsert)

#     def doc_upsert(self, docs, metadatas):
#         ids = self.new_ids(docs)
#         embeds = self.calc_embeds([doc.page_content for doc in docs])
#         metadatas = self.docs_metadata(docs, metadatas)
#         self.join_upsert(ids, embeds, metadatas)

#     def docs_metadata(self, docs, metadatas):
#         return [self.doc_metadata(docs[i], metadatas[i], i)
#                 for i in range(len(docs))]

#     def doc_metadata(self, doc, metadata, chunk):
#         return { self.CHUNK_COL: chunk,
#                  self.TEXT_COL: doc.page_content,
#                  **doc.metadata,
#                  **metadata }

In [32]:
# Use the project's PineconeDb wrapper from vector_db.
# NOTE(review): presumably this mirrors the commented-out reference
# implementation in the previous cell, where is_create=True deletes any existing
# index of this name and recreates it empty — confirm against vector_db before
# re-running on an index that holds data.
from vector_db import PineconeDb

pinecone_db = PineconeDb(index_name="quickstart",
                         is_create=True)

In [33]:
# print() already formats the object through its __str__ method.
print(pinecone_db)


  ['quickstart']
  {'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'totalVectorCount': 0.0}
  


In [34]:
# Sanity check: fail fast when the installed pinecone client version differs
# from the version reported for the server.
import pinecone.info

version_info = pinecone.info.version()

assert version_info.server == version_info.client

# Last expression: display the version pair in the cell output.
version_info

VersionResponse(server='2.0.11', client='2.0.11')

In [35]:
# pine_docs = pinecone_db.read_files(file_names=['pay-user.txt',
#                                                'payoff-sale.txt',
#                                                'refer-and-earn.txt',
#                                                'sale-parade.txt',
#                                                'savings-calculator.txt',
#                                                'seller-terms.txt',
#                                                'tdl-privacy.txt',
#                                                'terms-conditions.txt',
#                                                'tpl-privacy.txt'],
#                                    directory_path='/content/drive/MyDrive/StanfordLLM/qa_data/legal_qa/')

# # pinecone_db.load_docs(items=pine_docs,
# #                       metas=[])

# pine_docs[0]
# # pinecone_db.load_docs(pine_docs[0:100])

In [36]:
# pine_docs = pinecone_db.read_files(file_names=['product_joined.json'],
#                                    directory_path='/content/drive/MyDrive/StanfordLLM/qa_data/gift2_qa/')

# pine_docs[0]

In [37]:
# Load the exported product JSON as documents; per the output below each
# Document carries the description as page_content plus product metadata.
pine_docs = pinecone_db.read_products(file_names=['product_joined.json'],
                                      directory_path='/content/drive/MyDrive/StanfordLLM/qa_data/gift2_qa/')

# Inspect one loaded document.
pine_docs[1]

Document(page_content='Bag Type : Backpacks, Capacity : 30litres, Closure Type : Zip, Color : Grey, Compatible Laptop Size : 15.6inches, Feature1 : Rain Cover : Yes, Height : 48cm, Length : 29cm, Material Type : Fabric, No of Compartments : 2, Size : Medium, Strap Type : Adjustable, Width : 16cm,  styleNote: Let people applaud you for your impeccable choice of style with this unisex grey laptop backpack with a rain cover from Skybags. Adorned with a printed pattern, this backpack features 2 compartments, multiple pockets and side water bottle holders to accommodate a 15.6-inch laptop and other essentials in a safe manner. While good quality polyester assures durability, the adjustable padded shoulder straps promise carrying comfort. Moreover, it comes with a zip closure that will keep your valuable belongings secure.', metadata={'source': '/content/drive/MyDrive/StanfordLLM/qa_data/gift2_qa/product_joined.json', 'seq_num': 2, 'brand': 'Skybags', 'store': 'TATACLIQ PRIMARY', 'gender': '

In [38]:
# Embed and upsert all product documents into the index.
# NOTE(review): metas=[] presumably means "no extra metadata per document"
# (the commented-out reference implementation substitutes empty dicts) — confirm in vector_db.
pinecone_db.load_docs(items=pine_docs,
                      metas=[])

In [39]:
questions = ["black backpack"
             ]

In [40]:
# Run each question against the index and report the best hit.
for q in questions:
    answer = pinecone_db.search(q, k=3)
    print("Q: " + q)

    matches = answer['matches']
    if not matches:
        # An empty index or an over-restrictive filter returns no hits;
        # report that instead of failing with IndexError on matches[0].
        print("A: <no matches>")
        print("---")
        continue

    hit = matches[0]  # only the top-ranked match is reported
    metadata = hit['metadata']
    print("A(text): " + metadata['text'].strip())
    print("A(source): " + metadata['source'].strip())
    print("A(score): " + str(hit['score']))
    print("A(id): " + hit['id'].strip())
    print("A(chunk): " + str(metadata['chunk']))
    print("A(meta): " + str(metadata.keys()))
    # 'begins' is optional metadata; check for it explicitly rather than
    # hiding every possible error behind a bare except.
    if 'begins' in metadata:
        print("A(begins): " + str(metadata['begins']))

    print("---")

search_filter={}
Q: black backpack
A(text): Bag Type : Backpacks, Closure Type : Zip, Collection Name : Streak Plus, Color : Black, Height : 13cm, Length : 46cm, Material Type : Polyester, Size : Medium, Strap Type : Adjustable, Width : 33cm,  styleNote: Get noticed for all the good reasons with this unisex's backpack from Wildcraft. Adorned with a solid pattern, this black backpacks features  compartments and pockets to accommodate all essentials in a safe manner. While good quality polyester assures durability, the adjustable strap promises carrying comfort. Besides, it comes with a zip closure that will keep your valuable belongings secure.
A(source): /content/drive/MyDrive/StanfordLLM/qa_data/gift2_qa/product_joined.json
A(score): 0.667424142
A(id): 1cdff8e9-310f-4ac8-a646-00375d94d5c4
A(chunk): 69.0
A(meta): dict_keys(['brand', 'category', 'chunk', 'gender', 'product_id', 'seq_num', 'source', 'store', 'text'])
---


### Hotpot

##### Wikipedia

In [41]:
agent_llm = inference_llm_35
tool_llm = inference_llm_35

In [42]:
# Encyclopedia-tool agent driven by the completion model (agent_llm and tool_llm
# were both set to inference_llm_35 in the previous cell).
# NOTE(review): the recorded answer below is 'Tokyo, Japan', while the tool
# output in a later cell describes Tokyo 2020 as the XXXII Olympiad — the
# XXXI answer looks wrong; the retrieved passages here are weightlifting records.
bot = ChatBot(agent_llm=agent_llm,
              agent_tools=EncyclopediaToolFactory(tool_llm).get_tools(),
              is_verbose=True)

bot.invoke("Where was the XXXI Olympic held").get_answer()

TOOL_WIKIPEDIA_RUN_DETAIL=>
 - RUN_ANSWER: ["This is the list of Olympic records in weightlifting. Records are maintained in each weight class for the snatch lift, clean and jerk lift, and the total for both lifts.\nThe weight classes for men on the Olympic program were adjusted for the 2000 Games, so Olympic records only exist based on the results during and after that. Women's weightlifting made its Olympic debut at the 2000 Games in Sydney, with seven weight classes that have not changed since.\n\n\n== Current records ==\n\n\n=== Men ===\n♦ denotes a performance that is also a current world record.", 'This is a list of world records in Olympic weightlifting. These records are maintained in each weight class for the snatch lift, clean and jerk lift, and the total for both lifts.\nThe International Weightlifting Federation (IWF) restructured its weight classes in 1993, 1998 and 2018, nullifying earlier records.\n\n\n== Current records ==\nKey to tables:\n  Awaiting ratification\n  Not

'Tokyo, Japan'

In [43]:
# Same encyclopedia-tool setup, but with the chat model as both the agent LLM
# and the tool LLM; a fresh ChatBot is built for the question.
bot = ChatBot(agent_llm=chat_llm_40,
              agent_tools=EncyclopediaToolFactory(chat_llm_40).get_tools(),
              is_verbose=True)

bot.invoke("What was the name of the Olympic event held in Rio").get_answer()

TOOL_WIKIPEDIA_RUN_DETAIL=>
 - RUN_ANSWER: ['The 2020 Summer Olympics, officially the Games of the XXXII Olympiad and also known as Tokyo 2020, was an international multi-sport event held from 23 July to 8 August 2021 in Tokyo, Japan, with some preliminary events that began on 21 July 2021. Tokyo was selected as the host city during the 125th IOC Session in Buenos Aires, Argentina, on 7 September 2013.Originally scheduled to take place from 24 July to 9 August 2020, the event was postponed to 2021 on 24 March 2020 due to the global COVID-19 pandemic, the first such instance in the history of the Olympic Games (previous games had been cancelled but not rescheduled). However, the event retained the Tokyo 2020 branding for marketing purposes. It was largely held behind closed doors with no public spectators permitted due to the declaration of a state of emergency in the Greater Tokyo Area in response to the pandemic, the first and only Olympic Games to be held without official spectators.

'2016 Summer Olympics'

In [44]:
# Multi-hop question: the agent must first identify the flag bearer, then a birth date.
# NOTE(review): no final answer is recorded in this cell's output below.
bot = ChatBot(agent_llm=chat_llm_40,
              agent_tools=EncyclopediaToolFactory(chat_llm_40).get_tools(),
              is_verbose=True)

bot.invoke("When was the flag bearer of Rio Olympics born").get_answer()

TOOL_WIKIPEDIA_RUN_DETAIL=>
 - RUN_ANSWER: ["During the Parade of Nations within the Rio de Janeiro 2016 Summer Olympics opening ceremony, athletes and officials from each participating team marched in the Maracanã Stadium preceded by their flag and placard bearer. Each flag bearer had been chosen either by the team's National Olympic Committee or by the athletes themselves.\n\n\n== Parade order ==\nGreece entered first, as the nation of the ancient and first modern Olympic Games. The host nation Brazil marched last. Other teams entered in alphabetical order in the language of the host country (Portuguese), according with tradition and IOC guidelines.", 'This is a list of flag bearers who have represented Canada at the Olympics.\n\n\n== Opening ceremonies ==\nFlag bearers carry the national flag of their country at the opening ceremony of the Olympic Games.\n\n\n== Closing ceremonies ==\nFlag bearers carry the national flag of their country at the closing ceremony of the Olympic Games.

In [45]:
# NOTE(review): this cell is an exact repeat of the previous one (same bot
# construction, same question); its recorded output ends in "TIMEOUT...".
# Consider keeping a single copy, or wrapping the query in a helper function.
bot = ChatBot(agent_llm=chat_llm_40,
              agent_tools=EncyclopediaToolFactory(chat_llm_40).get_tools(),
              is_verbose=True)

bot.invoke("When was the flag bearer of Rio Olympics born").get_answer()

TOOL_WIKIPEDIA_RUN_DETAIL=>
 - RUN_ANSWER: ["During the Parade of Nations within the Rio de Janeiro 2016 Summer Olympics opening ceremony, athletes and officials from each participating team marched in the Maracanã Stadium preceded by their flag and placard bearer. Each flag bearer had been chosen either by the team's National Olympic Committee or by the athletes themselves.\n\n\n== Parade order ==\nGreece entered first, as the nation of the ancient and first modern Olympic Games. The host nation Brazil marched last. Other teams entered in alphabetical order in the language of the host country (Portuguese), according with tradition and IOC guidelines.", 'This is a list of flag bearers who have represented Canada at the Olympics.\n\n\n== Opening ceremonies ==\nFlag bearers carry the national flag of their country at the opening ceremony of the Olympic Games.\n\n\n== Closing ceremonies ==\nFlag bearers carry the national flag of their country at the closing ceremony of the Olympic Games.



TOOL_WIKIPEDIA_RUN_DETAIL=>
 - RUN_ANSWER: ["The opening ceremony of the 2016 Summer Olympics took place on the evening of Friday 5 August 2016 in the Maracanã Stadium, Rio de Janeiro, starting at 20:00 BRT (23:00 UTC). As mandated by the Olympic Charter, the proceedings combined the formal ceremonial opening of this international sporting event (including welcoming speeches, hoisting of the flags and the parade of athletes, as well as a new feature—the presentation of the International Olympic Committee's Olympic Laurel distinction) with an artistic spectacle to showcase the host nation's culture and history. The Games were officially opened by Acting President of Brazil Michel Temer.Directed by Fernando Meirelles, Daniela Thomas and Andrucha Waddington, the ceremony featured presentations of the history and culture of Brazil, including its landscape and forests, the history of the Brazilian people dating back to the arrival of the Portuguese, music and samba, and the favelas among ot



TIMEOUT...


##### Search

In [46]:
agent_llm = chat_llm_40
tool_llm = chat_llm_40

In [47]:
# Switch the agent's tools to the search tools from SearchToolFactory
# (agent_llm and tool_llm were both set to chat_llm_40 in the previous cell).
bot = ChatBot(agent_llm=agent_llm,
              agent_tools=SearchToolFactory(tool_llm).get_tools(),
              is_verbose=True)

bot.invoke("Which male bearer participated in Men's 100kg event in the Rio Olympic game").get_answer()



TOOL_SERP_RUN_DETAIL=>
 - RUN_ANSWER: [["The men's 100 kg competition in judo at the 2016 Summer Olympics in Rio de Janeiro was held on 11 August at the Carioca Arena 2.", "The men's +100 kg competition in judo at the 2016 Summer Olympics in Rio de Janeiro was held on 12 August at the Carioca Arena 2.", 'Find out who took home gold, silver and bronze in 2016. Official results of the judo + 100kg (heavyweight) men event at the Rio Summer Olympics.', 'Want to watch live sport and original documentaries for free? Check out our website: https://oly.ch/WatchLiveSport Teddy Riner, ...', 'Official Judo results from the Rio 2016 Olympics. Full list of gold, silver and bronze medallists as well as photos and videos of medal-winning moments.', "Men's Judo -100kg Bronze Contest B | Rio 2016 Replays ... Ryunosuke Haga wins bronze for Japan in the men's judo -100kg contest. Show more.", "don't ever let non judo players watch this video. 17:00 · Go to channel · Women's 3m Springboard Diving Final | 

'Ryunosuke Haga'

In [48]:
# Comparison-style multi-hop question over two Olympic events, using the search tools.
# NOTE(review): the recorded answer below is a name ('Michael Phelps') although
# the question asks for a birth date — the run did not finish the last hop.
bot = ChatBot(agent_llm=agent_llm,
              agent_tools=SearchToolFactory(tool_llm).get_tools(),
              is_verbose=True)

bot.invoke("For the 2012 and 2016 Olympic Event, when was the younger flag bearer born").get_answer()

NO_SNIPPET=>{'position': 3, 'title': 'Flag Bearers - LONDON 2012 Opening Ceremony', 'link': 'https://stillmed.olympic.org/Documents/Games_London_2012/Flagbearers.pdf', 'displayed_link': 'https://stillmed.olympic.org › Documents › Flag...', 'about_page_link': 'https://www.google.com/search?q=About+https://stillmed.olympic.org/Documents/Games_London_2012/Flagbearers.pdf&tbm=ilp', 'about_page_serpapi_link': 'https://serpapi.com/search.json?engine=google_about_this_result&google_domain=google.com&q=About+https%3A%2F%2Fstillmed.olympic.org%2FDocuments%2FGames_London_2012%2FFlagbearers.pdf', 'cached_page_link': 'https://webcache.googleusercontent.com/search?q=cache:_BNf8cg4G6oJ:https://stillmed.olympic.org/Documents/Games_London_2012/Flagbearers.pdf&hl=en&gl=us', 'source': 'Olympics.com'}
TOOL_SERP_RUN_DETAIL=>
 - RUN_ANSWER: [[[{'order': '1', 'country': 'Greece (GRE)', 'flag_bearer': 'Alexandros Nikolaidis', 'sport': 'Taekwondo'}, {'order': '2', 'country': 'Afghanistan (AFG)', 'flag_bearer'

'Michael Phelps'

# References
- https://youtu.be/Eug2clsLtFs?si=vuumOZNA6GXjaIay
- https://python.langchain.com/docs/modules/agents/agent_types/react