In [1]:
"""
## Only run this script the first time to generate the embeddings for the entities

from sentence_transformers import SentenceTransformer
import pandas as pd

model = SentenceTransformer("./results/domain_adaptation_model", device='cuda')
#model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2", device='cuda')

df = pd.read_parquet('data/triples_corpus.parquet')
data = pd.read_parquet('data/mlt_data_publications.parquet', 
                       columns=['paperId', 'title', 'abstract', 'venue', 's2FieldsOfStudy',
                                'publicationDate', 'authors'])

entities = {i: e for e, i in zip(df.subject.tolist() + df.object.tolist(), df.subjectId.tolist() + df.objectId.tolist())}
emb_ents = model.encode(list(entities.values()))

pd.to_pickle(entities, 'data/vector_store/entities.pkl')
pd.DataFrame(emb_ents).to_parquet('data/vector_store/emb_ents.parquet')
"""

'\n## Only run this script the first time to generate the embeddings for the entities\n\nfrom sentence_transformers import SentenceTransformer\nimport pandas as pd\n\nmodel = SentenceTransformer("./results/domain_adaptation_model", device=\'cuda\')\n#model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2", device=\'cuda\')\n\ndf = pd.read_parquet(\'data/triples_corpus.parquet\')\ndata = pd.read_parquet(\'data/mlt_data_publications.parquet\', \n                       columns=[\'paperId\', \'title\', \'abstract\', \'venue\', \'s2FieldsOfStudy\',\n                                \'publicationDate\', \'authors\'])\n\nentities = {i: e for e, i in zip(df.subject.tolist() + df.object.tolist(), df.subjectId.tolist() + df.objectId.tolist())}\nemb_ents = model.encode(list(entities.values()))\n\npd.to_pickle(entities, \'data/vector_store/entities.pkl\')\npd.DataFrame(emb_ents).to_parquet(\'data/vector_store/emb_ents.parquet\')\n'

In [1]:
import utils
import importlib
importlib.reload(utils)

<module 'utils' from '/home/raul/Escritorio/extra/misis/ml_tech/mlt_project/utils/__init__.py'>

## Ejemplo RAG:

In [2]:
usr_msg = "Which papers were published in the venue ’IEEE Transactions on Medical Imaging’?"

reception_answer = utils.request_agent(usr_msg, role="recepcionist",
                                       max_tokens=1000)
response = reception_answer['response']
if response=="redirect":
    emb_q = utils.CUSTOM_MODEL.encode(usr_msg)
    res_ir =  utils.get_top_k_relevant_info(emb_q, k=10)
    rag_prompt = f"Please read the following information:\n{res_ir}\nand use it to answer the user's question:\n{usr_msg}"
        
    response = utils.request_agent(rag_prompt, role="analyst", 
                                   temperature=0.3, response_format="text")

print()
print('**** Augmented response ****')
print(response['response'])

**** retrieved ids ****
 ['ac9748ea3945eb970cc32a37db7cfdfd0f22e74c', '7b8985fb105bd863501bb366f48fc55bbd935424', '0721c8eb12ea00b7c3769a5e40592b65d8e7a71b', '523c82c922f761deccd85f95d95b8a4bb34bef5f', 'ab657a056195325116f056cbfdca48a483453e3d', 'a2c3fd9a7d0813a88cbed195b08b0fe30d790aa7', 'ab27aad2ee38379825be9bff95e6a1f9f9981e1c', '3eaa82bffade4787ec1d20a86a5cd51afcdbfef8', '39524eeeeed96be8a2970caf0fa2673c9b4314b9', 'd607b773d2719a5948bab0c16500e4f00fd61df8']

**** Augmented response ****
The papers that were published in the venue 'IEEE Transactions on Medical Imaging' are:
- 'Ridge-based vessel segmentation in color images of the retina'
- 'Model-based quantitation of 3-D magnetic resonance angiographic images'
- 'Segmenting Retinal Blood Vessels With Deep Neural Networks'
- 'Geometrically correct 3-D reconstruction of intravascular ultrasound images by fusion with biplane angiography-methods and validation'
- 'Unsupervised Multiresolution Segmentation for Images with Low Depth of 

## Implementación del Agente RAG en un Bot de Telegram

In [2]:
@utils.BOT.message_handler(func=lambda msg: True)
def echo_all(message):
    print(' ***** User msg: ',message.text,' *****')
    usr_msg = message.text
    reception_answer = utils.request_agent(usr_msg, role="recepcionist", 
                                           max_tokens=1000)
    response = reception_answer['response']
    
    if response=="redirect":
        print('***** Looking for relevant information... *****')
        emb_q = utils.CUSTOM_MODEL.encode(usr_msg)
        res_ir =  utils.get_top_k_relevant_info(emb_q, k=15)[:35000]
        rag_prompt = f"Please read the following information:\n{res_ir}\nand use it to answer the user's question:\n{usr_msg}"
        
        response = utils.request_agent(rag_prompt, role="analyst", 
                                       temperature=0.4, response_format="text").get('response', 'Sorry, I could not find an answer to your question. Try again later.')
        
    print(' ***** Agent msg: ',response, ' *****')
    utils.BOT.reply_to(message, response)
    
utils.BOT.infinity_polling()

 ***** User msg:  in which fields of study does the author T. Feder work?  *****
***** Looking for relevant information... *****
**** retrieved ids ****
 ['72c3d786-cb17-3a13-9011-12dffc9f2357', '46765511-b961-3ff6-a23d-3ea8b6b2c950', 'fb9a4f64-7033-3b5e-a0a7-138defe2fc16', '173845e3-ff41-322f-b212-f6aa3f8cc104', 'ae9016bb-b759-36de-a691-0664de814539', 'f9a62c7a-27f1-3112-a2fa-1b057699dac6', '746ae1c2-f83c-34ec-8c49-eb14866a789a', '2024d957-f94b-35ee-b518-8e3e1bf7548b', '6d0d69d3-f24c-35d9-a301-356007c3ce28', 'a9c20f66-2028-3d81-a5c8-f42740f808f9', '54b1834c-8853-358a-83c2-b959e3d18092', 'e1b1b884-bd6d-3664-9040-f02258e28ce0', 'ccc06765f3490651a7eaaa964e9dded9508bd8be', '4bdbd65f-66c3-3866-bd8f-bc4b8f4c43ae', '66118079-6c93-39ea-b32a-e049be45b81b']
 ***** Agent msg:  The author T. Feder works in the field of Mathematics.  *****
 ***** User msg:  Which author has co authored with  R Mathies?  *****
***** Looking for relevant information... *****
**** retrieved ids ****
 ['3309273', '224

2024-05-25 11:07:58,068 (__init__.py:1086 MainThread) ERROR - TeleBot: "Infinity polling exception: HTTPSConnectionPool(host='api.telegram.org', port=443): Read timed out. (read timeout=25)"
2024-05-25 11:07:58,069 (__init__.py:1088 MainThread) ERROR - TeleBot: "Exception traceback:
Traceback (most recent call last):
  File "/home/raul/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 537, in _make_request
    response = conn.getresponse()
  File "/home/raul/.local/lib/python3.10/site-packages/urllib3/connection.py", line 466, in getresponse
    httplib_response = super().getresponse()
  File "/usr/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/usr/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python3.10/http/client.py", line 279, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/usr/lib/python3.10/socket.py", line 705, 

 ***** User msg:  which authors works in computer vision AI and had published in arxiv?  *****
***** Looking for relevant information... *****
**** retrieved ids ****
 ['1699080', '144562609', '145689002', '145340672', '2052135690', 'a473f545318325ba23b7a6b477485d29777ba873', '2069319183', '3025260', '144467092', '2754804', '5c378ca2e4699eaf763de9f8ec02ca89860bb1cf', '1764560', '3229686', '81338045', '1721484']
 ***** Agent msg:  I have analyzed the provided data, and I found that there are no authors who have published specifically in arXiv and work in the field of Computer Vision and AI. However, I can provide you with a list of authors who work in Computer Vision and AI and have published in various conferences and journals.

1. Devendra Singh Chaplot: Works in Computer Vision and AI, and has published the paper 'Neural Topological SLAM for Visual Navigation' in Computer Vision and Pattern Recognition.

2. R. Salakhutdinov: Works in Computer Vision and AI, and has co-authored the pa

2024-05-25 11:08:51,792 (__init__.py:1092 MainThread) ERROR - TeleBot: "Infinity polling: polling exited"
2024-05-25 11:08:51,792 (__init__.py:1094 MainThread) ERROR - TeleBot: "Break infinity polling"
