In [1]:
# One-off bootstrap: builds the entity lookup table and its embedding matrix
# under data/vector_store/. Flip the flag to True only for the first run (or to
# regenerate the store). It stays False otherwise so that a plain "Run All"
# does not reload the model and re-encode every entity.
REBUILD_ENTITY_EMBEDDINGS = False

if REBUILD_ENTITY_EMBEDDINGS:
    # Imported lazily: these are only needed while rebuilding the store.
    from sentence_transformers import SentenceTransformer
    import pandas as pd

    model = SentenceTransformer("./results/domain_adaptation_model", device='cuda')
    #model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2", device='cuda')

    df = pd.read_parquet('data/triples_corpus.parquet')

    # Map every entity id (subjects first, then objects) to its text. An id
    # that occurs more than once keeps the last text seen for it.
    entity_ids = df.subjectId.tolist() + df.objectId.tolist()
    entity_texts = df.subject.tolist() + df.object.tolist()
    entities = dict(zip(entity_ids, entity_texts))

    # Embedding rows follow the insertion order of `entities`, so row i of the
    # stored matrix corresponds to the i-th key of the pickled dict.
    emb_ents = model.encode(list(entities.values()))

    pd.to_pickle(entities, 'data/vector_store/entities.pkl')
    pd.DataFrame(emb_ents).to_parquet('data/vector_store/emb_ents.parquet')

In [1]:
import utils
import importlib
# Re-execute the already-imported `utils` module so that edits made to it on
# disk are picked up without restarting the kernel.
importlib.reload(utils)

<module 'utils' from '/home/raul/Escritorio/extra/misis/ml_tech/mlt_project/utils/__init__.py'>

## Ejemplo RAG:

In [4]:
usr_msg = "Which papers were published in the venue ’IEEE Transactions on Medical Imaging’?"

# Step 1: ask the "recepcionist" agent. Its 'response' field is either a
# direct answer or the sentinel "redirect", which asks for retrieval.
reception_answer = utils.request_agent(usr_msg, role="recepcionist",
                                       max_tokens=1000)
response = reception_answer['response']
if response == "redirect":
    # Step 2: embed the question, fetch the k most relevant pieces of
    # information and let the "analyst" agent answer from that context.
    emb_q = utils.CUSTOM_MODEL.encode(usr_msg)
    res_ir = utils.get_top_k_relevant_info(emb_q, k=10)
    rag_prompt = f"Please read the following information:\n{res_ir}\nand use it to answer the user's question:\n{usr_msg}"

    analyst_answer = utils.request_agent(rag_prompt, role="analyst",
                                         temperature=0.3, response_format="text")
    # Unwrap here so `response` is the answer text on both paths. Indexing it
    # again at print time would raise TypeError when no redirect happened.
    response = analyst_answer['response']

print()
print('**** Augmented response ****')
print(response)

**** retrieved ids ****
 ['ac9748ea3945eb970cc32a37db7cfdfd0f22e74c', '7b8985fb105bd863501bb366f48fc55bbd935424', '0721c8eb12ea00b7c3769a5e40592b65d8e7a71b', '523c82c922f761deccd85f95d95b8a4bb34bef5f', 'ab657a056195325116f056cbfdca48a483453e3d', 'a2c3fd9a7d0813a88cbed195b08b0fe30d790aa7', 'ab27aad2ee38379825be9bff95e6a1f9f9981e1c', '3eaa82bffade4787ec1d20a86a5cd51afcdbfef8', '39524eeeeed96be8a2970caf0fa2673c9b4314b9', 'd607b773d2719a5948bab0c16500e4f00fd61df8']

**** Augmented response ****
The papers that were published in the venue 'IEEE Transactions on Medical Imaging' are:
- 'Ridge-based vessel segmentation in color images of the retina'
- 'Segmenting Retinal Blood Vessels With Deep Neural Networks'
- 'Model-based quantitation of 3-D magnetic resonance angiographic images'
- 'Geometrically correct 3-D reconstruction of intravascular ultrasound images by fusion with biplane angiography-methods and validation'


## Implementación del Agente RAG en un Bot de Telegram

In [4]:
# Reply used whenever no usable answer could be produced.
_FALLBACK_REPLY = 'Sorry, I could not find an answer to your question. Try again later.'
# Telegram rejects text messages longer than this many characters.
_TELEGRAM_MAX_MESSAGE_LEN = 4096


def _answer_question(usr_msg):
    """Run the receptionist -> (optional) retrieval -> analyst pipeline.

    Args:
        usr_msg: The text the user sent to the bot.

    Returns:
        A non-empty string to send back to the user. Falls back to
        ``_FALLBACK_REPLY`` when an agent reply has no 'response' field or
        the answer is blank.
    """
    reception_answer = utils.request_agent(usr_msg, role="recepcionist",
                                           model='mixtral-8x7b-32768', max_tokens=1000)
    response = reception_answer.get('response', _FALLBACK_REPLY)

    if response == "redirect":
        # The receptionist asked for retrieval: build a context-augmented
        # prompt from the k most relevant items and hand it to the analyst.
        emb_q = utils.CUSTOM_MODEL.encode(usr_msg)
        res_ir = utils.get_top_k_relevant_info(emb_q, k=10)
        rag_prompt = f"Please read the following information:\n{res_ir}\nand use it to answer the user's question:\n{usr_msg}"

        response = utils.request_agent(rag_prompt, role="analyst",
                                       temperature=0.3, response_format="text").get('response', _FALLBACK_REPLY)

    # The bot can only send text; coerce anything else (e.g. a parsed list)
    # and never return an empty message, which Telegram would reject.
    if not isinstance(response, str):
        response = str(response)
    return response if response.strip() else _FALLBACK_REPLY


def _split_for_telegram(text, limit=_TELEGRAM_MAX_MESSAGE_LEN):
    """Split ``text`` into consecutive chunks of at most ``limit`` characters."""
    return [text[start:start + limit] for start in range(0, len(text), limit)]


@utils.BOT.message_handler(func=lambda msg: True)
def echo_all(message):
    """Answer every incoming message with the RAG agent's reply.

    Args:
        message: The incoming Telegram message; its ``text`` is the question.
    """
    print(message.text, type(message))
    try:
        response = _answer_question(message.text)
    except Exception as exc:
        # Keep the conversation alive: log the failure and still reply,
        # instead of leaving the user without any answer.
        print(f'Failed to answer message: {exc!r}')
        response = _FALLBACK_REPLY

    print(response, '**'*10)
    # `response` is never empty here, so there is always a first chunk.
    first_chunk, *other_chunks = _split_for_telegram(response)
    utils.BOT.reply_to(message, first_chunk)
    for chunk in other_chunks:
        utils.BOT.send_message(message.chat.id, chunk)
    
# Start polling Telegram for updates and dispatch them to the handler above.
# NOTE(review): this call appears to block the cell until polling is stopped
# (the recorded output ends with "Break infinity polling") -- interrupt the
# kernel to stop the bot.
utils.BOT.infinity_polling()

Hi! <class 'telebot.types.Message'>
Hi! How can I help you? You can ask questions about scientific publications, authors, fields of study, and venues. If I have relevant information, I will gladly provide it. ********************
Show me papers that have been published on arxiv.org and are about computer science. <class 'telebot.types.Message'>
[{'title': 'Paper 1 Title', 'authors': ['Author 1', 'Author 2'], 'venue': 'arXiv.org', 'field_of_study': 'Computer Science'}, {'title': 'Paper 2 Title', 'authors': ['Author 3', 'Author 4'], 'venue': 'arXiv.org', 'field_of_study': 'Computer Science'}, {'title': 'Paper 3 Title', 'authors': ['Author 5', 'Author 6'], 'venue': 'arXiv.org', 'field_of_study': 'Computer Science'}] ********************
What papers has Kirk Fiedler written? <class 'telebot.types.Message'>
Kirk Fiedler has written the following papers:

1. 'EyeSpy: supporting navigation through play'
2. "IS '97: model curriculum and guidelines for undergraduate degree programs in informati

2024-05-21 11:57:30,154 (__init__.py:1092 MainThread) ERROR - TeleBot: "Infinity polling: polling exited"
2024-05-21 11:57:30,154 (__init__.py:1094 MainThread) ERROR - TeleBot: "Break infinity polling"
