In [2]:
"""
## Only run this script the first time to generate the embeddings for the entities

from sentence_transformers import SentenceTransformer
import pandas as pd

model = SentenceTransformer("./results/domain_adaptation_model", device='cuda')
#model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2", device='cuda')

df = pd.read_parquet('data/triples_corpus.parquet')
data = pd.read_parquet('data/mlt_data_publications.parquet', 
                       columns=['paperId', 'title', 'abstract', 'venue', 's2FieldsOfStudy',
                                'publicationDate', 'authors'])

entities = {i: e for e, i in zip(df.subject.tolist() + df.object.tolist(), df.subjectId.tolist() + df.objectId.tolist())}
emb_ents = model.encode(list(entities.values()))

pd.to_pickle(entities, 'data/vector_store/entities.pkl')
pd.DataFrame(emb_ents).to_parquet('data/vector_store/emb_ents.parquet')
"""

'\n## Only run this script the first time to generate the embeddings for the entities\n\nfrom sentence_transformers import SentenceTransformer\nimport pandas as pd\n\nmodel = SentenceTransformer("./results/domain_adaptation_model", device=\'cuda\')\n#model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2", device=\'cuda\')\n\ndf = pd.read_parquet(\'data/triples_corpus.parquet\')\ndata = pd.read_parquet(\'data/mlt_data_publications.parquet\', \n                       columns=[\'paperId\', \'title\', \'abstract\', \'venue\', \'s2FieldsOfStudy\',\n                                \'publicationDate\', \'authors\'])\n\nentities = {i: e for e, i in zip(df.subject.tolist() + df.object.tolist(), df.subjectId.tolist() + df.objectId.tolist())}\nemb_ents = model.encode(list(entities.values()))\n\npd.to_pickle(entities, \'data/vector_store/entities.pkl\')\npd.DataFrame(emb_ents).to_parquet(\'data/vector_store/emb_ents.parquet\')\n'

In [3]:
import utils
import importlib
importlib.reload(utils)



<module 'utils' from '/home/estudiante/mlt_project/utils/__init__.py'>

## RAG example:

In [18]:
usr_msg = "Which papers were published in the venue ’IEEE Transactions on Medical Imaging’?"

reception_answer = utils.request_agent(usr_msg, role="recepcionist",
                                       max_tokens=1000)
response = reception_answer['response']
if response=="redirect":
    emb_q = utils.CUSTOM_MODEL.encode(usr_msg)
    res_ir =  utils.get_top_k_relevant_info(emb_q, k=10)
    rag_prompt = f"Please read the following information:\n{res_ir}\nand use it to answer the user's question:\n{usr_msg}"
        
    response = utils.request_agent(rag_prompt, role="analyst", temperature=0.3)
    
print(response)

{'response': "The papers that were published in the venue 'IEEE Transactions on Medical Imaging' are: 'Ridge-based vessel segmentation in color images of the retina', 'Prospective motion correction of X-ray images for coronary interventions', 'Segmenting Retinal Blood Vessels With Deep Neural Networks', 'Model-based quantitation of 3-D magnetic resonance angiographic images', 'Geometrically correct 3-D reconstruction of intravascular ultrasound images by fusion with biplane angiography-methods and validation'."}


## RAG Agent Deployment in a Telegram Bot

In [8]:
@utils.BOT.message_handler(func=lambda msg: True)
def echo_all(message):
    print(message.text, type(message))
    usr_msg = message.text
    reception_answer = utils.request_agent(usr_msg, role="recepcionist", 
                                       model='mixtral-8x7b-32768', max_tokens=1000)
    response = reception_answer['response']
    
    if response=="redirect":
        emb_q = utils.CUSTOM_MODEL.encode(usr_msg)
        res_ir =  utils.get_top_k_relevant_info(emb_q, k=10)
        rag_prompt = f"Please read the following information:\n{res_ir}\nand use it to answer the user's question:\n{usr_msg}"
        
        response = utils.request_agent(rag_prompt, role="analyst", temperature=0.3)
        
    print(response, '**'*10)
    utils.BOT.reply_to(message, response)
    
utils.BOT.infinity_polling()

Hi! <class 'telebot.types.Message'>
Hi! How can I help you? You can ask questions about scientific publications, authors, fields of study, and venues. If I have relevant information, I will gladly provide it. ********************
In which fields of study does the author N. Flyer work? <class 'telebot.types.Message'>
I will look for the information you need. The author N. Flyer works in the field of study of Astrophysics. ********************
Wich author has co authored with lavarone A? <class 'telebot.types.Message'>
The authors who have co-authored with Lavarone A are: 
 - Gullo, S. 
 - Zappatore, M. 
 - Oliveto, R. 
 - Mosca, G. 
 - Squillero, M. 
 - Neri, F. ********************
Which authors' works are in artificial intelligence? <class 'telebot.types.Message'>
{'response': 'The authors whose works are in the field of artificial intelligence include Heather Barber, D. Kudenko, Milind Tambe, W. Johnson, Randolph M. Jones, F. Koss, J. Laird, P. Rosenbloom, K. Schwamb, and S. Seiden.'

2024-05-19 09:03:24,725 (__init__.py:1101 MainThread) ERROR - TeleBot: "Infinity polling: polling exited"
2024-05-19 09:03:24,728 (__init__.py:1103 MainThread) ERROR - TeleBot: "Break infinity polling"
