In [41]:
import os
import pandas as pd
import numpy as np
import faiss
from faiss import write_index
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
from config import CFG
import gradio as gr
import requests
import json
import GUI
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def _lcs(string, sub):
    """
    Computes longest common subsequence (LCS) for a pair of tokenized strings
    :param string : list of str : tokens from a string split using whitespace
    :param sub : list of str : shorter string, also split using whitespace
    :returns: length (list of int): length of the LCS between the two strings
    """

    if len(string) < len(sub):
        sub, string = string, sub

    str_len, sub_len = len(string), len(sub)
    lengths = [[0 for _ in range(sub_len + 1)] for _ in range(str_len + 1)]

    for j in range(1, sub_len + 1):
        for i in range(1, str_len + 1):
            if string[i - 1] == sub[j - 1]:
                lengths[i][j] = lengths[i - 1][j - 1] + 1
            else:
                lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])

    return lengths[str_len][sub_len]


class Rouge(object):
    """
    Class for computing ROUGE-L score for a set of
    candidate sentences for the MS COCO test set.

    ROUGE-L combines LCS-based precision and recall into an F-measure in
    which ``beta`` weights recall relative to precision.
    """

    def __init__(self):
        # vrama91: updated the value below based on discussion with Hovey
        self.beta = 1.2

    def calc_score(self, candidate, refs):
        """
        Compute ROUGE-L score given one candidate and references for an image
        :param candidate: list of str : single-element list holding the candidate sentence
        :param refs: list of str : COCO reference sentences for the particular image to be evaluated
        :returns score: float (ROUGE-L score for the candidate evaluated against references;
                 0.0 when the candidate has no tokens or nothing overlaps)
        :raises AssertionError: if ``candidate`` does not hold exactly one item or ``refs`` is empty
        """

        assert(len(candidate) == 1)
        assert(len(refs) > 0)

        # split into tokens
        token_c = candidate[0].split()

        # An empty / whitespace-only candidate cannot overlap with anything;
        # returning early also avoids dividing by a zero token count below.
        if not token_c:
            return 0.0

        prec = []
        rec = []

        for reference in refs:
            # split into tokens
            token_r = reference.split()

            # An empty reference contributes no overlap (and would otherwise
            # make the recall denominator zero).
            if not token_r:
                prec.append(0.0)
                rec.append(0.0)
                continue

            # compute the longest common subsequence
            lcs = _lcs(token_r, token_c)
            prec.append(lcs / float(len(token_c)))
            rec.append(lcs / float(len(token_r)))

        # Best precision and best recall are taken independently, so they may
        # come from different references.
        prec_max = max(prec)
        rec_max = max(rec)

        if prec_max != 0 and rec_max != 0:
            score = ((1 + self.beta ** 2) * prec_max * rec_max) / \
                float(rec_max + self.beta ** 2 * prec_max)
        else:
            score = 0.0

        return score

    def compute_score(self, gts, res):
        """
        Computes Rouge-L score given a set of reference and
        candidate sentences for the dataset.
        :param gts: dict : ground_truth, mapping each id to a non-empty list of reference sentences
        :param res: dict : results of predict, mapping each id in ``gts`` to a one-element list
        :returns: tuple (average_score, scores) : the mean ROUGE-L score scaled to a
                  percentage, and a numpy array of the unscaled per-id scores
                  ordered by sorted id
        :raises AssertionError: if an entry is not a list of the expected length
        """

        score = []

        for idx in sorted(gts.keys()):
            hypo = res[idx]
            ref = gts[idx]

            # Sanity check (done before scoring so malformed entries fail
            # here rather than somewhere inside calc_score)
            assert(isinstance(hypo, list))
            assert(isinstance(ref, list))
            assert(len(hypo) == 1)
            assert(len(ref) > 0)

            score.append(self.calc_score(hypo, ref))

        average_score = np.mean(np.array(score))

        # convert to percentage
        return 100 * average_score, np.array(score)

    @staticmethod
    def method():
        """Return the display name of this metric."""
        return "ROUGE-L"

In [48]:
import re

import pandas as pd

# Answers of the LiveQA-Med test set that were retrieved from MedQuAD.
ANSWERS_CSV = 'dataset/QA-TestSet-LiveQA-Med-Qrels-2479-Answers/All-2479-Answers-retrieved-from-MedQuAD.csv'

df = pd.read_csv(ANSWERS_CSV, engine='python')
print(df.head())

# Each raw 'Answer' cell packs three fields into one string:
#   "Question: <question>\r\nURL: <url>\r\nAnswer: <answer text>"
pattern = r'Question:\s*(.*?)\s*URL:\s*(https?://[^\s]+)\s*Answer:\s*(.*)'

# Split the packed text into separate columns. re.DOTALL lets the final group
# span line breaks; without it a multi-line answer is cut at its first newline.
# Rows that do not match the pattern come back as NaN in all three columns.
questions_df = df['Answer'].str.extract(pattern, flags=re.DOTALL, expand=True)
questions_df.columns = ['Question', 'URL', 'Answer']

# Drop the "(Also called: ...)" alias list that some questions carry.
questions_df['Question'] = questions_df['Question'].str.replace(r'\(Also called:.*?\)', '', regex=True).str.strip()
# The greedy answer group runs to the end of the cell, so trim stray whitespace.
questions_df['Answer'] = questions_df['Answer'].str.strip()

                AnswerID                                             Answer
0  ADAM_0003147_Sec1.txt  Question: What is (are) Polycystic ovary syndr...
1  ADAM_0003147_Sec2.txt  Question: What causes Polycystic ovary syndrom...
2  ADAM_0002818_Sec2.txt  Question: What causes Noonan syndrome ?\r\nURL...
3  ADAM_0002818_Sec7.txt  Question: What are the complications of Noonan...
4  ADAM_0002818_Sec9.txt  Question: How to prevent Noonan syndrome ?\r\n...


In [51]:
# Preview the first five rows of questions_df as a quick sanity check.
questions_df.head(5)

Unnamed: 0,Question,URL,Answer
0,What is (are) Polycystic ovary syndrome ?,https://www.nlm.nih.gov/medlineplus/ency/artic...,Polycystic ovary syndrome is a condition in wh...
1,What causes Polycystic ovary syndrome ?,https://www.nlm.nih.gov/medlineplus/ency/artic...,PCOS is linked to changes in hormone levels th...
2,What causes Noonan syndrome ?,https://www.nlm.nih.gov/medlineplus/ency/artic...,Noonan syndrome is linked to defects in severa...
3,What are the complications of Noonan syndrome ?,https://www.nlm.nih.gov/medlineplus/ency/artic...,- Buildup of fluid in tissues of body (lymphed...
4,How to prevent Noonan syndrome ?,https://www.nlm.nih.gov/medlineplus/ency/artic...,Couples with a family history of Noonan syndro...


In [50]:

# Plain Python list holding every value of the 'Answer' column, in row order.
answer_column = questions_df['Answer']
respuestas = answer_column.tolist()

In [47]:
# Smoke test for ROUGE-L: one candidate sentence scored against three references.
demo_candidate = ['hola que tal']
demo_references = [
    'hola que tal estas cariño',
    'hola mi amor',
    'how are you?',
]
print(Rouge().calc_score(demo_candidate, demo_references))

0.7176470588235294


In [2]:
import torch

# Check whether CUDA is available to this kernel.
cuda_available = torch.cuda.is_available()

# Report the availability flag.
print("CUDA disponible:", cuda_available)

# Report the default device: the current GPU's name, or CPU when CUDA is absent.
if not cuda_available:
    print("Dispositivo por defecto: CPU")
else:
    active_gpu = torch.cuda.current_device()
    print("Dispositivo por defecto:", torch.cuda.get_device_name(active_gpu))


CUDA disponible: True
Dispositivo por defecto: NVIDIA GeForce GTX 1650


In [3]:

# Create the chatbot client from the project-local GUI module, requesting the
# 'llama2' model. NOTE(review): how `model` is resolved is defined in GUI — confirm there.
client=GUI.MedicalChatBotGUI(model='llama2')

In [4]:
# Load the data and build the vector database.
client.generate_faiss_db()

  return self.fget.__get__(instance, owner)()
100%|██████████| 238/238 [01:20<00:00,  2.95it/s]


Unnamed: 0,question,question_id,question_type,answer,focus,id,source,url,cui,semanticType,semanticGroup
0,What is (are) A guide to clinical trials for c...,0000001-1,information,"If you have cancer, a clinical trial may be an...",A guide to clinical trials for cancer,1,ADAM,https://www.nlm.nih.gov/medlineplus/ency/patie...,C0006826,T191,Disorders
6,What is (are) A1C test ?,0000003-1,information,A1C is a lab test that shows the average level...,A1C test,3,ADAM,https://www.nlm.nih.gov/medlineplus/ency/artic...,C0456984,T033,Disorders
8,What is (are) Aarskog syndrome ?,0000004-1,information,Aarskog syndrome is a very rare disease that a...,Aarskog syndrome,4,ADAM,https://www.nlm.nih.gov/medlineplus/ency/artic...,C0175701,T019,Disorders
9,What causes Aarskog syndrome ?,0000004-2,causes,Aarskog syndrome is a genetic disorder that is...,Aarskog syndrome,4,ADAM,https://www.nlm.nih.gov/medlineplus/ency/artic...,C0175701,T019,Disorders
10,What are the symptoms of Aarskog syndrome ?,0000004-3,symptoms,Symptoms of this condition include: Belly butt...,Aarskog syndrome,4,ADAM,https://www.nlm.nih.gov/medlineplus/ency/artic...,C0175701,T019,Disorders
...,...,...,...,...,...,...,...,...,...,...,...
46972,What is (are) Von Willebrand Disease ?,0000139-1,information,Von Willebrand disease (VWD) is a bleeding dis...,Von Willebrand Disease,139,NHLBI,http://www.nhlbi.nih.gov/health/health-topics/...,C0042974,T047,Disorders
46973,What causes Von Willebrand Disease ?,0000139-2,causes,Von Willebrand disease (VWD) is almost always ...,Von Willebrand Disease,139,NHLBI,http://www.nhlbi.nih.gov/health/health-topics/...,C0042974,T047,Disorders
46974,What are the symptoms of Von Willebrand Disease ?,0000139-3,symptoms,The signs and symptoms of von Willebrand disea...,Von Willebrand Disease,139,NHLBI,http://www.nhlbi.nih.gov/health/health-topics/...,C0042974,T047,Disorders
46975,How to diagnose Von Willebrand Disease ?,0000139-4,exams and tests,Early diagnosis of von Willebrand disease (VWD...,Von Willebrand Disease,139,NHLBI,http://www.nhlbi.nih.gov/health/health-topics/...,C0042974,T047,Disorders


In [5]:
# Sample prompts offered as one-click examples in the chat UI.
example_queries = [
    "What is diabetes?",
    "Is ginseng good for diabetes?",
    "What are the symptoms of diabetes?",
]

# NOTE(review): the text below calls the bot "rule-based" although the title
# says RAG — confirm the intended wording before changing this user-facing string.
chatbot_description = "Medical RAG Chatbot is a chatbot that can help you with your medical queries. It is a rule-based chatbot that can answer your queries based on the information it has. It is not a replacement for a doctor. Please consult a doctor for any medical advice."

# Chat interface that forwards each user message to client.make_inference.
demo = gr.ChatInterface(
    fn=client.make_inference,
    examples=example_queries,
    title="Medical RAG Chatbot",
    description=chatbot_description,
)



In [6]:
# Start serving the chat interface. The recorded run served it on a local URL;
# per Gradio's own hint, pass share=True to launch() for a public link.
demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Hey! I see you're interested in learning about poisonous plants. As a medical sciences bot, I can provide you with information on different types of poisonous plants and their effects on human health. Based on your query, I recommend checking out the following articles from the National Library of Medicine:

1. Poison ivy, oak, or sumac poisoning: This article provides information on the symptoms, treatment, and prevention of poison ivy, oak, or sumac exposure. It's important to note that small amounts of sap can remain under a person's fingernails for several days, so thorough cleaning is essential to remove the toxin.
2. Chinese restaurant syndrome: This article discusses the myths and facts surrounding MSG (monosodium glutamate), a food additive that has been linked to various symptoms in some people. According to the FDA, MSG is generally considered safe for most people when consumed in foods at normal levels.
3. Poinsettia plant exposure: This article addresses common concerns abo