In [1]:
import ast
import os

from dotenv import load_dotenv

from tree_rag import get_llm_chain, get_entity_retriever_prompt, get_search_terms_retriever_prompt, get_filtered_urls

In [2]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Fail fast when the OpenAI key is missing. Exporting an empty string instead
# would hide the problem until the first LLM call fails with an opaque
# authentication error.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [3]:
# Free-text research request supplied by the user; it is the input to the
# first LLM call further down.
user_insights = (
    "I want to know about the integration of AI in healthcare, "
    "it's possible benefits and limitations and practical implementations."
)

In [4]:
# Build the entity step: fetch its prompt from tree_rag and hand it to
# get_llm_chain. The prompt text and the model behind the chain are defined
# inside tree_rag (not visible in this notebook).
entity_prompt = get_entity_retriever_prompt()
llm = get_llm_chain(entity_prompt)

In [5]:
# Run the chain on the user's request, supplied under the "user_insights" key.
response = llm.invoke({"user_insights": user_insights})
# Show the raw reply text. Per the captured output it is a *string* that
# merely looks like a Python list, not an actual list object.
response.content

2024-06-05 21:09:22,725 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


"['AI in healthcare', 'benefits', 'limitations', 'practical implementations']"

In [6]:
# Keep the reply text as-is (still a string, not parsed) so it can be passed
# into the next prompt.
key_entities = response.content

In [7]:
# Build the search-terms step the same way as the entity step.
# NOTE: this rebinds `llm`, so the entity chain is no longer reachable under
# that name; re-running the earlier "user_insights" invoke cell after this one
# would call this chain instead.
search_terms_prompt = get_search_terms_retriever_prompt()
llm = get_llm_chain(search_terms_prompt)

In [8]:
# Ask the search-terms chain for queries, feeding it the entity reply text.
# `response` is overwritten here; the entity reply survives only in `key_entities`.
response = llm.invoke({"key_entities": key_entities})
# Show the raw reply text (again a string that looks like a Python list).
response.content

2024-06-05 21:09:24,292 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


"['benefits of AI in healthcare', 'practical implementations of AI in healthcare', 'limitations of AI in healthcare']"

In [21]:
# Parse the LLM reply -- the text of a Python list literal -- into a real list.
# ast.literal_eval only evaluates literals, so the reply is never executed as
# code. (`ast` is imported with the other modules in the notebook's first cell.)
raw_search_terms = response.content
try:
    parsed_terms = ast.literal_eval(raw_search_terms)
except (ValueError, SyntaxError, TypeError) as exc:
    # Re-raise with the offending text: the bare literal_eval error omits it.
    raise ValueError(
        f"Could not parse search terms from LLM reply: {raw_search_terms!r}"
    ) from exc

# Guard against a syntactically valid reply of the wrong shape, e.g. a bare
# string, which a later `for search_term in search_terms` loop would walk
# character by character.
if not isinstance(parsed_terms, (list, tuple)) or not all(
    isinstance(term, str) for term in parsed_terms
):
    raise ValueError(
        f"Expected a list of strings from the LLM, got: {parsed_terms!r}"
    )

search_terms = list(parsed_terms)
search_terms

['benefits of AI in healthcare',
 'practical implementations of AI in healthcare',
 'limitations of AI in healthcare']

In [23]:
# Collect URLs for every search term.
#   urls_by_term : search term -> list of URLs returned for that term, so each
#                  term can be processed as its own branch.
#   url_sources  : the same URLs flattened in search order (unchanged from the
#                  previous behaviour; duplicates are kept).
urls_by_term = {}
url_sources = []
for search_term in search_terms:
    # Materialise once so the per-term and flattened views share the same items.
    term_urls = list(get_filtered_urls(search_term))
    # setdefault + extend keeps results even if a search term is repeated.
    urls_by_term.setdefault(search_term, []).extend(term_urls)
    url_sources.extend(term_urls)

In [26]:
# Inspect the flattened list of URLs gathered across all search terms.
url_sources

['https://emeritus.org/blog/ai-and-ml-benefits-of-ai-in-healthcare/',
 'https://www.ibm.com/think/insights/ai-healthcare-benefits',
 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6616181/',
 'https://d3lkc3n5th01x7.cloudfront.net/wp-content/uploads/2023/02/15020226/AI-in-Healthcare-3.png?sa=X&ved=2ahUKEwiIosfK58SGAxUJa2wGHQmkDxEQ_B16BAgAEAI',
 'https://www.leewayhertz.com/ai-use-cases-in-healthcare/',
 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9908503/',
 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9908503/#Abs1title',
 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9908503/#Sec1title']

In [27]:
# Total number of URLs collected across all search terms (no de-duplication applied).
len(url_sources)

8

### So, 3 search terms lead to 8 URLs.
### We should not treat all the URLs as independent, because several of them come from the same search term.
### We should keep the branches separate for each search_term; this way we ensure that we are covering each aspect of the user insights.
### Merging all the URLs together and then performing operations will give us half-baked results.