In [1]:

import json
import os
import re
import uuid

import requests
from bs4 import BeautifulSoup
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

In [2]:
import chromadb

In [3]:
# Groq-hosted chat model. It is used both to resolve a drug's generic name and
# to extract structured data from scraped pages.
# The API key is read from the GROQ_API_KEY environment variable so the secret
# never lives in the notebook; a missing variable fails fast with a KeyError.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0.5,
    timeout=None,
    max_retries=3,
    api_key=os.environ["GROQ_API_KEY"],
)

In [4]:
# Extraction prompt: {page} is the scraped page text. Doubled braces ({{ }}) are
# literal braces in the rendered prompt. Both examples are valid JSON so they
# agree with the "STRICT JSON" requirement.
prompt = PromptTemplate.from_template('''
### SCRAPED TEXT: {page}

### TASK INSTRUCTIONS:
Based on the above content, please extract and return information about the drug in the following categories. For each category, list the specific details provided, or use "not specified" if no information is found. Do not add any details that are not present in the text.

### REQUIRED INFORMATION:
1. **Drug Name** - Primary name of the drug and common brand names.
2. **Recommended Foods** - List any foods recommended when taking this drug (if none, specify "none").
3. **Foods to Avoid** - List any foods that should be avoided when taking this drug.
4. **Interacting Drugs** - Provide a list of other drugs that interact with this drug. If a specific waiting period is mentioned for timing between medications, include the wait time in hours; if none is specified, use -1.
5. **Common Side Effects** - Symptoms commonly reported by patients using this drug.
6. **Serious Side Effects** - Symptoms or reactions that may require immediate medical attention.
7. **Primary Uses** - The main medical conditions this drug is prescribed to treat.

### SAMPLE FORMAT:
{{
    "drug": "SampleMedicine",
    "super_food": ["none"],
    "bad_food": ["grapefruit juice", "infant soy formula", "cheese", "yogurt"],
    "interactive_drug": [
        {{"name": "Calcium carbonate", "delta": 4}},
        {{"name": "Sevelamer", "delta": 4}},
        {{"name": "Lanthanum", "delta": 4}},
        {{"name": "Cholestyramine", "delta": 4}},
        {{"name": "Sodium polystyrene sulfonate", "delta": 4}}
    ],
    "common_symptom": ["fever", "increased or change in appetite", "weight loss or weight gain", "changes in menstrual periods", "vomiting", "diarrhea"],
    "serious_symptom": ["hives", "difficult breathing", "diarrhea", "high blood sugar", "increased thirst"],
    "treatment": ["Hypothyroidism", "Thyroid cancer", "Myxedema coma"]
}}

### OUTPUT FORMAT:
Return the information in STRICT JSON format, following this example:

{{
    "drug": "ExampleDrugName",
    "super_food": ["list any good foods here"],
    "bad_food": ["list any foods to avoid here"],
    "interactive_drug": [
        {{ "name": "OtherDrugName", "delta": integer in hours or -1 }}
    ],
    "common_symptom": ["list common symptoms here"],
    "serious_symptom": ["list serious symptoms here"],
    "treatment": ["main treatments or conditions for this drug"]
}}
### IMPORTANT:
- Output **only JSON** with no extra commentary or explanation.
- If a category has no information, use "not specified" or an empty list as applicable.

''')

In [5]:
def data_from(drug, timeout=30):
    """Fetch the drugs.com page for a drug and return its visible text.

    The LLM is first asked for the drug's generic name, which is turned into a
    URL slug. If that page does not answer with HTTP 200, the name as given
    (spaces replaced by hyphens) is tried instead.

    Args:
        drug: Drug (salt) name used to look up the page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The text of the page's ``<div id="content">`` element, fragments joined
        by single spaces, or ``None`` when neither URL returns HTTP 200 or the
        page has no such element.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    generic = llm.invoke(f"{drug}'s most common generic name only").content
    # Strip first: a trailing newline or space from the model would otherwise
    # end up inside the URL as a stray hyphen or whitespace.
    generic_slug = generic.strip().lower().replace(" ", "-")
    # Built outside the f-string: nesting the same quote type inside an
    # f-string is a SyntaxError before Python 3.12.
    drug_slug = drug.replace(" ", "-")

    for slug in (generic_slug, drug_slug):
        response = requests.get(f"https://www.drugs.com/{slug}.html", timeout=timeout)
        if response.status_code == 200:
            break
    else:
        # Neither candidate URL exists.
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    content = soup.find('div', id='content')
    if content is None:
        # Page layout without the expected container: treat as "no data".
        return None
    return content.get_text(separator=" ", strip=True)
        

In [6]:
def get_salt(medName, timeout=30):
    """Look up the salt composition of a medicine on 1mg.com.

    Args:
        medName: Product slug appended to ``https://www.1mg.com/drugs/``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of salt names taken from the first link inside the page's
        ``saltInfo`` block, split on ``+`` and stripped of surrounding
        whitespace. An empty list is returned (after printing a message) when
        the request fails or the block/link is missing.
    """
    # Browser-like headers are sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Connection": "keep-alive",
    }
    try:
        # Without a timeout requests waits indefinitely on a stalled server;
        # Timeout is a RequestException, so it is handled below.
        response = requests.get(
            f"https://www.1mg.com/drugs/{medName}", headers=headers, timeout=timeout
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")

    info_card = soup.find("div", class_="saltInfo")
    if info_card is None:
        print("Salt information not found.")
        return []

    salt_link = info_card.find('a')
    if salt_link is None:
        print("No salt link found.")
        return []

    # Combination products list their salts separated by '+'.
    parts = (part.strip() for part in salt_link.get_text().split('+'))
    return [part for part in parts if part]

In [7]:
def _parse_json_reply(text):
    """Parse an LLM reply as JSON, tolerating code fences or stray commentary."""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span, e.g. inside a ```json fence.
        match = re.search(r"\{.*\}", text, re.DOTALL)
        if match is None:
            raise
        return json.loads(match.group(0))


def llmModel(medName):
    """Extract structured drug information for a 1mg product.

    Resolves the product's salts with ``get_salt``, then for each salt in turn
    scrapes its page with ``data_from`` and asks the LLM to extract the fields
    described in ``prompt``. The first salt that yields parseable JSON wins.

    Args:
        medName: 1mg product slug, passed through to ``get_salt``.

    Returns:
        The parsed JSON reply for the first salt that succeeds, or ``None``
        when no salt was found or every salt failed.
    """
    chain_extract = prompt | llm
    for salt in get_salt(medName):
        # Drop parenthesised parts of the salt name before the lookup.
        name = re.sub(r"\s*\(.*?\)", "", salt.strip())
        try:
            page = data_from(name)
            if page is None:
                # Nothing was scraped; do not ask the LLM to summarise "None".
                print(f"No page found for {name!r}")
                continue
            res = chain_extract.invoke(input={'page': page})
            return _parse_json_reply(res.content)
        except Exception as exc:
            # Best effort: report the cause and try the next salt.
            # Exception (not a bare except) lets KeyboardInterrupt through.
            print(f"Some Error Occured: {exc}")
    return None


In [8]:
# Run the pipeline for one 1mg product slug (the part after /drugs/ in the 1mg URL).
# `x` is the parsed record, or None if extraction failed for every salt.
# The commented lines are alternative products to try.
x = llmModel("thyronorm-50mcg-tablet-357013")
# x = llmModel("calpol-650-tablet-842047")
# x = llmModel("omnacortil-10-tablet-dt-150936")
# x = llmModel("eltroxin-50mcg-tablet-641335")
# x = llmModel("ace-q10-softgel-capsule-977025")

In [9]:
# Show the extracted record returned by llmModel.
print(x)

{'drug': 'Levothyroxine', 'super_food': [], 'bad_food': ['grapefruit juice', 'infant soy formula', 'soybean flour', 'cotton seed meal', 'walnuts', 'high-fiber foods', 'milk', 'dairy products', 'cheese', 'yogurt'], 'interactive_drug': [{'name': 'Calcium carbonate', 'delta': 4}, {'name': 'Sevelamer', 'delta': 4}, {'name': 'Lanthanum', 'delta': 4}, {'name': 'Cholestyramine', 'delta': 4}, {'name': 'Colesevelam', 'delta': 4}, {'name': 'Colestipol', 'delta': 4}, {'name': 'Iron supplements', 'delta': 4}, {'name': 'Sucralfate', 'delta': 4}, {'name': 'Sodium polystyrene sulfonate', 'delta': 4}, {'name': 'Esomeprazole', 'delta': 4}, {'name': 'Lansoprazole', 'delta': 4}, {'name': 'Omeprazole', 'delta': 4}, {'name': 'Rabeprazole', 'delta': 4}, {'name': 'Orlistat', 'delta': -1}, {'name': 'Phenobarbital', 'delta': -1}, {'name': 'Rifampin', 'delta': -1}, {'name': 'Ketamine', 'delta': -1}, {'name': 'Steroid medicines', 'delta': -1}, {'name': 'Amitriptyline', 'delta': -1}, {'name': 'Maprotiline', 'delt

In [15]:
# Chroma client created with path="db" (a location relative to the working directory).
client = chromadb.PersistentClient(path="db")

In [16]:
# Drop any existing "medDB" collection so the next cell starts from empty.
# delete_collection() returns nothing and raises when the collection does not
# exist, so only call it when the collection is present; this keeps the cell
# runnable against a brand-new "db" directory.
# list_collections() yields Collection objects or plain names depending on the
# chromadb version; getattr(..., "name", c) handles both.
existing_names = {getattr(c, "name", c) for c in client.list_collections()}
if "medDB" in existing_names:
    client.delete_collection(name="medDB")

In [17]:
# Open the "medDB" collection, creating it if it does not exist yet.
collection = client.get_or_create_collection(name="medDB")

In [26]:
# Number of entries currently stored in the collection.
# NOTE(review): this cell's execution count (26) is later than the insert cell
# below (25). On a top-to-bottom run right after the collection is recreated,
# this would presumably print 0 rather than the saved output — consider moving
# it after the insert cell.
print(collection.count())

27


In [25]:
# Index every interacting drug name as a document, tagged with the drug it
# interacts with. Ids are derived deterministically from (drug, interacting
# drug), and upsert is used, so re-running this cell refreshes the same entries
# instead of inserting duplicates under new random ids.
record = x or {}  # x is None when extraction failed
# dict.fromkeys de-duplicates while keeping order; repeated names would
# otherwise produce duplicate ids within one batch.
interacting_names = list(
    dict.fromkeys(str(item['name']) for item in record.get('interactive_drug', []))
)
if interacting_names:
    collection.upsert(
        ids=[
            str(uuid.uuid5(uuid.NAMESPACE_URL, f"{record['drug']}/{name}"))
            for name in interacting_names
        ],
        documents=interacting_names,
        metadatas=[{"name": record['drug']} for _ in interacting_names],
    )

In [None]:
# Ask the collection for the 2 nearest stored documents to the query text and
# keep only the 'documents' field of the result.
# NOTE(review): despite the name, the stored documents are plain drug names
# (see the insert cell), not JSON — the commented json.loads below would
# presumably fail on them.
json_strings = collection.query(query_texts=["calcium supplements"],n_results=2).get('documents')
# parsed_data = [json.loads(item) for item in json_strings]
# print(parsed_data)

In [None]:
# Print the documents returned by the similarity query above.
# The commented lines are leftover scratch experiments, kept for reference.
# chain_extract = prompt | llm
# res = chain_extract.invoke(input={'page':data_from("Hydrea")})
# print(res.content)
print(json_strings)
# res = llm.invoke("paracetamol generic name")
# print(res.content)