In [1]:
import os
from urllib.parse import quote_plus

import altair as alt
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [2]:
# Pull connection settings from a local .env file (if one exists) into os.environ.
load_dotenv()

# Every setting can be overridden through the environment; the defaults point
# at a MySQL server on localhost:3306 with the "rxnorm" schema.
host = os.getenv("MYSQL_HOST", "localhost")
port = os.getenv("MYSQL_PORT", "3306")
user = os.getenv("MYSQL_USER", "root")
# Read the password from the environment like the other settings instead of
# hard-coding it; the empty default keeps the previous password-less behaviour.
password = os.getenv("MYSQL_PASSWORD", "")
db = os.getenv("MYSQL_DB", "rxnorm")

# Percent-encode the credentials so characters such as '@', ':' or '/' in a
# user name or password cannot corrupt the connection URL.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{db}?charset=utf8mb4"
)


In [3]:
from sqlalchemy import text

def Searchbar(term):
    """Return the RXNCONSO rows with TTY = 'BN' whose STR contains ``term``.

    Args:
        term: Substring to look for. It is wrapped in ``%`` wildcards and sent
            to the database as a bound parameter of the LIKE clause.

    Returns:
        pandas.DataFrame holding every RXNCONSO column of the matching rows.
    """
    like_pattern = f'%{term}%'
    statement = text("SELECT * FROM RXNCONSO WHERE STR LIKE :term AND TTY = 'BN'")

    # The connection is only held open while the query runs.
    with engine.connect() as connection:
        return pd.read_sql(statement, connection, params={'term': like_pattern})

# Example: search for "Tylenol" and list the STR value of every matching row.
x = Searchbar("Tylenol")
list(x["STR"]) 

['Tylenol',
 'Tylenol PM',
 'Tylenol with Codeine',
 'Tylenol',
 'Tylenol PM',
 'Tylenol with Codeine',
 'Tylenol',
 'Tylenol PM',
 'Tylenol with Codeine']

In [4]:
def Fetch_Drug_Form(name):
    """List the 'DP' rows related to a brand name, one row per product name.

    The brand is resolved to an RXCUI by an exact STR match among rows with
    TTY = 'BN' (the first match is used). Every RXNCONSO row with TTY = 'DP'
    that RXNREL links to that RXCUI (RXCUI1 = brand, RXCUI2 = product) is then
    fetched.

    Args:
        name: Exact brand string to look up.

    Returns:
        pandas.DataFrame with the columns ``Drug_Form_ID``, ``Relation``,
        ``Drug_Form`` and ``Product_Name``. ``Product_Name`` is the text inside
        the first ``[...]`` of ``Drug_Form``, or the whole ``Drug_Form`` string
        when there are no brackets. Rows are de-duplicated case-insensitively
        on ``Product_Name`` (first occurrence wins). When the brand is unknown
        the frame is empty but still carries these four columns.
    """
    columns = ["Drug_Form_ID", "Relation", "Drug_Form", "Product_Name"]

    brand_query = text("""
        SELECT RXCUI 
        FROM RXNCONSO 
        WHERE STR = :name AND TTY = 'BN'
    """)

    forms_query = text("""
        SELECT DISTINCT
            r.RXCUI2 AS Drug_Form_ID, 
            r.RELA AS Relation, 
            c.STR AS Drug_Form
        FROM RXNCONSO c
        JOIN RXNREL r ON c.RXCUI = r.RXCUI2
        WHERE r.RXCUI1 = :drug_id 
          AND c.TTY = 'DP'
    """)

    # Both look-ups share one connection instead of opening two in a row.
    with engine.connect() as conn:
        brand = pd.read_sql(brand_query, conn, params={"name": name})

        if brand.empty:
            # Return the same schema as the normal path so callers can index
            # the result columns without special-casing "brand not found".
            return pd.DataFrame(columns=columns)

        drug_id = brand["RXCUI"].iloc[0]
        result = pd.read_sql(forms_query, conn, params={"drug_id": drug_id})

    # Product name = bracketed label if present, otherwise the full string.
    bracketed = result["Drug_Form"].str.extract(r'\[(.*?)\]', expand=False)
    result["Product_Name"] = bracketed.fillna(result["Drug_Form"])

    # Case-insensitive de-duplication that keeps the first spelling seen.
    is_repeat = result["Product_Name"].str.lower().duplicated()
    return result.loc[~is_repeat]

# Example: product names found for the brand string "Tylenol PM".
Fetch_Drug_Form("Tylenol PM")

Unnamed: 0,Drug_Form_ID,Relation,Drug_Form,Product_Name
0,1092378,has_ingredient,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,Tylenol PM Extra Strength
1,1092378,has_ingredient,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,Tylenol PM
2,1092378,has_ingredient,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,"Tylenol PM Extra Strength, CVP HEALTH"
3,1092378,has_ingredient,DIPHENHYDRAMINE HYDROCHLORIDE 25 mg / ACETAMIN...,Lil Drug Store Tylenol PM Extra Strength


In [5]:
def Show_Ingredients(ID):
    """Return the 'SCDC' rows that RXNREL links to the given RXCUI.

    Args:
        ID: RXCUI matched against ``RXNREL.RXCUI1``. It is converted with
            ``str()`` and bound as a string, matching the quoted literal the
            query used before.

    Returns:
        pandas.DataFrame with the columns ``Ingredient_ID`` (``RXCUI2``),
        ``Relation`` (``RELA``) and ``Ingredient`` (``STR``), one row per
        distinct combination.
    """
    # Bind the ID rather than formatting it into the SQL text: this prevents
    # SQL injection and quoting errors, and matches how Searchbar and
    # Fetch_Drug_Form build their queries.
    query = text("""
        SELECT r.RXCUI2 AS Ingredient_ID, r.RELA AS Relation, c.STR AS Ingredient
        FROM RXNCONSO c
        JOIN RXNREL r ON c.RXCUI = r.RXCUI2
        WHERE r.RXCUI1 = :drug_id AND c.TTY = 'SCDC'
        GROUP BY Ingredient_ID, Relation, Ingredient
    """)

    with engine.connect() as conn:
        return pd.read_sql(query, conn, params={"drug_id": str(ID)})

# Look up the ingredient rows for ID 1092378 and keep their IDs in
# `ingredients`; later cells pass that list to Exact_Drugs and Union_Drugs.
df = Show_Ingredients(1092378)
ingredients = list(df["Ingredient_ID"])
df

Unnamed: 0,Ingredient_ID,Relation,Ingredient
0,315266,constitutes,acetaminophen 500 MG
1,901813,constitutes,diphenhydramine hydrochloride 25 MG


In [6]:
def Dose_Form(ID):
    """Return the STR of the first 'DF' row that RXNREL links to the given RXCUI.

    Args:
        ID: RXCUI matched against ``RXNREL.RXCUI1``. It is converted with
            ``str()`` and bound as a string, matching the quoted literal the
            query used before.

    Returns:
        The ``STR`` value of the first matching row (e.g. ``'Oral Tablet'``).

    Raises:
        KeyError: If no row with TTY = 'DF' is linked to ``ID``. The exception
            type is the one the previous positional lookup raised, but the
            message now names the ID that was not found.
    """
    # Bound parameter instead of f-string interpolation (no SQL injection,
    # no quoting problems).
    query = text("""
        SELECT r.RXCUI2 AS Ingredient_ID, r.RELA AS Relation, c.STR AS Ingredient
        FROM RXNCONSO c
        JOIN RXNREL r ON c.RXCUI = r.RXCUI2
        WHERE r.RXCUI1 = :drug_id AND c.TTY = 'DF'
        GROUP BY Ingredient_ID, Relation, Ingredient
    """)

    with engine.connect() as conn:
        result = pd.read_sql(query, conn, params={"drug_id": str(ID)})

    if result.empty:
        raise KeyError(f"No dose form (TTY 'DF') found for RXCUI {ID!r}")

    # Positional access: does not depend on the frame's index labels.
    return result["Ingredient"].iloc[0]
# Example: dose form lookup for RXCUI 209387.
Dose_Form(209387)

'Oral Tablet'

In [7]:
def get_generic(ID):
    """Return the 'SCD' rows that RXNREL links to the given RXCUI.

    Args:
        ID: RXCUI matched against ``RXNREL.RXCUI1``. It is converted with
            ``str()`` and bound as a string, matching the quoted literal the
            query used before.

    Returns:
        pandas.DataFrame with the columns ``Ingredient_ID`` (``RXCUI2``) and
        ``Ingredient`` (``STR``), one row per distinct pair.
    """
    # Bound parameter instead of f-string interpolation (no SQL injection,
    # no quoting problems).
    query = text("""
        SELECT r.RXCUI2 AS Ingredient_ID, c.STR AS Ingredient
        FROM RXNCONSO c
        JOIN RXNREL r ON c.RXCUI = r.RXCUI2
        WHERE r.RXCUI1 = :drug_id AND c.TTY = 'SCD'
        GROUP BY Ingredient_ID, Ingredient
    """)

    with engine.connect() as conn:
        return pd.read_sql(query, conn, params={"drug_id": str(ID)})
    
# Example: rows with TTY "SCD" linked to RXCUI 209387.
get_generic(209387)

Unnamed: 0,Ingredient_ID,Ingredient
0,313782,acetaminophen 325 MG Oral Tablet


In [8]:
def Exact_Drugs(Ing_lst,ID):
    """Find other 'DP' products that are linked to every listed ingredient.

    A product qualifies when RXNREL links it (as ``RXCUI2``) to each ID in
    ``Ing_lst`` (as ``RXCUI1``). The product whose RXCUI equals ``ID`` is
    excluded from the result.

    Args:
        Ing_lst: Iterable of ingredient RXCUIs (strings or integers).
            Duplicates are ignored, so a repeated ID no longer inflates the
            required ingredient count.
        ID: RXCUI of the reference product to leave out.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``DP`` and ``Product_Name``,
        re-indexed from 0. Only rows whose ``DP`` contains a ``[...]`` label
        are kept; ``Product_Name`` is the text after the last ``[`` minus the
        final character. Rows are de-duplicated case-insensitively on
        ``Product_Name`` (first occurrence wins). An empty ``Ing_lst`` gives
        an empty frame with the same columns instead of a SQL syntax error.
    """
    # Normalise to unique strings, keeping the caller's order.
    ingredient_ids = list(dict.fromkeys(str(ing) for ing in Ing_lst))
    if not ingredient_ids:
        return pd.DataFrame(columns=["ID", "DP", "Product_Name"])

    # Only the placeholder *names* are formatted into the SQL text; every
    # value is bound, so nothing caller-supplied is ever parsed as SQL.
    placeholders = ", ".join(f":ing_{pos}" for pos in range(len(ingredient_ids)))
    params = {f"ing_{pos}": ing for pos, ing in enumerate(ingredient_ids)}
    params["n_ingredients"] = len(ingredient_ids)
    params["exclude_id"] = str(ID)

    query = text(f"""
    WITH base AS (
        SELECT r2.RXCUI AS ID, r2.STR AS DP, r1.RXCUI1 AS Ingredient_ID
        FROM RXNREL r1
        JOIN RXNCONSO r2
        ON r1.RXCUI2 = r2.RXCUI
        WHERE r1.RXCUI1 IN ({placeholders}) AND r2.TTY = 'DP'
    ),
    keys_all AS (
        SELECT ID
        FROM base
        GROUP BY ID
        HAVING COUNT(DISTINCT Ingredient_ID) = :n_ingredients
    )
    SELECT b.ID, b.DP
    FROM base b
    JOIN keys_all k
    ON b.ID = k.ID
    WHERE b.ID != :exclude_id
    GROUP BY b.ID, b.DP
    """)

    with engine.connect() as conn:
        res = pd.read_sql(query, conn, params=params)

    # Rows without a bracketed label carry no product name and are discarded.
    has_label = res["DP"].str.contains("[", regex=False, na=False)
    res = res.loc[has_label].copy()

    # Text after the last '[' with the closing character stripped.
    res["Product_Name"] = res["DP"].str.rsplit("[", n=1).str[-1].str[:-1]

    # Case-insensitive de-duplication, keeping the first spelling encountered.
    res = res.loc[~res["Product_Name"].str.lower().duplicated()]
    return res.reset_index(drop=True)
    
# Example: other products linked to every ID in `ingredients` (built in the
# Show_Ingredients cell), leaving out product 1092378 itself.
df = Exact_Drugs(ingredients,1092378)
df

Unnamed: 0,ID,DP,Product_Name
0,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,Pain Relief PM Extra Strength
1,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,Pain Relief PM
2,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,CAREALL Non-Aspirin PM Extra Strength
3,1092189,DIPHENHYDRAMINE HYDROCHLORIDE 25 mg / ACETAMIN...,CounterAct PM
4,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,ACETAMINOPHEN PM
5,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,Pain Reliever PM
6,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,Pain Reliever PM Extra strength
7,1092189,DIPHENHYDRAMINE HYDROCHLORIDE 25 mg / ACETAMIN...,Vicks ZzzQuil Night Pain
8,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,EXTRA STRENGTH PAIN RELIEF PM
9,1092189,ACETAMINOPHEN 500 mg / DIPHENHYDRAMINE HYDROCH...,EXTRA STRENGTH PAIN RELIEVER PM


In [9]:
def Union_Drugs(Ing_lst,ID):
    """Find other 'DP' products linked to some, but not all, listed ingredients.

    A product qualifies when RXNREL links it (as ``RXCUI2``) to at least one
    but fewer than all of the IDs in ``Ing_lst`` (as ``RXCUI1``). The product
    whose RXCUI equals ``ID`` is excluded from the result.

    Args:
        Ing_lst: Iterable of ingredient RXCUIs (strings or integers).
            Duplicates are ignored, so a repeated ID no longer inflates the
            ingredient count used in the comparison.
        ID: RXCUI of the reference product to leave out.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``DP`` and ``Product_Name``,
        re-indexed from 0. Only rows whose ``DP`` contains a ``[...]`` label
        are kept; ``Product_Name`` is the text after the last ``[`` minus the
        final character. Rows are de-duplicated case-insensitively on
        ``Product_Name`` and then on ``ID`` (first occurrence wins each time).
        An empty ``Ing_lst`` gives an empty frame with the same columns
        instead of a SQL syntax error.
    """
    # Normalise to unique strings, keeping the caller's order.
    ingredient_ids = list(dict.fromkeys(str(ing) for ing in Ing_lst))
    if not ingredient_ids:
        return pd.DataFrame(columns=["ID", "DP", "Product_Name"])

    # Only the placeholder *names* are formatted into the SQL text; every
    # value is bound, so nothing caller-supplied is ever parsed as SQL.
    placeholders = ", ".join(f":ing_{pos}" for pos in range(len(ingredient_ids)))
    params = {f"ing_{pos}": ing for pos, ing in enumerate(ingredient_ids)}
    params["n_ingredients"] = len(ingredient_ids)
    params["exclude_id"] = str(ID)

    query = text(f"""
    WITH base AS (
        SELECT r2.RXCUI AS ID, r2.STR AS DP, r1.RXCUI1 AS Ingredient_ID
        FROM RXNREL r1
        JOIN RXNCONSO r2
        ON r1.RXCUI2 = r2.RXCUI
        WHERE r1.RXCUI1 IN ({placeholders}) AND r2.TTY = 'DP'
    ),
    keys_all AS (
        SELECT ID
        FROM base
        GROUP BY ID
        HAVING COUNT(DISTINCT Ingredient_ID) < :n_ingredients
    )
    SELECT b.ID, b.DP
    FROM base b
    JOIN keys_all k
    ON b.ID = k.ID
    WHERE b.ID != :exclude_id
    GROUP BY b.ID, b.DP
    """)

    with engine.connect() as conn:
        res = pd.read_sql(query, conn, params=params)

    # Rows without a bracketed label carry no product name and are discarded.
    has_label = res["DP"].str.contains("[", regex=False, na=False)
    res = res.loc[has_label].copy()

    # Text after the last '[' with the closing character stripped.
    res["Product_Name"] = res["DP"].str.rsplit("[", n=1).str[-1].str[:-1]

    # Case-insensitive de-duplication on the name, then one row per product ID.
    res = res.loc[~res["Product_Name"].str.lower().duplicated()]
    res = res.drop_duplicates(subset="ID")
    return res.reset_index(drop=True)
    
# Example: products linked to some but not all IDs in `ingredients`.
# This reassigns `df`, which the head(10) and Fetch_Heatmap cells below use.
df = Union_Drugs(ingredients,1092378)
df

Unnamed: 0,ID,DP,Product_Name
0,198439,"ACETAMINOPHEN 500 mg ORAL CAPSULE, LIQUID FILL...",CVS Health Extra Strength Acetaminophen Softgels
1,198440,ACETAMINOPHEN 500 mg ORAL TABLET [Acetaminophe...,Acetaminophen ES
2,200977,"ACETAMINOPHEN 500 mg ORAL TABLET, FILM COATED ...",PANADOL Extra Strength
3,209443,ACETAMINOPHEN 500 mg ORAL TABLET [Extra Streng...,Extra Strength Mapap
4,209459,"ACETAMINOPHEN 500 mg ORAL TABLET, FILM COATED ...",Tylenol Extra Strength
5,209890,ACETAMINOPHEN 500 mg ORAL CAPSULE [MAPAP Extra...,MAPAP Extra Strength
6,247324,ACETAMINOPHEN 500 mg / DEXTROMETHORPHAN HYDROB...,Theraflu Flu Relief Max Daytime
7,307686,ACETAMINOPHEN 500 mg / CAFFEINE 65 mg ORAL TAB...,Tension Headache Aspirin Free
8,404172,ACETAMINOPHEN 500 mg / CAFFEINE 65 mg ORAL TAB...,Excedrin Tension Headache
9,901814,IBUPROFEN 200 mg / DIPHENHYDRAMINE HYDROCHLORI...,Ibuprofen PM


In [10]:
def Fetch_Ingredients(ID):
    """Return the distinct STR values of the 'SCDC' rows linked to an RXCUI.

    Args:
        ID: RXCUI matched against ``RXNREL.RXCUI1``. It is converted with
            ``str()`` and bound as a string, matching the quoted literal the
            query used before.

    Returns:
        list of str: One entry per distinct ingredient string; empty when
        nothing is linked to ``ID``.
    """
    # Bound parameter instead of f-string interpolation (no SQL injection,
    # no quoting problems).
    query = text("""
        SELECT c.STR AS Ingredient
        FROM RXNCONSO c
        JOIN RXNREL r ON c.RXCUI = r.RXCUI2
        WHERE r.RXCUI1 = :drug_id AND c.TTY = 'SCDC'
        GROUP BY Ingredient
    """)

    with engine.connect() as conn:
        result = pd.read_sql(query, conn, params={"drug_id": str(ID)})

    return list(result["Ingredient"])

# Example: ingredient strings for ID 1092378.
Fetch_Ingredients(1092378)

['acetaminophen 500 MG', 'diphenhydramine hydrochloride 25 MG']

In [11]:
def Fetch_Ingredient_Name(ID):
    """Return the STR of the RXNCONSO row with the given RXCUI and TTY = 'SCDC'.

    Args:
        ID: RXCUI matched against ``RXNCONSO.RXCUI``. It is converted with
            ``str()`` and bound as a string, matching the quoted literal the
            query used before.

    Returns:
        The first distinct ``STR`` value found (e.g. ``'acetaminophen 500 MG'``).

    Raises:
        KeyError: If no 'SCDC' row exists for ``ID``. The exception type is the
            one the previous positional lookup raised, but the message now
            names the ID that was not found.
    """
    # Bound parameter instead of f-string interpolation (no SQL injection,
    # no quoting problems).
    query = text("""
        SELECT STR AS Ingredient
        FROM RXNCONSO
        WHERE RXCUI = :rxcui AND TTY = 'SCDC'
        GROUP BY Ingredient
    """)

    with engine.connect() as conn:
        result = pd.read_sql(query, conn, params={"rxcui": str(ID)})

    if result.empty:
        raise KeyError(f"No ingredient (TTY 'SCDC') found for RXCUI {ID!r}")

    # Positional access: does not depend on the frame's index labels.
    return result["Ingredient"].iloc[0]

# Example: ingredient string for RXCUI 315266.
Fetch_Ingredient_Name(315266)

'acetaminophen 500 MG'

In [12]:
# Keep only the first 10 rows of `df` (last assigned by the Union_Drugs cell
# when the notebook is run top to bottom) before building the heatmap table.
df = df.head(10)

In [13]:
from sklearn.preprocessing import MultiLabelBinarizer
import pandas as pd

def Fetch_Heatmap(df):
    """Append one 0/1 indicator column per ingredient to ``df``.

    Ingredients are looked up with ``Fetch_Ingredients`` once for each
    distinct, non-null value of ``df["ID"]`` and then one-hot encoded with
    ``MultiLabelBinarizer``.

    Args:
        df: DataFrame that contains an ``ID`` column.

    Returns:
        A new DataFrame: the columns of ``df`` followed by one indicator
        column per ingredient, aligned on ``df``'s index.

    Raises:
        ValueError: If ``df`` has no ``ID`` column.
    """
    if "ID" not in df.columns:
        raise ValueError("DataFrame must contain an 'ID' column")

    def _ingredients_for(rxcui):
        # Coerce whatever Fetch_Ingredients hands back into a plain list.
        fetched = Fetch_Ingredients(rxcui)
        if fetched is None:
            return []
        if isinstance(fetched, str):
            return [fetched]
        try:
            return list(fetched)
        except TypeError:
            return []

    def _list_or_empty(value):
        # IDs that were missing map to NaN; treat them as "no ingredients".
        return value if isinstance(value, list) else []

    # One lookup per distinct ID, however many rows share it.
    per_id = {}
    for rxcui in df["ID"].dropna().unique():
        per_id[rxcui] = _ingredients_for(rxcui)

    # Spread the per-ID lists back over the rows of the frame.
    per_row = df["ID"].map(per_id).apply(_list_or_empty)

    # One-hot encode the ingredient lists; classes_ is populated by the fit.
    binarizer = MultiLabelBinarizer()
    indicators = pd.DataFrame(
        binarizer.fit_transform(per_row),
        columns=binarizer.classes_,
        index=df.index,
    )

    return pd.concat([df, indicators], axis=1)

In [14]:
# Build the ingredient indicator table for the rows currently held in `df`
# and display it.
heatmap_df = Fetch_Heatmap(df)
heatmap_df

Unnamed: 0,ID,DP,Product_Name,acetaminophen 500 MG,caffeine 65 MG,dextromethorphan hydrobromide 15 MG,diphenhydramine hydrochloride 25 MG,ibuprofen 200 MG
0,198439,"ACETAMINOPHEN 500 mg ORAL CAPSULE, LIQUID FILL...",CVS Health Extra Strength Acetaminophen Softgels,1,0,0,0,0
1,198440,ACETAMINOPHEN 500 mg ORAL TABLET [Acetaminophe...,Acetaminophen ES,1,0,0,0,0
2,200977,"ACETAMINOPHEN 500 mg ORAL TABLET, FILM COATED ...",PANADOL Extra Strength,1,0,0,0,0
3,209443,ACETAMINOPHEN 500 mg ORAL TABLET [Extra Streng...,Extra Strength Mapap,1,0,0,0,0
4,209459,"ACETAMINOPHEN 500 mg ORAL TABLET, FILM COATED ...",Tylenol Extra Strength,1,0,0,0,0
5,209890,ACETAMINOPHEN 500 mg ORAL CAPSULE [MAPAP Extra...,MAPAP Extra Strength,1,0,0,0,0
6,247324,ACETAMINOPHEN 500 mg / DEXTROMETHORPHAN HYDROB...,Theraflu Flu Relief Max Daytime,1,0,1,0,0
7,307686,ACETAMINOPHEN 500 mg / CAFFEINE 65 mg ORAL TAB...,Tension Headache Aspirin Free,1,1,0,0,0
8,404172,ACETAMINOPHEN 500 mg / CAFFEINE 65 mg ORAL TAB...,Excedrin Tension Headache,1,1,0,0,0
9,901814,IBUPROFEN 200 mg / DIPHENHYDRAMINE HYDROCHLORI...,Ibuprofen PM,0,0,0,1,1
