In [207]:
import pandas as pd
import numpy as np
import requests
import plotly.express as px
from rapidfuzz import process
import warnings
import plotly.graph_objects as go
warnings.filterwarnings("ignore")

In [200]:
def parseAnswer(answer: str):
    recommendedDrugs = []
    splitted = answer.split("\n")
    i = 0
    while i < len(splitted):
        item = splitted[i]
        if "Drug:" in item:
            temp = {"drugName": item.split("Drug: ")[1]}
            for j in range(i+1, i+5):
                key = splitted[j].split(":")[0].split("- ")[1]
                value = splitted[j].split(": ")[1]
                if ((key == "Rating") | (key == "Useful Count")):
                    try:
                        value = float(value)
                    except:
                        value = np.nan
                temp[key] = value
            recommendedDrugs.append(temp)
            i += 5
        else:
            i += 1
    recommendedDrugs = pd.DataFrame(recommendedDrugs)
    return recommendedDrugs

def getOtherAlternates(topDrugs: list[str], df: pd.DataFrame):
    alternates = []
    drugs = df.name.unique()
    for drug in topDrugs:
        alternate, score, ind = process.extractOne(drug, drugs)
        if score > 80:
            df2 = df.loc[ind,:]
            alternates.append({
                "drug": drug,
                "alternate": alternate,
                "score": score,
                "ind": ind,
                "sideEffects": ",".join(df2.filter(regex=("sideEffect.*")).fillna("").unique()),
                "substitutes": ",".join(df2.filter(regex=("substitute.*")).fillna("").unique()),
                "uses": ",".join(df2.filter(regex=("use.*")).fillna("").unique()),
                "chemicalClass": df2["Chemical Class"],
                "habitForming": df2["Habit Forming"],
                "therapeuticClass": df2["Therapeutic Class"],
                "actionClass": df2["Action Class"]
            })
    return pd.DataFrame(alternates)

def querySourceDf(doc, drugsToSearch):
    sourceDf = pd.read_csv(f"./application/{doc}")
    sourceDf = sourceDf[sourceDf.drugName.isin(drugsToSearch)]
    sourceDf["date"] = sourceDf["date"].apply(lambda x:pd.to_datetime(pd.to_datetime(x).strftime("%Y-%m")))
    sourceDf = sourceDf.groupby(["drugName", "date"]).agg(
        avgRating = ("rating", "mean"),
        totalReviews = ("usefulCount","sum"),
        generalSentiment = ("sentiment_label","median")
    ).reset_index().sort_values(by=["drugName", "date"], ascending=True).reset_index(drop=True)
    return sourceDf

In [203]:
prompt = "Heart attack"
formData = {'query': prompt}
response = requests.post('http://127.0.0.1:5000/get_response', data=formData).json()

In [204]:
recommendedDrugs = parseAnswer(response["answer"])
df = pd.read_csv("./application/data/medicine_dataset.csv")
alternates = getOtherAlternates(recommendedDrugs.drugName.unique(), df)
finalDf = recommendedDrugs.merge(alternates, left_on="drugName", right_on="drug", how="left")
sourceDf = querySourceDf(response["doc"], finalDf.drugName.unique())

In [221]:
alternates

Unnamed: 0,drug,alternate,score,ind,sideEffects,substitutes,uses,chemicalClass,habitForming,therapeuticClass,actionClass
0,Lisinopril,davaindia lisinopril 2.5mg tablet,81.0,63019,"Itching,Irritation,Thinning of skin,Burning se...","Benkos Ointment,Tunesol-M Ointment,Clotus GM O...","Skin infections,",,No,DERMA,
1,Metoprolol,davaindia telmisartan+metoprolol succinate 40m...,81.0,58659,"Diarrhea,Abnormal liver function tests,Rash,","Xone Hospital 1000mg Injection,Ritecef 1000mg ...","Treatment of Bacterial infections,",Broad Spectrum (Third & fourth generation ceph...,No,ANTI INFECTIVES,Cephalosporins: 3 generation
2,Plavix,lavixan 200mg tablet,81.818182,123424,"Skin peeling,Application site reactions (burni...","Ketofly Soap from Leeford for Skin Infections,...","Fungal skin infections,",Azole derivatives {Imidazoles},No,DERMA,Fungal ergosterol synthesis inhibitor


In [208]:
fig = go.Figure([go.Scatter(x=sourceDf['date'], y=sourceDf['avgRating'])])

In [218]:
df2 = sourceDf.groupby("drugName").agg(
    avgRating = ("avgRating", "mean"),
    totalReviews = ("totalReviews", "sum"),
    generalSentiment = ("generalSentiment", "median")).reset_index()

In [219]:

fig.show()