In [None]:
import os
from getpass import getpass

# Pinned version of the Llama 2 13B chat model hosted on Replicate.
llama2_13b = "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d"

# Credentials must never be stored in the notebook itself. Use the token that
# is already exported in the environment; otherwise ask for it interactively
# (getpass does not echo the input, so it does not end up in the cell output).
if not os.environ.get("REPLICATE_API_TOKEN"):
    os.environ["REPLICATE_API_TOKEN"] = getpass("Replicate API token: ")

In [None]:
from IPython.display import display, Markdown
def md(t):
    """Render the string ``t`` as Markdown in the notebook output."""
    rendered = Markdown(t)
    display(rendered)

In [None]:
import replicate
# langchain setup
from langchain.llms import Replicate
# Llama 2 chat model hosted on Replicate, wrapped as a LangChain LLM.
# Generation settings passed to the model:
#   temperature    - randomness of the output: 0 is deterministic, values above 1 are random
#   top_p          - sample only from the top p share of most likely tokens; lower it to ignore less likely tokens
#   max_new_tokens - maximum number of tokens to generate
llama_generation_kwargs = {
    "temperature": 0.5,
    "top_p": 1,
    "max_new_tokens": 500,
}
llama_model = Replicate(model=llama2_13b, model_kwargs=llama_generation_kwargs)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Tab-separated file with the drug reviews.
DATA_PATH = 'drugLibTrain_raw.tsv'

# Load the file through CSVLoader; the "condition" column is passed as the
# loader's source column.
loader = CSVLoader(
    file_path=DATA_PATH,
    source_column="condition",
    csv_args={'delimiter': '\t'},
)
documents = loader.load()

# Kept as a separate list so that further datasets can be appended to it.
loaded_documents = list(documents)

# Optional second dataset - loading it makes the notebook significantly slower:
#DATA_PATH = 'drugs_for_common_treatments.csv'
#loader = CSVLoader(file_path=DATA_PATH,source_column="drug_name")
#documents = loader.load()
#loaded_documents.extend(documents)

In [None]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
splits = text_splitter.split_documents(loaded_documents)

# Embedding model, run on the CPU.
# Alternative model: "sentence-transformers/all-mpnet-base-v2"
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
)

# Build the FAISS vector store from the chunks.
# To persist it: vectorstore.save_local('vectorstore/db_faiss')
vectorstore = FAISS.from_documents(splits, embeddings)

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import HumanMessagePromptTemplate,SystemMessagePromptTemplate,ChatPromptTemplate

In [None]:
#1serious
# System prompt for the "serious" summary; {context} is the placeholder for
# the documents handed to the combine-documents step.
general_system_template1 = r"""
Act as a medical assistant, given the drug name - please provide the summary of all the reviews,
highlighting the percent of negative and positive reviews.
Please wish health and provide one fun proverb at the end.
 ----
{context}
----
"""
# User message template; {question} is the placeholder for the query.
general_user_template1 = "Question:```{question}```"

messages1 = [
    SystemMessagePromptTemplate.from_template(general_system_template1),
    HumanMessagePromptTemplate.from_template(general_user_template1),
]
qa_prompt1 = ChatPromptTemplate.from_messages(messages1)

# Retrieval chain over the vector store that answers with the prompt above.
chain1 = ConversationalRetrievalChain.from_llm(
    llm=llama_model,
    retriever=vectorstore.as_retriever(),
    combine_docs_chain_kwargs={"prompt": qa_prompt1},
    return_source_documents=True,
)

In [None]:
#1playful
# System prompt for the "playful" summary; {context} is the placeholder for
# the documents handed to the combine-documents step.
general_system_template2 = r"""
Act as a friend: calm me down and given the drug name - please provide the summary of all the reviews,
in form of a funny story like I am a child.
Please wish health and provide one fun proverb at the end.
 ----
{context}
----
"""
# User message template; {question} is the placeholder for the query.
general_user_template2 = "Question:```{question}```"

messages2 = [
    SystemMessagePromptTemplate.from_template(general_system_template2),
    HumanMessagePromptTemplate.from_template(general_user_template2),
]
qa_prompt2 = ChatPromptTemplate.from_messages(messages2)

# Retrieval chain over the vector store that answers with the prompt above.
chain2 = ConversationalRetrievalChain.from_llm(
    llm=llama_model,
    retriever=vectorstore.as_retriever(),
    combine_docs_chain_kwargs={"prompt": qa_prompt2},
    return_source_documents=True,
)

In [None]:
def give_summary(chain, query, chat_history=None):
    """Run ``query`` through ``chain`` and render the answer as Markdown.

    Args:
        chain: Callable taking a dict with the keys ``"question"`` and
            ``"chat_history"`` and returning a mapping with an ``"answer"`` key.
        query: The question passed to the chain.
        chat_history: Previous conversation turns passed to the chain.
            Defaults to a new empty list on every call.

    Returns:
        Whatever ``md`` returns for the answer text.
    """
    # Use a None sentinel instead of a mutable default: a default list would be
    # one shared object, so history added by one call would leak into the next.
    if chat_history is None:
        chat_history = []
    #chat_history = [(query, result["answer"])] #if we need to make a chatbot
    result = chain({"question": query, "chat_history": chat_history})
    return md(result["answer"])

In [None]:
import pandas as pd

# Reviews dataset (tab-separated); its drug names are the choices offered to the user.
df = pd.read_csv('drugLibTrain_raw.tsv', delimiter="\t")

# Drop missing names before sorting: unique() keeps NaN, and sorted() raises a
# TypeError when it has to compare NaN (a float) with the string names.
drugs = sorted(df["urlDrugName"].dropna().unique())

# Second dataset, read with the default comma delimiter.
df2 = pd.read_csv('drugs_for_common_treatments.csv')

In [None]:
# Maps each summary style name to the chain that produces it.
llm_resp = dict(serious=chain1, playful=chain2)

In [None]:
import ipywidgets as widgets
# Drug selector, pre-set to the first entry of the sorted drug list.
drug_dropdown = widgets.Dropdown(
    description='Drug name:',
    options=drugs,
    value=drugs[0],
    disabled=False,
)

# Summary style selector, one entry per key of llm_resp.
summary_styles = list(llm_resp)
llm_dropdown = widgets.Dropdown(
    description='Summary:',
    options=summary_styles,
    value=summary_styles[0],
    disabled=False,
)

# Button that triggers the summary.
button = widgets.Button(
    description='Give summary',
    tooltip='Run report',
    icon='check',
    button_style='info',  # 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# Bordered output area.
out = widgets.Output(layout={'border': '1px solid black'})

# Show the controls in the same order as they were created.
display(drug_dropdown, llm_dropdown, button, out)

def on_button_clicked(b):
    """Show the summary for the selected drug and, when known, its URL.

    Args:
        b: Argument supplied by the button click; not used.

    Clears ``out``, then runs ``give_summary`` with the chain chosen in
    ``llm_dropdown`` and the drug chosen in ``drug_dropdown``. Afterwards the
    first non-null ``drug_link`` of ``df2`` whose ``drug_name`` matches the
    selection (case-insensitively) is printed, if there is one.
    """
    selected_drug = drug_dropdown.value
    with out:
        out.clear_output()
        give_summary(llm_resp[llm_dropdown.value], selected_drug)

        # checking URL in the second dataset
        name_matches = df2['drug_name'].str.lower() == selected_drug.lower()
        drug_links = df2.loc[name_matches, "drug_link"].dropna()
        if not drug_links.empty:
            # Print the value itself rather than Series.to_string(): that is a
            # display formatter and can shorten or pad long values.
            print("URL of the drug: ", drug_links.iloc[0])


# Register (remove=False) the click handler on the button.
button.on_click(on_button_clicked, remove=False)

Dropdown(description='Drug name:', options=('abilify', 'accolate', 'accupril', 'accutane', 'aciphex', 'actiq',…

Dropdown(description='Summary:', options=('serious', 'playful'), value='serious')

Button(button_style='info', description='Give summary', icon='check', style=ButtonStyle(), tooltip='Run report…

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…