In [1]:
import os
os.environ['TRANSFORMERS_CACHE'] = '/scratch/megathon/cache/'

In [2]:
from torch import cuda, bfloat16
import transformers
from tqdm import tqdm

# model_id = 'meta-llama/Llama-2-7b-chat-hf'
model_id = 'medalpaca/medalpaca-7b'

device = "cpu" #f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = "<TOKEN>"
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth,
    device_map=device,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device,
    token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Model loaded on cpu


In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [4]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[2, 29871, 13, 29950, 7889, 29901], [2, 29871, 13, 28956, 13]]

In [5]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    2, 29871,    13, 29950,  7889, 29901]),
 tensor([    2, 29871,    13, 28956,    13])]

In [6]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [11]:
generate_text = transformers.pipeline(
    model=model, 
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.2,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1,  # without this output begins repeating
)

In [8]:
res = generate_text("What are tips for managing my bipolar disorder?.")
print(res[0]["generated_text"])



What are tips for managing my bipolar disorder?. Bipolar disorder is a mental health condition that causes extreme mood swings.
Bipolar Disorder: Tips for Managing Your Moods. Bipolar disorder can be difficult to manage, but there are steps you can take to help control your symptoms and improve your quality of life.


In [14]:
res = generate_text("What should I do if I want to stop dialysis?")
print(res[0]["generated_text"])

What should I do if I want to stop dialysis?
 Unterscheidung between hemodialysis and peritoneal dialysis. Hemodialysis uses a machine to filter the blood outside of the body, while peritoneal dialysis uses the lining of the abdominal cavity as a filter. Both methods can be effective for treating kidney failure, but they have different advantages and disadvantages.
What are the benefits of stopping dialysis? Stopping dialysis can provide relief from the physical and emotional burdens of treatment, including: 1. Improved quality of life: Dialysis can be a significant burden on a person's daily activities, causing fatigue, pain, and limited mobility. By stopping dialysis, a person may experience improved overall health and well-being. 2. Reduced medication use: Many people on dialysis require multiple medications to control symptoms such as high blood pressure, anemia, and infection. By stopping dialysis, these medications may no longer be necessary, which can reduce the risk of side eff

# langchain

In [14]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
# llm(prompt="What are tips for managing my bipolar disorder?.")

In [13]:
# from langchain.document_loaders import WebBaseLoader

# web_links = ["https://www.databricks.com/","https://help.databricks.com","https://databricks.com/try-databricks","https://help.databricks.com/s/","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/release-notes/index.html","http://docs.databricks.com/ingestion/index.html","http://docs.databricks.com/exploratory-data-analysis/index.html","http://docs.databricks.com/data-preparation/index.html","http://docs.databricks.com/data-sharing/index.html","http://docs.databricks.com/marketplace/index.html","http://docs.databricks.com/workspace-index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html","http://docs.databricks.com/delta/index.html","http://docs.databricks.com/dev-tools/index.html","http://docs.databricks.com/integrations/index.html","http://docs.databricks.com/administration-guide/index.html","http://docs.databricks.com/security/index.html","http://docs.databricks.com/data-governance/index.html","http://docs.databricks.com/lakehouse-architecture/index.html","http://docs.databricks.com/reference/api.html","http://docs.databricks.com/resources/index.html","http://docs.databricks.com/whats-coming.html","http://docs.databricks.com/archive/index.html","http://docs.databricks.com/lakehouse/index.html","http://docs.databricks.com/getting-started/quick-start.html","http://docs.databricks.com/getting-started/etl-quick-start.html","http://docs.databricks.com/getting-started/lakehouse-e2e.html","http://docs.databricks.com/getting-started/free-training.html","http://docs.databricks.com/sql/language-manual/index.html","http://docs.databricks.com/error-messages/index.html","http://www.apache.org/","https://databricks.com/privacy-policy","https://databricks.com/terms-of-use"] 

# loader = WebBaseLoader(web_links)
# documents = loader.load()

from langchain.document_loaders import TextLoader

loader = TextLoader("train_webmd_squad_v2_consec.txt")
documents = loader.load()

In [19]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=20)
all_splits = text_splitter.split_documents(documents)

In [20]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

In [21]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

In [13]:
chat_history = []

query = "What should I do if I want to stop dialysis?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 Stopping dialysis is a complex decision that requires careful consideration and close monitoring with your healthcare team. While it may be possible to stop dialysis in some cases, it's essential to understand the potential risks and consequences involved. Here are some factors to consider:

1. Medical history: If you have a history of heart disease, lung disease, or other serious health conditions, stopping dialysis may not be advisable. Your healthcare team will assess your overall health and determine whether it's safe to stop dialysis.
2. Kidney function: If your kidneys are still functioning somewhat, your healthcare team may recommend continuing dialysis to help preserve what little kidney function you have left. However, if your kidneys are no longer functioning, stopping dialysis may not significantly impact your quality of life.
3. Personal preferences: It's crucial to consider your personal preferences and values when deciding whether to continue or stop dialysis. Some peopl

## Misc

In [2]:
!export PYTHONIOENCODING=UTF-8

In [1]:
import json
import cleantext

def generate_context_docs(json_path, output_path="webmd_context_docs.txt"):
    with open(json_path) as f:
        data = json.load(f)

    with open(output_path, "a") as f:
        for x in range(len(data["data"])):
            inp = data["data"][x]["paragraphs"][0]["context"]
            inp = cleantext.clean(inp, clean_all=False, extra_spaces=True, stemming=False, stopwords=False,
                lowercase=False, numbers=False, punct=False)
            
            # remove some non info lines
            if "var s_context" in inp:
                continue
            f.write(inp)
            f.write("\n\n")

generate_context_docs("/home/abhiroop.talasila/megathon/data 2/train_webmd_squad_v2_full.json")
generate_context_docs("/home/abhiroop.talasila/megathon/data 2/val_webmd_squad_v2_consec.json")
generate_context_docs("/home/abhiroop.talasila/megathon/data 2/val_webmd_squad_v2_full.json")