# Final Project
## ADSP 32021 IP01 Machine Learning Operations
### 5: Finetuned Model Test
#### Group 2: Maria Clarissa Fionalita, Kajal Shukla, Mia Zhang, Priya Suvvaru Venkata

In [1]:
!python --version
!jupyter nbextension enable --py widgetsnbextension

Python 3.10.13
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "cpu"

In [3]:
# from huggingface_hub import notebook_login
# # https://huggingface.co/settings/tokens

# notebook_login()

# Load FineTuned OPT-125M

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from optimum.bettertransformer import BetterTransformer

from pprint import pprint

In [5]:
%%time

model_name = "facebook/opt-125m"
new_model_name = "model/opt_125_data_v2"

model = AutoModelForCausalLM.from_pretrained(new_model_name)
model = BetterTransformer.transform(model, keep_original_model=True) #enable CPU inference but not all models are supported

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.


CPU times: user 5.77 s, sys: 35.3 s, total: 41 s
Wall time: 11.3 s


(True, True)

In [6]:
def inference(text, model, tokenizer, max_input_tokens = 1000, max_output_tokens = 100):
    device = model.device
    # Tokenize
    input_ids = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=max_input_tokens).to(device)

    # Generate
    generated_tokens = model.generate(input_ids=input_ids.to(device), max_length=max_output_tokens, temperature = 0.4, pad_token_id=tokenizer.eos_token_id, do_sample = True)

    # Decode
    generated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    
    # Strip the prompt
    generated_text_answer = generated_text[0][len(text):]
    
    return generated_text_answer

In [7]:
def qa_gen(text, model, tokenizer, max_output_tokens = 100):
    # instruction = "instruction: please answer the following question\n"
    question = "question: " + str(text) + "\n"
    prompt = question + "answer:"
    print(prompt)
    print("-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------")
    print(inference(text = prompt, model = model, tokenizer = tokenizer, max_output_tokens = max_output_tokens))
    print("-------------------END OF TEXT GENERATED BY LANGUAGE MODEL------------------------")

In [8]:
%%time

text = "hello?"

qa_gen(text = text, model = model, tokenizer = tokenizer, max_output_tokens = 30)

question: hello?
answer:
-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------




 Yes, those annoying annoying symptoms of psoriasis are actually caused by a fungus called candida. It's
-------------------END OF TEXT GENERATED BY LANGUAGE MODEL------------------------
CPU times: user 25.7 s, sys: 103 ms, total: 25.8 s
Wall time: 3.26 s


# N-Shot Learning

In [9]:
test_prompt = ["What types of exercise are best for people with asthma?", "How is obsessive-compulsive disorder diagnosed?", "When are you more likely to get a blood clot?", "How should you lift objects to prevent back pain?", "How can you be smart with antibiotics?"]

test_prompt[0]

'What types of exercise are best for people with asthma?'

# Zero-Shot

In [10]:
%%time

for prompt in test_prompt:
    qa_gen(text = prompt, model = model, tokenizer = tokenizer, max_output_tokens = 100)
    print()

question: What types of exercise are best for people with asthma?
answer:
-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------
 Exercise is a great way to manage your asthma symptoms, but even the best workout routine can feel a bit blah after a while. If you're looking for a challenge -- something new or more intense -- the latest trends may have caught your eye. You've got a bunch to choose from, like barre classes that draw on dance moves, high-intensity interval training ( HIIT) that trades hardcore bursts of exercise
-------------------END OF TEXT GENERATED BY LANGUAGE MODEL------------------------

question: How is obsessive-compulsive disorder diagnosed?
answer:
-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------
 A psychiatrist is someone who specializes in mental health problems such as depression or bipolar disorder. He or she can identify symptoms of obsessive-compulsive disorder and prescribe treatment.

    ### G

# One Shot

In [11]:
%%time

one_shot_sample = """
question: What should I do if I want to stop dialysis?\n
answer: But you can choose not to have it or stop at any time. If you do, make sure to talk to your doctor about other treatments that can help you. Changes to your diet or lifestyle may improve your quality of life. If you want to stop dialysis because you feel depressed or ashamed, your doctor may urge you to speak to a counselor first. Sharing your feelings, taking antidepressants, or doing both of these things may help you make a more informed decision.\n
question: """

for prompt in test_prompt:
    one_shot_qa = one_shot_sample + prompt + "\n" + "answer:"
    print(one_shot_qa)
    print("-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------")
    print(inference(text = one_shot_qa, model = model, tokenizer = tokenizer, max_output_tokens = 200))
    print("-------------------END OF TEXT GENERATED BY LANGUAGE MODEL------------------------")


question: What should I do if I want to stop dialysis?

answer: But you can choose not to have it or stop at any time. If you do, make sure to talk to your doctor about other treatments that can help you. Changes to your diet or lifestyle may improve your quality of life. If you want to stop dialysis because you feel depressed or ashamed, your doctor may urge you to speak to a counselor first. Sharing your feelings, taking antidepressants, or doing both of these things may help you make a more informed decision.

question: What types of exercise are best for people with asthma?
answer:
-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------
 The answer: Exercise is an important part of controlling your asthma and reducing symptoms of asthma. It also helps control your blood pressure and cholesterol. Here are some tips to help you keep doing those things. Make sure you have a quiet location, like a locker room or bathroom. Ask your doctor if you can get help 

# Few Shot

In [None]:
%%time

few_shot_sample = """
question: What should I do if I want to stop dialysis?\n
answer: But you can choose not to have it or stop at any time. If you do, make sure to talk to your doctor about other treatments that can help you. Changes to your diet or lifestyle may improve your quality of life. If you want to stop dialysis because you feel depressed or ashamed, your doctor may urge you to speak to a counselor first. Sharing your feelings, taking antidepressants, or doing both of these things may help you make a more informed decision.\n
question: What are some tips to stay healthy during dialysis?\n
answer: Hemodialysis patients are also at an increased risk for infections. Try these tips to stay healthy: Check your access site daily for redness, pus, and swelling. If you see any, call your doctor. Keep the bandage that covers your catheter clean and dry.\n
question:"""

for prompt in test_prompt:
    few_shot_qa = few_shot_sample + prompt + "\n" + "answer:"
    print(few_shot_qa)
    print("-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------")
    print(inference(text = one_shot_qa, model = model, tokenizer = tokenizer, max_output_tokens = 200))
    print("-------------------END OF TEXT GENERATED BY LANGUAGE MODEL------------------------")


question: What should I do if I want to stop dialysis?

answer: But you can choose not to have it or stop at any time. If you do, make sure to talk to your doctor about other treatments that can help you. Changes to your diet or lifestyle may improve your quality of life. If you want to stop dialysis because you feel depressed or ashamed, your doctor may urge you to speak to a counselor first. Sharing your feelings, taking antidepressants, or doing both of these things may help you make a more informed decision.

question: What are some tips to stay healthy during dialysis?

answer: Hemodialysis patients are also at an increased risk for infections. Try these tips to stay healthy: Check your access site daily for redness, pus, and swelling. If you see any, call your doctor. Keep the bandage that covers your catheter clean and dry.

question:What types of exercise are best for people with asthma?
answer:
-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------

# Retrieval Augmented Generation Prompt Engineering

## Load the Vector Store Database

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

In [None]:
%%time

# Create a dictionary with model configuration options, specifying to use the CPU for computations
model_kwargs = {'device':'cpu'}

# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to False
encode_kwargs = {'normalize_embeddings': False}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embeddings = HuggingFaceEmbeddings(
    model_name = "sentence-transformers/all-MiniLM-l6-v2",     # Provide the pre-trained model's path
    model_kwargs = model_kwargs, # Pass the model configuration options
    encode_kwargs = encode_kwargs # Pass the encoding options
)

In [None]:
%%time

db = FAISS.load_local("data/RAG_data", embeddings)

## Create the Retriever

https://github.com/langchain-ai/langchain/discussions/3115

In [None]:
# Create a retriever object from the 'db' with a search configuration where it retrieves up to 4 relevant splits/documents.
retriever = db.as_retriever(search_type = "mmr", search_kwargs={"k": 4})

In [None]:
# test retriever
for prompt in test_prompt:
    print(prompt)
    print(retriever.get_relevant_documents(prompt)[0].page_content)
    print()

## Initialize the LLM Pipeline

### Define the Prompt Template

In [None]:
from langchain.prompts import PromptTemplate

qa_template = """You are a helpful assistant that can answer medical questions. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Context information is below:

{context}

Given the context information and not prior knowledge, answer the question: {question}
Answer: """

prompt_template = PromptTemplate(
    template = qa_template,
    input_variables = ["context", "question"]
)

In [None]:
# test prompt

print(
    prompt_template.invoke(
        {"context": "filler context", "question": "filler question"}
    ).to_string()
)

### Load the Model into a Chain

In [None]:
import json
from pathlib import Path
from pprint import pprint
import ast

from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.llms import HuggingFaceHub

In [None]:
%%time

pipe = pipeline("text-generation",
                model = model,
                tokenizer=tokenizer,
                model_kwargs = {"temperature": 0.4, "max_length": 100, "pad_token_id": tokenizer.eos_token_id, "do_sample": True},
                max_new_tokens = 100)

llm = HuggingFacePipeline(pipeline = pipe)

In [None]:
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
# https://python.langchain.com/docs/use_cases/question_answering/

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

## Generate Output

In [None]:
%%time

for prompt in test_prompt:
    print("question:", prompt)
    result = rag_chain.invoke(prompt)
    print("-------------------BELOW IS GENERATED BY LANGUAGE MODEL---------------------------")
    print()
    print(result)
    print()
    print("-------------------END OF TEXT GENERATED BY LANGUAGE MODEL------------------------")
    print()