# MysticGPT

## ChatBot code
*Note : use GPU*

**Also change prompt when you run**

*Output quality is highly dependent on the prompt, so change it if the answers come out weird*

In [None]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip install -q datasets loralib sentencepiece
!pip -q install bitsandbytes accelerate xformers
!pip -q install langchain
!pip -q install peft chromadb
!pip -q install unstructured
!pip install -q sentence_transformers
!pip -q install pypdf

In [None]:
!nvidia-smi
# Check the GPU stats

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

In [None]:
# Quantization config for loading the model: 4-bit weights with the "nf4"
# quant type, bfloat16 compute dtype, double quantization turned off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

In [None]:
model_id = "HuggingFaceH4/zephyr-7b-alpha"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config = bnb_config,device_map={"":0})
# Load the quantized LLM in context (no API)

In [None]:
!nvidia-smi
# Check GPU utilization

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader

In [None]:
from google.colab import files
uploaded = files.upload()
# Upload the PDF to Colab

In [None]:
for fn in uploaded.keys():
    loader = PyPDFLoader(fn)
# Can add support for multiple pdfs in colab this way

In [None]:
text_splitter = RecursiveCharacterTextSplitter(

    chunk_size = 500,
    chunk_overlap  = 20,
    length_function = len,
)
# Split th text into chunks for embeddings, set a really chunk size, due to GPU requirements

In [None]:
pages = loader.load_and_split(text_splitter)
db = Chroma.from_documents(pages, HuggingFaceEmbeddings(), persist_directory = '/content/db')
# Store it into a vector database which can be retrieved later
# Maybe for large scale cases, FAISS is better?

In [None]:
import json
import textwrap

# Delimiters wrapped around the instruction and the system prompt.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""


# Construct a prompt template
def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Return the full prompt string for one instruction.

    The pieces are concatenated in this order: B_INST, B_SYS, the system
    prompt, E_SYS, the instruction, E_INST. Nothing is stripped or escaped.

    Args:
        instruction: Text placed after the system block; it may contain
            template placeholders, which are passed through untouched.
        new_system_prompt: System prompt text; defaults to
            DEFAULT_SYSTEM_PROMPT.

    Returns:
        The assembled prompt as a single string.

    NOTE(review): these markers look like the Llama-2 chat format, while the
    notebook loads a Zephyr model - confirm against the model card.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)


In [None]:
# Instruction part of the prompt. {context} and {question} are placeholders;
# they are the names later declared as the PromptTemplate input variables.
instruction = "Given the context that has been provided. \n {context}, Answer the following question - \n{question}"

# System prompt describing the persona and answer style expected from the model.
# InstiBot.update_prompt falls back to this value when called without a prompt.
system_prompt = """You are an expert and experienced student from IIT Bombay who knows all the rules for UG programs.
Provide concise, correct and useful answers when asked. Mention the page number or section from where the given information can be referenced. """

# Preview the assembled template; as the last expression in the cell its value
# is shown as the cell output.
get_prompt(instruction, system_prompt)

In [None]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate,  LLMChain
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory

In [None]:
# Assemble the final prompt text and echo it so it can be checked before use.
template = get_prompt(instruction, system_prompt)
print(template)

# Wrap the text in a LangChain PromptTemplate with "context" and "question"
# as its two input variables.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

In [None]:
# Memory and retriever handed to the LLM chain: a windowed conversation buffer
# (k=5) stored under the "chat_history" key and returned as message objects,
# plus a retriever over the vector store built earlier.
memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    return_messages=True,
)
retriever = db.as_retriever()

In [None]:
class InstiBot:
    """
    Creates a chatbot with the HuggingFaceH4/zephyr-7b-alpha chat model.

    A Hugging Face pipeline is wrapped in a LangChain
    ConversationalRetrievalChain, using the given memory, retriever and
    max_new_tokens. To get a response from the bot, just call the instance.
    For example,

    Bot = InstiBot(...)
    response = Bot({"question": ... })
    """

    def __init__(self, memory, prompt, task: str = "text-generation", retriever=None, max_new_tokens=512):
        """Build the pipeline, the LLM wrapper and the retrieval chain.

        Args:
            memory: Conversation memory object passed to the chain.
            prompt: Prompt template used by the chain's combine-docs step.
            task: Hugging Face pipeline task name.
            retriever: Retriever passed to the chain. When omitted, the
                notebook-level ``retriever`` variable is looked up at call
                time, so a rebuilt retriever is picked up without having to
                re-run this class cell.
            max_new_tokens: Generation length limit passed to the pipeline.

        Raises:
            ValueError: If no retriever is given and no notebook-level
                ``retriever`` exists.
        """
        if retriever is None:
            # The parameter shadows the module-level name, hence globals().
            retriever = globals().get("retriever")
            if retriever is None:
                raise ValueError(
                    "No retriever was passed and no notebook-level `retriever` is defined."
                )

        self.memory = memory
        self.prompt = prompt
        self.retriever = retriever

        self.hf_pipe = self.create_pipeline(max_new_tokens, task=task)
        self.llm = HuggingFacePipeline(pipeline=self.hf_pipe)
        self.convo_chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=self.retriever,
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt},
        )

    def create_pipeline(self, max_new_tokens=512, task="text-generation"):
        """Return a Hugging Face pipeline over the notebook-level model and tokenizer.

        Args:
            max_new_tokens: Generation length limit passed to the pipeline.
            task: Pipeline task name; previously hardcoded to
                "text-generation", which remains the default.
        """
        return pipeline(
            task,
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
        )

    def __call__(self, *args, **kwargs):
        """Forward the call to the retrieval chain and return its result.

        Both positional and keyword arguments are passed through unchanged.
        """
        return self.convo_chain(*args, **kwargs)

    def clear_llm_memory(self):
        """Clear the conversation memory."""
        self.memory.clear()

    def update_prompt(self, sys_prompt=None):
        """Rebuild the prompt from a new system prompt and install it on the chain.

        Args:
            sys_prompt: New system prompt text. When empty or None, the
                notebook-level ``system_prompt`` is used instead.
        """
        if not sys_prompt:
            sys_prompt = system_prompt
        template = get_prompt(instruction, sys_prompt)
        prompt = PromptTemplate(template=template, input_variables=["context", "question"])

        # Keep the instance attribute in sync with what the chain actually uses.
        self.prompt = prompt
        self.convo_chain.combine_docs_chain.llm_chain.prompt = prompt


In [None]:
from IPython.display import display, Markdown, Latex

In [None]:
Bot = InstiBot(memory = memory, prompt = prompt)

In [None]:
response = Bot({"question": "Explain in brief : Maya?"})
display(Markdown(f"{response['answer']}"))