<a href="https://colab.research.google.com/github/moneebullah25/TeacherAssistant/blob/main/Teacher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive: the source PDFs and the persisted vector store used below
# are all addressed through paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# CONV_CHAT_BOT Setup

In [None]:
%%capture
!pip install langchain
!pip install pypdf
!pip install sentence_transformers
!pip install chromadb
!pip install accelerate
!pip install --upgrade accelerate
!pip install bitsandbytes

In [None]:
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# Source PDFs to index (stored on the mounted Google Drive).
pdf_files = ["/content/drive/MyDrive/Books/Basic_Marketing_2021.pdf", "/content/drive/MyDrive/Books/Marketing_Book.pdf"]

# Directory where Chroma writes the collection so it can be reloaded later.
PERSIST_DIR = "/content/drive/MyDrive/Conv_bot/Data/Docs/Persist_dir"

# Load every PDF exactly once and gather the loaded documents in a single list.
all_documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    documents = loader.load()
    all_documents.extend(documents)

# Split the documents into small, slightly overlapping chunks for retrieval.
text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=40)
all_documents = text_splitter.split_documents(all_documents)

# Embedding model used both for indexing here and for querying later.
hf_embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Create the Chroma vector store from all chunks.
Vector_db = Chroma.from_documents(collection_name="document_docs", documents=all_documents, embedding=hf_embed, persist_directory=PERSIST_DIR)

# Persist the vector store to PERSIST_DIR.
Vector_db.persist()




# 1/ Download our embedding model from Hugging Face
(same as data preparation)

In [None]:
def get_available_gpus():
    """Return the names of all GPU devices that TensorFlow reports.

    The complete device list (CPU included) is printed as a side effect so
    the hardware of the current runtime is visible in the cell output.
    """
    from tensorflow.python.client import device_lib

    devices = device_lib.list_local_devices()
    print(devices)

    gpu_names = []
    for device in devices:
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

In [None]:
# Start here to load a previously-saved DB
import warnings

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Warn (rather than raise) when no GPU is visible, so the user is actually told
# about the problem while CPU-only sessions can still continue past this cell.
if len(get_available_gpus()) == 0:
    warnings.warn("Running dolly without GPU will be slow. We recommend you switch to a Single Node cluster with at least 1 GPU to properly run this demo.")

# NOTE(review): the ingestion cell persists its collection under ".../Docs/Persist_dir",
# while this cell opens ".../Docs/db_path" - confirm which directory holds the saved DB.
vector_db_path = "/content/drive/MyDrive/Conv_bot/Data/Docs/db_path"

# Same embedding model and collection name as the ingestion cell.
hf_embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
db = Chroma(collection_name="document_docs", embedding_function=hf_embed, persist_directory=vector_db_path)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1394150920888293744
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 13118799872
locality {
  bus_id: 1
  links {
  }
}
incarnation: 9906092392026291983
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
xla_global_id: 416903419
]


In [None]:
# Shared chat memory: answer_question() appends each question/answer turn to this
# list and passes the list to the chain as the `conversation_history` prompt input.
conversation_history = []

In [None]:
def get_similar_docs(question, similar_doc_count):
    """Look up the passages most similar to ``question``, without repeated text.

    Queries the module-level ``Vector_db`` store for ``similar_doc_count`` hits and
    returns them in ranking order, keeping only the first hit for each distinct
    ``page_content``. The result can therefore be shorter than requested.
    """
    hits = Vector_db.similarity_search(question, similar_doc_count)

    # dicts keep insertion order, and setdefault never overwrites an existing key,
    # so this retains the first hit seen for every distinct passage text.
    first_hit_by_text = {}
    for hit in hits:
        first_hit_by_text.setdefault(hit.page_content, hit)

    return list(first_hit_by_text.values())


# Smoke-test the retriever with a sample query and print the returned passages:
for doc in get_similar_docs("What is prison?", 4):
    print(doc)

page_content='and a. Pick any six dif ficult words from this video and write their meanings\nand use each in a sentence. b. Comment on the statement "He who does not\nmove, does not notice his chains" and elaborate it clearly in light of what he\nsaid. [100+ words] http://www .ted.com/talks/glenn_greenwald why privacy\nmatters?language=en OR on Email Privacy http://www .ted.com/talks/andy\nyen think your email s_ private think again Learning outcomes: Some top\nbrands are risking privacy of world. This assignments sparks interest in how\nto avoid it. It also introduces to TED talks. 56_' metadata={'page': 69, 'source': '/content/drive/MyDrive/Books/Basic_Marketing_2021.pdf'}


# 3/ Prompt engineering with langchain
Now we can combine a language model with a prompting strategy to build a LangChain chain that answers questions.

In [None]:
# Prompt skeleton for the QA chain. It has three placeholders, matching the
# input_variables declared in build_qa_chain():
#   {conversation_history} - the stored chat history passed in by answer_question()
#   {context}              - the retrieved book passages handed to the chain
#   {question}             - the current student question
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  {conversation_history}

  {context}

  Question: {question}

  Response:
  """

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from langchain import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain

def build_qa_chain():
  """Build the question-answering chain backed by a Dolly text-generation pipeline.

  Creates a Hugging Face pipeline for ``databricks/dolly-v2-3b`` (8-bit weights,
  fp16 compute), wraps it for LangChain and combines it with the module-level
  ``template`` prompt in a "stuff" QA chain.

  Returns:
      The chain produced by ``load_qa_chain``; call it with ``input_documents``,
      ``conversation_history`` and ``question``.
  """
  torch.cuda.empty_cache()
  # dolly-v2-3b is the smallest Dolly v2 checkpoint; dolly-v2-7b / dolly-v2-12b are larger alternatives.
  model_name = "databricks/dolly-v2-3b"

  # The pipeline loads the model itself, so no separate from_pretrained() call is
  # needed - a second load would only put another copy of the weights in GPU memory.
  # Increase max_new_tokens for a longer response.
  # load_in_8bit requires bitsandbytes; float16 is used because GPUs such as the
  # T4 or V100 have no bfloat16 support.
  # NOTE: trust_remote_code=True runs code shipped with the model repository.
  instruct_pipeline = pipeline(
      model=model_name,
      torch_dtype=torch.float16,
      trust_remote_code=True,
      device_map="auto",
      return_full_text=True,
      max_new_tokens=512,
      top_p=0.95,
      top_k=50,
      model_kwargs={'load_in_8bit': True},
  )

  prompt = PromptTemplate(input_variables=['context', 'conversation_history', 'question'], template=template)
  hf_pipe = HuggingFacePipeline(pipeline=instruct_pipeline)

  # verbose=True prints the fully formatted prompt on every call.
  return load_qa_chain(llm=hf_pipe, chain_type="stuff", prompt=prompt, verbose=True)

In [None]:
# Build the chain once; answer_question() reuses this module-level instance on every call.
qa_chain = build_qa_chain()

# 4/ Using the Chain for Simple Question Answering
That's it! It's ready to go. Define a function to answer a question and pretty-print the answer, with its sources.

In [None]:
def displayHTML(html):
    """Render an HTML string as rich output in the notebook."""
    from IPython.display import HTML

    rich_output = HTML(html)
    display(rich_output)


def answer_question(question):
    """Answer one question with the QA chain and render the result as HTML.

    Retrieves the passages most similar to ``question``, runs ``qa_chain`` over
    them together with the stored conversation history, records the new turn and
    displays the question, the answer and the supporting passages with their
    source and page.

    Args:
        question: The student's question as plain text.

    Returns:
        None. The output is shown through ``displayHTML``; the turn is stored in
        the module-level ``conversation_history`` list.
    """
    from html import escape  # stdlib; imported locally so this cell stands alone

    similar_docs = get_similar_docs(question, similar_doc_count=4)
    result = qa_chain({
        "input_documents": similar_docs,
        "conversation_history": conversation_history,
        "question": question,
    })

    if result is None:
        # Show the fallback message instead of returning silently.
        displayHTML("<h1>Couldn't find data from the book</h1>")
        return

    answer = result["output_text"]

    # Remember the latest turn and drop everything older: only the most recent
    # question/answer pair is carried into the next prompt.
    conversation_history.append({"user_question": question, "bot_response": answer})
    del conversation_history[:-1]

    # Question, answer and passages are free text (user input, model output, PDF
    # extraction), so escape them before embedding them in markup.
    result_html = f"<p><blockquote style=\"font-size:24px\">{escape(question)}</blockquote></p>"
    result_html += f"<p><blockquote style=\"font-size:18px\">{escape(answer)}</blockquote></p>"
    result_html += "<p><hr/></p>"
    for d in similar_docs:
        # Tolerate documents that lack source/page metadata.
        source_id = escape(str(d.metadata.get("source", "unknown")))
        page = escape(str(d.metadata.get("page", "?")))
        result_html += f"<p><blockquote>{escape(d.page_content)}<br/>(Source: <p>{source_id} Page: {page}</p>)</blockquote></p>"
    displayHTML(result_html)

In [None]:
# Start the demo from an empty history so turns from earlier runs do not end up in the first prompt.
conversation_history.clear()

In [None]:
# Probe the fallback instruction in the prompt: ask what happens when the answer is unknown.
answer_question("If I query you about something which you don't know what will be your response?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  []

  CH CH CH CH CH-12. GCH-1 1 . CH-10 CH-9 \ GH CH - 13 CH
- 14 Aapter @ Discussion: Objectionable Questions A new hired researcher
might be asking questions that may not be deemed ideal. T ake a look at
following questions, suggest improvements in those that have any flaw . ¢
What is your salary? ¢ Are you strong or weak supporter of ...? (ambiguous
question) ¢ Do your children behav

In [None]:
# Second turn: the previous question/answer pair is now included in the prompt as history.
answer_question("I have exam tomorrow on Marketing. What should I do to pass the exam?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  [{'user_question': "If I query you about something which you don't know what will be your response?", 'bot_response': '\nSure, I will check the website for the information you mentioned.'}]

  otpend« (Each class may have an unannounced CA or Quiz, some
CAs may be non-graded) Mid T erm 15% Project/Slides/Presentation 70% -
Sessional work Final Exam + V ivas 30% Prior preparation, regular

In [None]:
# Follow-up on the exam question from the previous turn.
answer_question("I know that marketing is to sell things to people through various means. Is it enough to pass the course?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  [{'user_question': 'I have exam tomorrow on Marketing. What should I do to pass the exam?', 'bot_response': '\nSure, I will check the website for the information you mentioned'}]

  you know this you’ll become more alert consumer with rational
decision making. This also reflects the purpose of education; to unlock the
mind, to educate it and feed it with knowledge that is beneficial for


In [None]:
# Factual question about a topic covered in the indexed books.
answer_question("What is Postmodernism?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  [{'user_question': 'I know that marketing is to sell things to people through various means. Is it enough to pass the course?', 'bot_response': '\nNo.'}]

  18 The Marketing Book
The postmodern condition
Paralleling the transformations that are taking
place in the aesthetic and economic spheres, a
postmodern turn in the nature of knowledge
and thought has transpired. The so-called
Enligh

In [None]:
# Short definition question.
answer_question("What is marketing?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  [{'user_question': 'What is Postmodernism?', 'bot_response': '\nPostmodernism is a way of looking at the world\naskance at the world. A pose, if you prefer.\nIrreverence, parody, playfulness, cynicism and\nabsolute unwillingness to accept the accepted'}]

  The Marketing Book
Fifth Edition
Edited by
MICHAEL J. BAKER
OXFORD AMSTERDAM BOSTON LONDON NEW YORK PARIS
SAN DIEGO SAN FRANCISCO SI

In [None]:
# Same topic as the previous turn, now asking for a longer answer with examples.
answer_question("Explain in detail what is marketing? Also add some examples")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  [{'user_question': 'What is marketing?', 'bot_response': '\nMarketing is the practice of identifying and satisfying customer demand through the generation of ideas, products or services to meet customer needs.'}]

  you know this you’ll become more alert consumer with rational
decision making. This also reflects the purpose of education; to unlock the
mind, to educate it and feed it with

In [None]:
# Question about a specific marketing concept.
answer_question("What do you mean by capturing customer value?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mBelow is an instruction that describes a task. Write a response that appropriately completes the request.

  Instruction:
  You are a marketing teacher at university. Your job is to provide answers to student questions.
  Use only information in the following paragraphs to answer the question at the end.
  Explain the answer with reference to these paragraphs.
  If you don't have the information in below paragraphs then give response "I will get back to you on this".

  [{'user_question': 'Explain in detail what is marketing? Also add some examples', 'bot_response': '\nMarketing, also called market marketing, is the practice of identifying and satisfying customer demand through the generation of ideas, products or services to meet customer needs. Marketing  encompasses a lot more as a company  must do several activities to understand the needs of people