In [1]:
# Adapted from https://colab.research.google.com/drive/1OZpmLgd5D_qmjTnL5AsD1_ZJDdb7LQZI?usp=sharing#scrollTo=KH546j3nkFwX
import os
import pandas as pd
import matplotlib.pyplot as plt
from transformers import GPT2TokenizerFast
from transformers import AutoTokenizer
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from constants import MODEL_PATH

# To run the following cells, first download the chat model from 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf' and place it in './models/'.

# 1. Loading PDFs and chunking with LangChain

In [3]:
# Load every input PDF and split it into overlapping chunks.
input_files = ["./docs/eBook-How-to-Build-a-Career-in-AI.pdf", "./docs/recipes.pdf", "./docs/annualreport.pdf"]

# The splitter is configured identically for every file, so build it once
# instead of re-creating it on each loop iteration.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

# Accumulates the chunks of all files; later cells embed and index this list.
all_splits = []

for file in input_files:
    loader = PyPDFLoader(file)
    data = loader.load()

    splits = text_splitter.split_documents(data)
    all_splits.extend(splits)

In [3]:
# Total number of chunks collected from all input PDFs.
len(all_splits)

2377

# 2. Embed text and store embeddings

In [4]:
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma

# Get embedding model
# embeddings = OpenAIEmbeddings()
embeddings = GPT4AllEmbeddings()

# Directory where Chroma keeps the index between notebook runs.
persist_directory = './docs/chroma/'

os.makedirs(persist_directory, exist_ok=True)

# Open the persisted store first. Calling Chroma.from_documents on every run
# adds the same chunks to the existing collection again, so the index grows
# with duplicates each time this cell is executed.
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# Only embed and index the chunks when the persisted collection is empty.
# To rebuild after changing the input PDFs (or to drop duplicates left by
# earlier runs), delete `persist_directory` and re-run this cell.
if vectorstore._collection.count() == 0:
    vectorstore.add_documents(all_splits)

In [5]:
# Print how many records the underlying Chroma collection reports.
# `_collection` is a private attribute, so this may break across library versions.
# If the number exceeds len(all_splits), the persisted store likely holds
# duplicates from earlier runs of the indexing cell.
print(vectorstore._collection.count())

11340


In [6]:
# Smoke-test retrieval: run a similarity search for a sample question,
# asking for k=3 results.
question = "what was the FY2022 return on equity?"
docs = vectorstore.similarity_search(question,k=3)
# Number of documents that came back.
len(docs)

3

# 3. Set up local LLM using LlamaCpp

In [4]:
from langchain_community.llms import LlamaCpp

# Build the local LLM from the GGUF file at MODEL_PATH (imported from constants).
# NOTE(review): the comments below assume Apple Silicon / Metal, but the system
# info recorded under this cell reports AVX = 1 and NEON = 0, which suggests the
# saved run was on an x86 CPU — confirm whether n_gpu_layers has any effect there.
n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

llm = LlamaCpp(    
    model_path=MODEL_PATH,    
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    # n_ctx=2048,
    n_ctx=3900,  # context window size; the prompt built from retrieved chunks must fit in it
    f16_kv=True,  # MUST be set to True, otherwise you will run into problems after a couple of calls
    verbose=True,  # emits llama.cpp log lines (e.g. "Llama.generate: prefix-match hit") in cell output
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


# 4. Set up retrieval function

In [8]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [9]:
from langchain.chains import RetrievalQA

# Customizing the prompt: the model is told to answer only from the supplied
# context, to admit when it does not know, and to use at most three sentences.
# {context} and {question} are the template's two input variables.
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

{context}

Question: {question}

Helpful Answer:"""
rag_prompt = PromptTemplate.from_template(template)

# One retriever over the vector store, shared by whichever chain option is active.
retriever = vectorstore.as_retriever()

# # build chain option 1
# qa_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | rag_prompt
#     | llm
#     | StrOutputParser()
# )

# build chain option 2
# Reuse `retriever` instead of creating a second, identical one, so both chain
# options always query the vector store with the same retriever settings.
# The `query` helper reads the "result" key of this chain's output.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": rag_prompt},
)

In [10]:
# Helper function to print the results
def query(question):
    """Run `question` through `qa_chain` and print the answer text.

    Iterates over everything `qa_chain.stream(question)` yields and writes the
    "result" entry of each item to stdout, flushing immediately and without
    appending a newline. Returns None.
    """
    for chunk in qa_chain.stream(question):
        answer_text = chunk["result"]
        print(answer_text, end="", flush=True)

In [11]:
# Example question; presumably targets ./docs/eBook-How-to-Build-a-Career-in-AI.pdf — verify against the retrieved sources.
query("how do I get started on a personal project in AI?")

 To get started on an AI project, identify a problem or area of interest and research existing solutions. Once you have a good understanding of the space, brainstorm ideas for your own solution. Then, break down the project into smaller tasks and start building. Finally, test and refine your solution until it meets your desired outcome.




In [12]:
# Example question; presumably targets ./docs/annualreport.pdf — verify against the retrieved sources.
query("what was the FY2022 return on equity?")

Llama.generate: prefix-match hit


 The FY2022 return on equity was 18.7%.

In [13]:
# Example question; presumably targets ./docs/recipes.pdf — verify against the retrieved sources.
query("What is the recipe for a Basic pasta?")

Llama.generate: prefix-match hit


 Here are the basic ingredients needed to make a simple tomato pasta dish. Dried pasta, water, salt, tomato puree, cheese/tuna/both or none. Method 1: Boil your pasta adding salt to the water for later use. While the pasta is cooking grate your cheese/drain your tuna. Then add about half a tube of tomato puree, stirring well to give you a passata consistency. Of course, you can just use passata if you want and drain the pasta instead. Add the tuna if using and leave for a minute to heat up. Add salt/pepper to taste and serve into a bowl with lots of cheese on top. Enjoy!

# 5. Create chatbot with chat memory

In [14]:
from IPython.display import display
import ipywidgets as widgets

# Create a conversation chain that uses our vectordb as the retriever; this also allows for chat history management.
# The chat cell calls it with a "question" and the running "chat_history", and reads the "answer" key of the result.
qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())

In [15]:
import html

# Running list of (question, answer) tuples; passed back to the chain on every
# turn so follow-up questions can refer to earlier ones.
chat_history = []

def on_submit(_):
    """Handle one submission from `input_box`.

    Reads and clears the text box, then:
      * ignores empty / whitespace-only input,
      * prints a goodbye message when the user types 'exit' (any casing),
      * otherwise sends the question plus `chat_history` to `qa`, records the
        exchange in `chat_history`, and displays both sides of it.

    Args:
        _: The argument supplied by the widget callback; unused.
    """
    # Local is named `user_query` so it does not shadow the notebook's `query()` helper.
    user_query = input_box.value.strip()
    input_box.value = ""

    # Do not send a blank question to the LLM.
    if not user_query:
        return

    if user_query.lower() == 'exit':
        # Same bot name as the welcome message printed below.
        print("Thank you for using the Transformers chatbot!")
        return

    # `.invoke` is the Runnable-style call; calling the chain object directly is deprecated.
    result = qa.invoke({"question": user_query, "chat_history": chat_history})
    answer = result['answer']
    chat_history.append((user_query, answer))

    # Both strings are untrusted (typed by the user / generated by the model).
    # Escape them so characters such as '<' or '&' are shown literally instead
    # of being interpreted as markup by the HTML widget.
    display(widgets.HTML(f'<b>User:</b> {html.escape(user_query)}'))
    display(widgets.HTML(f'<b><font color="blue">Chatbot:</font></b> {html.escape(answer)}'))

print("Welcome to the Transformers chatbot! Type 'exit' to stop.")

input_box = widgets.Text(placeholder='Please enter your question:')
# NOTE(review): `on_submit` is deprecated in recent ipywidgets releases (it emits
# a DeprecationWarning). The documented replacement is `continuous_update=False`
# plus `observe(..., 'value')`, but that also fires when the box loses focus, so
# the Enter-only behaviour is kept here.
input_box.on_submit(on_submit)

display(input_box)

Welcome to the Transformers chatbot! Type 'exit' to stop.


  input_box.on_submit(on_submit)


Text(value='', placeholder='Please enter your question:')