In [1]:
# Adapted from https://colab.research.google.com/drive/1OZpmLgd5D_qmjTnL5AsD1_ZJDdb7LQZI?usp=sharing#scrollTo=KH546j3nkFwX
import os
import pandas as pd
import matplotlib.pyplot as plt
from transformers import GPT2TokenizerFast
from transformers import AutoTokenizer
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# Path to the local GGUF model file; passed to LlamaCpp when the LLM is created
# (and to the commented-out LlamaCppEmbeddings alternative).
model_path = './models/llama-2-7b-chat.Q4_K_M.gguf'

# 1. Loading PDFs and chunking with LangChain

In [2]:
# Load each PDF with PyPDFLoader, then split the loaded documents into chunks of
# about 500 characters (20-character overlap) with RecursiveCharacterTextSplitter.
# All chunks from all files are collected in `all_splits`.
input_files = ["./docs/eBook-How-to-Build-a-Career-in-AI.pdf", "./docs/recipes.pdf", "./docs/annualreport.pdf"]
all_splits = []

# The splitter settings are identical for every file, so build it once up front
# instead of re-creating it on each loop iteration.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

for file in input_files:
    loader = PyPDFLoader(file)
    data = loader.load()

    splits = text_splitter.split_documents(data)
    all_splits.extend(splits)

In [3]:
# Total number of chunks collected from all input PDFs
len(all_splits)

2377

# 2. Embed text and store embeddings

In [4]:
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_community.vectorstores import Chroma

# Get embedding model
# Exactly one `embeddings` assignment should be active; the commented-out lines are
# alternatives. NOTE(review): OpenAIEmbeddings is not imported in the cells shown
# here, so it needs an import before that line can be enabled.
# embeddings = OpenAIEmbeddings()
embeddings = GPT4AllEmbeddings()
# embeddings = LlamaCppEmbeddings(model_path=model_path)

# Embed every chunk in `all_splits` with the selected model and store the result in
# a Chroma vector store, which the retrievers below are built from.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

# 3. Set up local LLM using LlamaCpp

In [5]:
from langchain_community.llms import LlamaCpp

# Settings forwarded to LlamaCpp below.
# NOTE(review): the Metal / Apple Silicon remarks on the next two lines look like
# they were carried over from the tutorial this notebook was adapted from --
# confirm they apply to the machine that actually runs this notebook.
n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Load the local model at `model_path` through llama.cpp. The resulting `llm` is
# shared by the RAG chain and the conversational chain defined later.
llm = LlamaCpp(
    model_path=model_path,        
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    # Previous context-size setting, kept for reference:
    # n_ctx=2048,
    n_ctx=3900,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    # presumably this is what makes llama.cpp's diagnostic lines appear in the cell output -- TODO confirm
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


# 4. Set up retrieval function

In [6]:
def format_docs(docs):
    """Join the ``page_content`` of every document in ``docs`` into one string.

    Consecutive documents are separated by a blank line; an empty ``docs``
    yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

In [7]:
# Custom RAG prompt: the retrieved text is substituted for {context} and the
# user's question for {question}.
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

{context}

Question: {question}

Helpful Answer:"""
rag_prompt = PromptTemplate.from_template(template)

retriever = vectorstore.as_retriever()

# First stage of the chain: the incoming question is sent to the retriever (whose
# documents are flattened to text by format_docs) and also passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# question -> {context, question} -> filled prompt -> LLM -> plain string
qa_chain = chain_inputs | rag_prompt | llm | StrOutputParser()

In [8]:
# Helper function to print the results
def query(question):
    """Stream the RAG chain's answer to ``question`` to stdout.

    Each chunk produced by ``qa_chain.stream`` is printed as soon as it arrives,
    without inserting separators between chunks.

    Args:
        question: The question text handed to ``qa_chain``.

    Returns:
        None. The answer is only printed.
    """
    for chunk in qa_chain.stream(question):
        print(chunk, end="", flush=True)
    # Terminate the answer with a newline so that back-to-back query() calls in the
    # same cell do not run their answers together on one line.
    print()

In [9]:
# Run a few sample questions through the RAG chain, in order.
sample_questions = (
    "how do I get started on a personal project in AI?",
    "what was the FY2022 return on equity?",
    "How to make Pineapple Chicken?",
)
for sample_question in sample_questions:
    query(sample_question)

 There are many resources available online to help you learn about AI and get started on a personal project. Some popular resources include Udemy, Coursera, edX, Kaggle, and GitHub. Additionally, there are many online communities and forums dedicated to AI, such as Reddit's r/MachineLearning and r/AI, where you can ask questions and get help from other members of the community. Finally, you can also consider participating in hackathons or competitions to gain experience working on real-world projects and to showcase your skills to potential employers.

Llama.generate: prefix-match hit


 The Return on Equity for FY2022 was 18.7%.

Llama.generate: prefix-match hit


 I don't know the answer to this question, as the text does not provide enough information to create a complete recipe for Pineapple Chicken. The ingredients list includes pineapple chunks and tidbits, but it does not specify how much of each ingredient to use or any cooking instructions beyond heating the oil and adding the ingredients. Without more context, I cannot provide a helpful answer.

# 5. Create chatbot with chat memory

In [10]:
from IPython.display import display
import ipywidgets as widgets

# Create a conversational retrieval chain that uses our vector store as the retriever.
# The chain accepts the chat history alongside each question; the history itself is
# kept by the caller and passed in on every call.
qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())

In [11]:
import html

# (question, answer) tuples from earlier turns; handed to the chain on every call so
# follow-up questions can build on previous answers.
chat_history = []

def on_submit(_):
    """Answer the question currently typed in ``input_box`` and render the exchange.

    The callback argument is ignored. The text is read from the module-level
    ``input_box``, which is cleared immediately. Blank input is ignored, and typing
    'exit' (case-insensitive) prints a goodbye message instead of querying the chain.
    Otherwise the question is sent to ``qa`` together with ``chat_history``, the new
    (question, answer) pair is appended to ``chat_history``, and both sides of the
    exchange are displayed as HTML widgets.
    """
    # Named `question` rather than `query` so the query() helper defined earlier
    # in the notebook is not shadowed inside this function.
    question = input_box.value.strip()
    input_box.value = ""

    if not question:
        # Nothing was typed; do not spend an LLM call on an empty question.
        return

    if question.lower() == 'exit':
        # Same bot name as the welcome message printed when the widget is created.
        print("Thank you for using the Transformers chatbot!")
        return

    result = qa.invoke({"question": question, "chat_history": chat_history})
    answer = result['answer']
    chat_history.append((question, answer))

    # widgets.HTML renders its value as markup, so escape the user's text and the
    # model's answer; otherwise '<', '>' or '&' in either one would be interpreted
    # as HTML instead of being shown literally.
    display(widgets.HTML(f'<b>User:</b> {html.escape(question)}'))
    display(widgets.HTML(f'<b><font color="blue">Chatbot:</font></b> {html.escape(answer)}'))

# Greet the user before the input box is shown.
print("Welcome to the Transformers chatbot! Type 'exit' to stop.")

# Text box for the user's questions; the on_submit handler reads and clears its value.
input_box = widgets.Text(placeholder='Please enter your question:')
# NOTE(review): running this cell emits a warning that points at the line below,
# presumably because Text.on_submit is deprecated in ipywidgets -- confirm, and if so
# migrate to observing the widget's value.
input_box.on_submit(on_submit)

# Render the input box in the cell output.
display(input_box)

Welcome to the Transformers chatbot! Type 'exit' to stop.


  input_box.on_submit(on_submit)


Text(value='', placeholder='Please enter your question:')

Llama.generate: prefix-match hit


HTML(value='<b>User:</b> what was the FY2022 return on equity?')

HTML(value='<b><font color="blue">Chatbot:</font></b>  The answer to the question can be found in the last par…

Llama.generate: prefix-match hit
Llama.generate: prefix-match hit


HTML(value='<b>User:</b> Great! Can you give me more information of this company?')

HTML(value='<b><font color="blue">Chatbot:</font></b>  The answer to the question can be found in the passage …