In [1]:
# https://github.com/kennethleungty/Llama-2-Open-Source-LLM-CPU-Inference/tree/main/data
# https://towardsdatascience.com/running-llama-2-on-cpu-inference-for-document-q-a-3d636037a3d8

In [2]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings

In [3]:
# Pick up the files matching *.pdf under data/ and parse each one with PyPDFLoader
loader = DirectoryLoader(
    'data/',
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()

In [4]:
# Cut the loaded documents into overlapping chunks (chunk_size=500, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(documents)

In [5]:
# Embedding model used to vectorise the chunks; explicitly placed on the CPU
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

# Index the chunks with FAISS, then save the index locally so it can be reloaded later
vectorstore = FAISS.from_documents(texts, embeddings)
vectorstore.save_local('vectorstore/db_faiss')

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Prompt text for the QA chain. {context} and {question} are the two placeholders
# that get filled in; every line (including the last) ends with a newline.
qa_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

In [7]:
# Config: https://github.com/marella/ctransformers#config
# Download model from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main

from langchain.llms import CTransformers

# Llama-2-7B-Chat loaded from a local GGML file through the CTransformers wrapper
llm = CTransformers(
    model='models/llama-2-7b-chat.ggmlv3.q8_0.bin',  # path to the downloaded GGML model file
    model_type='llama',  # model family understood by ctransformers
    config={
        'max_new_tokens': 1000,
        'temperature': 0.01,
    },
)

In [8]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Turn the raw template string into a PromptTemplate
def set_qa_prompt():
    """Return a PromptTemplate built from the notebook-level ``qa_template``.

    The template is declared with two input variables, ``context`` and
    ``question``.
    """
    return PromptTemplate(
        template=qa_template,
        input_variables=['context', 'question'],
    )


# Build RetrievalQA object
def build_retrieval_qa(llm, prompt, vectordb, k=2):
    """Assemble a RetrievalQA chain of type 'stuff' on top of a vector store.

    Args:
        llm: Language model object, handed to the chain unchanged.
        prompt: Prompt object, passed to the chain as
            ``chain_type_kwargs['prompt']``.
        vectordb: Vector store; must provide ``as_retriever(search_kwargs=...)``.
        k: Value sent to the retriever as ``search_kwargs['k']`` (presumably
            the number of chunks fetched per query; confirm against the
            retriever in use). Defaults to 2, the value that was previously
            hard-coded, so existing three-argument calls behave as before.

    Returns:
        The object returned by ``RetrievalQA.from_chain_type``, configured
        with ``return_source_documents=True``.
    """
    retriever = vectordb.as_retriever(search_kwargs={'k': k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )


# Wire everything together into a ready-to-query QA object
def setup_dbqa():
    """Load the saved FAISS index and return a retrieval-QA chain over it.

    Creates the embedding model, loads the index from 'vectorstore/db_faiss'
    with it, builds the prompt via ``set_qa_prompt`` and passes everything to
    ``build_retrieval_qa``.

    Note: the language model is not a parameter; the notebook-level ``llm``
    must already be defined when this function is called.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    store = FAISS.load_local('vectorstore/db_faiss', embedder)
    return build_retrieval_qa(llm, set_qa_prompt(), store)

In [9]:
# Build the retrieval-QA chain once; the query cells that follow reuse this object
dbqa = setup_dbqa()

In [10]:
# Throwaway smoke-test query ("test dulu" is presumably Indonesian for "test first").
# The chain is called with a dict carrying the question under the 'query' key.
response = dbqa({'query': "test dulu"})

In [11]:
# Bare expression: show the raw response (query, result, source documents) as the cell output
response

{'query': 'test dulu',
 'result': 'The break-even test results submitted in March 2022 were positive, so no further action is required until the next break-even test.',
 'source_documents': [Document(page_content='test result submitted in March 2022 was positive.', metadata={'source': 'data/manu-20f-2022-09-24.pdf', 'page': 61}),
  Document(page_content='Rules was submitted in March 2022, based on our fiscal year 2021 and fiscal year 2020 audited financialstatements. The break-even test is based on a club’s audited pre-tax earnings. If the break-even test resultsare positive, no further action is required until the next break-even test. If the initial test is negative, a club isre-tested, using the UEFA definition of “adjusted earnings before tax,” which allows credit for depreciationof tangible fixed assets and expenditure on youth development', metadata={'source': 'data/manu-20f-2022-09-24.pdf', 'page': 61})]}

In [12]:
# Print the answer, then every retrieved chunk with the file and page it came from
separator = '=' * 50  # Formatting separator
print(f'\nAnswer: {response["result"]}')
print(separator)
source_docs = response['source_documents']
for doc_number, doc in enumerate(source_docs, start=1):
    print(f'\nSource Document {doc_number}\n')
    print(f'Source Text: {doc.page_content}')
    print(f'Document Name: {doc.metadata["source"]}')
    # NOTE(review): the loader's 'page' value may be a zero-based index; confirm before reading it as a printed page number
    print(f'Page Number: {doc.metadata["page"]}\n')
    print(separator)


Answer: The break-even test results submitted in March 2022 were positive, so no further action is required until the next break-even test.

Source Document 1

Source Text: test result submitted in March 2022 was positive.
Document Name: data/manu-20f-2022-09-24.pdf
Page Number: 61


Source Document 2

Source Text: Rules was submitted in March 2022, based on our fiscal year 2021 and fiscal year 2020 audited financialstatements. The break-even test is based on a club’s audited pre-tax earnings. If the break-even test resultsare positive, no further action is required until the next break-even test. If the initial test is negative, a club isre-tested, using the UEFA definition of “adjusted earnings before tax,” which allows credit for depreciationof tangible fixed assets and expenditure on youth development
Document Name: data/manu-20f-2022-09-24.pdf
Page Number: 61



In [13]:
# Ask a question that the annual-report PDF should be able to answer
response = dbqa({'query': "How much is the minimum guarantee payable by adidas?"})

rule = '=' * 50  # Formatting separator
print('\nAnswer: {}'.format(response["result"]))
print(rule)

# List the chunks the answer was based on
source_docs = response['source_documents']
for position, doc in enumerate(source_docs, start=1):
    print('\nSource Document {}\n'.format(position))
    print('Source Text: {}'.format(doc.page_content))
    print('Document Name: {}'.format(doc.metadata["source"]))
    # NOTE(review): the loader's 'page' value may be a zero-based index; confirm before reading it as a printed page number
    print('Page Number: {}\n'.format(doc.metadata["page"]))
    print(rule)


Answer: The minimum guarantee payable by adidas over the term of the agreement with us is £750 million, subject to certain adjustments.

Source Document 1

Source Text: The minimum guarantee payable by adidas over the term of our agreement with them is equal to
Document Name: data/manu-20f-2022-09-24.pdf
Page Number: 84


Source Document 2

Source Text: Pursuant to our contract with adidas, which began on 1 August 2015, the minimum guarantee payable
by adidas over the 10-year term of the agreement is equal to £750 million, subject to certain adjustments.See “Item 4. Information on the Company—Revenue Sectors—Commercial—Retail, Merchandising,Apparel & Product Licensing” for additional information regarding our agreement with adidas.
We also maintain a mixture of long-term debt and capacity under our revolving facilities in order to
Document Name: data/manu-20f-2022-09-24.pdf
Page Number: 74



In [14]:
# Definition-style question
response = dbqa({'query': "What is embeddings?"})
divider = '=' * 50  # Formatting separator
print(f'\nAnswer: {response["result"]}')
print(divider)

# Walk through the supporting chunks returned alongside the answer
source_docs = response['source_documents']
for ordinal, doc in enumerate(source_docs, start=1):
    print(f'\nSource Document {ordinal}\n')
    print(f'Source Text: {doc.page_content}')
    print(f'Document Name: {doc.metadata["source"]}')
    # NOTE(review): the loader's 'page' value may be a zero-based index; confirm before reading it as a printed page number
    print(f'Page Number: {doc.metadata["page"]}\n')
    print(divider)


Answer: Embeddings are data that has been transformed into n-dimensional matrices for use in deep learning computations. They represent individual elements in a dataset as vectors or tensors, which can be easily processed by deep neural networks.

Source Document 1

Source Text: them explicitly from the beginning.
As a general definition, embeddings are data that has been transformed
into n-dimensional matrices for use in deep learning computations. The
process of embedding (as a verb):
•Transforms multimodal input into representations that are easier to
perform intensive computation on, in the form of vectors , tensors, or
graphs [ 51]. For the purpose of machine learning, we can think of
vectors as a list (or array) of numbers.
Document Name: data/embeddings.pdf
Page Number: 4


Source Document 2

Source Text: What do embeddings actually look like? Here is one single embedding,
also called a vector , in three dimensions . We can think of this as a repre-
sentation of a single elemen

In [15]:
# Open-ended explanation request
response = dbqa({'query': "Explain word2vec to me"})
bar = '=' * 50  # Formatting separator
print(f'\nAnswer: {response["result"]}')
print(bar)

# Show where the answer came from: text, file name and page of each retrieved chunk
source_docs = response['source_documents']
for rank, doc in enumerate(source_docs, start=1):
    print(f'\nSource Document {rank}\n')
    print(f'Source Text: {doc.page_content}')
    print(f'Document Name: {doc.metadata["source"]}')
    # NOTE(review): the loader's 'page' value may be a zero-based index; confirm before reading it as a printed page number
    print(f'Page Number: {doc.metadata["page"]}\n')
    print(bar)


Answer: Word2Vec is a technique for converting words in a text corpus into numerical vectors that capture their semantic meaning. The vectors are learned based on the context in which the words appear, so similar words will be close together in the vector space. Word2Vec models use a shallow neural network to learn the vector representations from large amounts of text data. The key insight behind Word2Vec is that the vector representation of a word can be learned by predicting the context words around it. This allows the model to capture subtle nuances in meaning between similar words, such as "car" and "automobile".

Source Document 1

Source Text: relationship between them. For example, “The dog chased the cat” and “the
cat chased the dog” would have the same distance in the vector space, even
though they’re two completely different sentences.
Word2Vec is a family of models that has several implementations, each
of which focus on transforming the entire input dataset into vector rep