# How to Run Llama 2 Locally with Python (Document QA)

This Jupyter Notebook is part of a Blog Post on https://swharden.com

https://swharden.com/blog/2023-07-29-ai-chat-locally-with-python/

In [1]:
%pip install langchain langchain-community sentence_transformers faiss-cpu ctransformers Transformers

Collecting langchain
  Downloading langchain-0.2.6-py3-none-any.whl (975 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/975.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m276.5/975.5 kB[0m [31m8.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m849.9/975.5 kB[0m [31m12.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m975.5/975.5 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.2.6-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1

In [2]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA

from IPython.display import display, HTML
import json
import time
import pathlib

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Consume information in the documents

In [4]:
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import pandas as pd
from langchain.docstore.document import Document


In [5]:
# # Load the CSV file with the correct encoding
# file_path = "/content/BankFAQs.csv"
# encoding = 'utf-8'  # or 'latin1' if utf-8 doesn't work
# df = pd.read_csv(file_path, encoding=encoding)

# # Convert the DataFrame into a list of Document objects
# documents = [Document(page_content=f"Question: {row['Question']}\nAnswer: {row['Answer']}\nClass: {row['Class']}")
#              for _, row in df.iterrows()]

In [6]:
%pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2


In [15]:
import pandas as pd
import pathlib
import docx
from langchain.docstore.document import Document
import os

def read_docx(file_path):
    doc = docx.Document(file_path)
    return "\n".join([paragraph.text for paragraph in doc.paragraphs])

def load_all_files(directory_path):
    data = []
    for file_path in pathlib.Path(directory_path).glob("*"):
        if file_path.suffix == '.csv':
            df = pd.read_csv(file_path)
            for _, row in df.iterrows():
                content = " ".join(str(value) for value in row.values)
                data.append(Document(page_content=content))
        elif file_path.suffix == '.txt':
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
                data.append(Document(page_content=content))
        elif file_path.suffix == '.docx':
            content = read_docx(file_path)
            data.append(Document(page_content=content))
    return data


In [16]:
# def load_all_files(directory):
#     data = []

#     if os.path.isfile(directory):
#         file_path = directory
#         if file_path.endswith('.xlsx'):
#             df = pd.read_excel(file_path)
#             data.extend(df.values.flatten().tolist())
#         elif file_path.endswith('.txt'):
#             with open(file_path, 'r') as file:
#                 data.append(file.read())
#     else:
#         for filename in os.listdir(directory):
#             file_path = os.path.join(directory, filename)
#             if filename.endswith('.xlsx'):
#                 df = pd.read_excel(file_path)
#                 data.extend(df.values.flatten().tolist())
#             elif filename.endswith('.txt'):
#                 with open(file_path, 'r') as file:
#                     data.append(file.read())

#     return data



In [17]:
documents = load_all_files('/content/drive/MyDrive/Data')


In [18]:
documents

[Document(page_content="CUSTOMERS\nConsistently enhance customer service by focusing on customer experience, innovation and quality assurance to deliver superior Axis Banking Experience\n\nWe continue to build sound customer franchises across distinct businesses and forge long-term relationships with our customers. We provide high-quality services and products through regular customer engagement in a convenient manner. Our customer centricity model has four key elements that address the design, delivery and service aspects of our products and services\nMulti-channel delivery\nCustomized solution\nDependable technology\nSimple process\n\nProducts and services\n Our objective is to offer a full range of financial products and banking services catering to all levels of the economic pyramid. The products and services are delivered through multiple channels, thus giving our customers the flexibility to choose the channel that best suits them. Being a customer-centric bank, we place emphasis

In [26]:
from transformers import AutoTokenizer, AutoModel
import numpy as np
import torch

# def create_embeddings(data, model_name='sentence-transformers/all-MiniLM-L6-v2'):
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModel.from_pretrained(model_name)

#     def embed_text(text):
#         inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
#         with torch.no_grad():
#             embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
#         return embeddings

#     embeddings = np.vstack([embed_text(text) for text in data])
#     return embeddings



In [29]:
# Interpret information in the CSV file
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
texts = splitter.split_documents(documents)

# Create embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'}
)

# Create and save the local database
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")



### Prepare a LLM that knows about our documents

In [None]:
# prepare the template we will use when prompting the AI
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""


In [None]:
# load the language model
llm = CTransformers(model='/content/drive/MyDrive/models/llama-2-7b-chat.ggmlv3.q8_0.bin',
                    model_type='llama',
                    config={'max_new_tokens': 256, 'temperature': 0.01})

# load the interpreted information from the local database
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})
db = FAISS.load_local("faiss", embeddings,allow_dangerous_deserialization=True)

# prepare a version of the llm pre-loaded with the local content
retriever = db.as_retriever(search_kwargs={'k': 2})
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'question'])

QA_LLM = RetrievalQA.from_chain_type(llm=llm,
                                     chain_type='stuff',
                                     retriever=retriever,
                                     return_source_documents=True,
                                     chain_type_kwargs={'prompt': prompt})



## Ask Questions About our Documents

In [None]:
def query(model, question):
    model_path = model.combine_documents_chain.llm_chain.llm.model
    model_name = pathlib.Path(model_path).name
    time_start = time.time()
    output = model({'query': question})
    response = output["result"]
    time_elapsed = time.time() - time_start
    display(HTML(f'<code>{model_name} response time: {time_elapsed:.02f} sec</code>'))
    display(HTML(f'<strong>Question:</strong> {question}'))
    display(HTML(f'<strong>Answer:</strong> {response}'))

In [None]:
query(QA_LLM, "How can I obtain an IVR Password")

  warn_deprecated(


In [None]:
query(QA_LLM, "Who is the Prime Minister of India?")

In [None]:
query(QA_LLM, "How will the OTP be sent to the customers?")

In [None]:
query(QA_LLM,"Where can I shop with Verified by Visa?")

In [None]:
query(QA_LLM, "How can I obtain a cheque book?")

In [None]:
query(QA_LLM, "What is LIME?")

In [None]:
query(QA_LLM, "What is PingPay?")

In [None]:
query(QA_LLM, "How to open a Savings Account?")

In [None]:
query(QA_LLM,"How do I check the balance in my Current Account")

In [None]:
query(QA_LLM," How do I apply for a Locker")

In [None]:
query(QA_LLM," How will I know if interest rate on loan products is changed?")