In [None]:
# Root folder of the project; other paths in this notebook are built from it.
# NOTE(review): the path sits under /content/drive/MyDrive, so it presumably requires
# Google Drive to be mounted in Colab first — no mount step is visible in this notebook.
HOME_DIR = "/content/drive/MyDrive/PhD research/LLM Privacy Policy"

In [None]:
!pip -q install llama-index llama-index-embeddings-huggingface llama-index-llms-llama-cpp pypdf
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip -q install llama-cpp-python

In [None]:
import os
import time

from transformers import LlamaForCausalLM, LlamaTokenizer
from llama_index.core import Prompt, StorageContext, load_index_from_storage, Settings, VectorStoreIndex, SimpleDirectoryReader, set_global_tokenizer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP

from transformers import AutoTokenizer

In [None]:
# User-tunable settings - edit these to point at a different document or model

# Embedding model used for the vector index (alternative: jinaai/jina-embeddings-v2-base-en)
text_embedding_model = 'thenlper/gte-base'

# Download location of the quantised Llama-2 chat weights (GGUF)
llm_url = 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf'

# Privacy-policy PDF to analyse, stored under HOME_DIR
pdf_path = f"{HOME_DIR}/Codes/Privacy Policies/12M recruiting.pdf"

# Optional (currently disabled): register a Llama-2 tokenizer globally
# set_global_tokenizer(AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf").encode)

In [None]:
# Load the PDF into llama-index documents
def filename_fn(filename):
    """Metadata callback handed to SimpleDirectoryReader via `file_metadata`.

    The `filename` argument is ignored; the returned 'file_name' entry is always
    the base name of the module-level `pdf_path`.
    """
    return {'file_name': os.path.basename(pdf_path)}


loader = SimpleDirectoryReader(
    input_files=[pdf_path],
    file_metadata=filename_fn,
)
documents = loader.load_data()

In [None]:
# Build the embedding model and the local LLM, then register both as llama-index defaults
embed_model = HuggingFaceEmbedding(model_name=text_embedding_model)

llm = LlamaCPP(
    model_url=llm_url,
    temperature=0.7,
    max_new_tokens=256,
    context_window=4096,
    # Strings on which generation should stop
    generate_kwargs={"stop": ["<s>", "[INST]", "[/INST]"]},
    # NOTE(review): -1 presumably asks llama.cpp to offload every layer to the GPU - confirm
    model_kwargs={"n_gpu_layers": -1},
    verbose=True,
)

# The global Settings object is used in place of the earlier ServiceContext.from_defaults(...)
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /tmp/llama_index/models/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:             

In [None]:
# Indexing: build a vector index from the loaded documents and report how long it took.
# time.perf_counter() is a monotonic, high-resolution clock, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()

index = VectorStoreIndex.from_documents(documents, embed_model=embed_model, llm=llm)

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Elapsed indexing time: {elapsed_time:.2f} s")

Elapsed indexing time: 1.17 s


In [None]:
# Natural-language question that is sent to the query engine below
query_str = "What are the data the company collect?"

In [None]:
# Plain retriever over the same index (top-1 match only); use this for testing
vector_retriever = index.as_retriever(similarity_top_k=1)

# Query engine that retrieves the single most similar chunk and answers with the local LLM
query_engine = index.as_query_engine(
    similarity_top_k=1,
    llm=llm,
)

In [None]:
# Run the question through the query engine and show the generated answer as text
response = query_engine.query(query_str)
print(response)

llama_perf_context_print:        load time =   21620.25 ms
llama_perf_context_print: prompt eval time =   21619.90 ms /    50 tokens (  432.40 ms per token,     2.31 tokens per second)
llama_perf_context_print:        eval time =   13181.88 ms /    18 runs   (  732.33 ms per token,     1.37 tokens per second)
llama_perf_context_print:       total time =   34812.67 ms /    68 tokens


 The company collects data on customer demographics, website usage, and purchases.


In [None]:
# Few-Shot Learning Prompt
# Template text: the question, one worked example answer listing categories of collected
# data, then a `{query}` placeholder followed by "Response:" for the model to complete.
# NOTE(review): the code that fills `{query}` and sends this prompt is not in this cell;
# presumably str.format or a prompt-template class is applied later - confirm.
few_shot_prompt = """What are the data the company collect?

Example 1:
Customer information: names, addresses, phone numbers, and email addresses.
Demographic data: age, gender, income level, occupation, and education level.
Product information: types of products or services purchased, frequency of purchases, and purchase amounts.
Behavioral data: browsing history, search queries, and website interactions.
Location data: geolocation data from mobile devices or GPS coordinates.
Social media data: information collected from social media platforms, such as Facebook, Twitter, or LinkedIn.
Financial data: credit card information, bank account information, and payment history.
Technical data: information about the device or software used to access the website or application, such as the browser type, operating system, and screen resolution.
Usage data: data on how users interact with the website or application, such as the number of page views, time spent on the site, and bounce rate.
Sales data: data on the sales of products or services, including the amounts and frequencies of sales.


Now, answer the following query:
Query: {query}
Response:
"""

References
1. https://colab.research.google.com/github/kazcfz/LlamaIndex-RAG/blob/main/LlamaIndex_RAG.ipynb#scrollTo=5P44cIP1PONJ
2. https://docs.llamaindex.ai/en/stable/examples/prompts/prompts_rag/
3. https://docs.haystack.deepset.ai/docs/llamacppgenerator
