In [1]:
# Import necessary libraries.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.question_answering import load_qa_chain

In [2]:
# Create the embedding function backed by the HuggingFace
# "sentence-transformers/all-MiniLM-L6-v2" model.
# References:
#   https://huggingface.co/blog/getting-started-with-embeddings
#   https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [3]:
# Open the Chroma vector store persisted under ./chroma_db/ and attach the
# embedding function used to embed queries against it.
db = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db/",
)

In [4]:
# Define the query, run a similarity search against the vector store, and
# display the content of the most relevant document.
inquiry = "Who is the author of this book?"
docs = db.similarity_search(inquiry)

# similarity_search may return an empty list (for example when ./chroma_db/
# has not been populated yet); indexing docs[0] would then fail with a bare
# IndexError, so report the situation explicitly instead.
if docs:
    print(docs[0].page_content)
else:
    print("No documents were returned; check that ./chroma_db/ contains the persisted collection.")

58 Building a superhero team - Get your teammates to read this

Congratulations on finishing this book!

In Chapter 2, we talked about how this book can help you become the superhero of your team.

The only thing better than being a superhero is being part of a superhero team. I hope you’ll give copies of this book to your friends and teammates and help create other superheroes!

Page 118

Machine Learning Yearning-Draft

Andrew Ng


In [6]:
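# Install the llama-cpp-python library to use llama.cpp.
# https://python.langchain.com/docs/integrations/llms/llamacpp
# NOTE: the command below builds with NVIDIA cuBLAS support. On Apple Silicon (Metal),
# use CMAKE_ARGS="-DLLAMA_METAL=on" instead, as described on the page linked above.
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose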

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Using pip 23.2.1 from /Users/easonlai/anaconda3/envs/llama2/lib/python3.11/site-packages/pip (python 3.11)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.2.6.tar.gz (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25l  Running command pip subprocess to install build dependencies
  Collecting scikit-build-core[pyproject]>=0.5.0
    Obtaining dependency information for scikit-build-core[pyproject]>=0.5.0 from https://files.pythonhosted.org/packages/94/b8/fba31e512f4e1817e3adce4fa1e2dd73dd06b7013fca9671b6b5c19a0bae/scikit_build_core-0.5.0-py3-none-any.whl.me

In [5]:
# Define the Llama 2 model repository and the model file name to use.
# model_name_or_path is the HuggingFace Hub repo id; model_basename is the
# GGUF file (Q4_K_M quantization, per its file name) inside that repo.
# https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q4_K_M.gguf"

In [6]:
# Download the model file from the HuggingFace Hub and keep the local path
# that hf_hub_download returns.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
)

In [7]:
# Print the local path of the downloaded model file to confirm where
# hf_hub_download stored it.
print(model_path)

/Users/easonlai/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGUF/snapshots/245bc5104d85dcc9a11a0e0a9ae6de38dfae536f/llama-2-13b-chat.Q4_K_M.gguf


In [8]:
# Register a stdout streaming handler so generated tokens are printed one by
# one as the model produces them.
callback_manager = CallbackManager(
    [
        StreamingStdOutCallbackHandler(),
    ]
)

In [22]:
# GPU settings; reference: https://python.langchain.com/docs/integrations/llms/llamacpp#gpu.
n_gpu_layers = 40  # Tune this to your model and the VRAM available on your GPU.
n_batch = 512  # Should be between 1 and n_ctx; keep your GPU's VRAM in mind.

# Create the LlamaCpp LLM from the downloaded model file.
llm = LlamaCpp(
    # Model file and context size.
    model_path=model_path,
    n_ctx=2048,
    # GPU and batching settings defined above.
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    # Generation limit and output handling.
    max_tokens=256,
    callback_manager=callback_manager,
    verbose=False,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/easonlai/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGUF/snapshots/245bc5104d85dcc9a11a0e0a9ae6de38dfae536f/llama-2-13b-chat.Q4_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     [  5120

In [23]:
# Build the question-answering chain on top of the LLM, using the "stuff"
# chain type.
chain = load_qa_chain(
    llm,
    chain_type="stuff",
)

ggml_metal_free: deallocating


In [24]:
# First question-answering sample: retrieve the documents most similar to the
# question, then let the chain answer using them as input.
inquiry = "Who is the author of this book?"
docs = db.similarity_search(inquiry)
chain.run(
    input_documents=docs,
    question=inquiry,
)

 The author of this book is Andrew Ng.

' The author of this book is Andrew Ng.'

In [27]:
# Second question-answering sample: same retrieve-then-answer flow, this time
# asking for a summary of the book.
inquiry = "Please tell me the key summary of this book."
docs = db.similarity_search(inquiry)
chain.run(
    input_documents=docs,
    question=inquiry,
)

 The book talks about how to build a superhero team and become the superhero of your team using machine learning yearning draft, written by Andrew Ng, to help you understand how to set technical direction for a machine learning project, persuade your teammates to follow it, and scale your team's progress.

" The book talks about how to build a superhero team and become the superhero of your team using machine learning yearning draft, written by Andrew Ng, to help you understand how to set technical direction for a machine learning project, persuade your teammates to follow it, and scale your team's progress."