In [None]:
# Install the LlamaIndex integrations used later in this notebook: the file reader
# (plus PyMuPDF for PDF parsing), the Postgres vector store, and the HuggingFace
# embedding wrapper.
%pip install llama-index-readers-file pymupdf
%pip install llama-index-vector-stores-postgres
%pip install llama-index-embeddings-huggingface


In [None]:
# Install the LlamaIndex llama.cpp integration (provides llama_index.llms.llama_cpp, imported below).
%pip install llama-index-llms-llama-cpp

In [None]:
!pip install --upgrade typing-extensions
!pip install --upgrade pip

In [8]:
# Sentence-transformers embedding model, loaded through the HuggingFace wrapper.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

EMBED_MODEL_NAME = "BAAI/bge-small-en"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

In [None]:
#Llama CPP, in this notebook, we use the llama-2-chat-13b-ggml model, along with the proper prompt formatting.

!pip install llama-cpp-python

In [13]:
from llama_index.llms.llama_cpp import LlamaCPP

# Quantized Llama-2-13B-chat weights in GGUF format. Recent llama-cpp-python
# releases no longer load the legacy GGML (.ggmlv3) files, so the GGUF build is used.
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

llm = LlamaCPP(
    # Let LlamaCPP download the weights from model_url. A hard-coded local path
    # that does not exist raises "Provided model path does not exist".
    model_url=model_url,
    # To use an already-downloaded file instead, set model_path to it.
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    # extra keyword arguments forwarded at generation time
    generate_kwargs={},
    # extra keyword arguments forwarded when the model is loaded;
    # n_gpu_layers >= 1 offloads layers to the GPU
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)

In [None]:
# init Postgres 
# Using an existing Postgres running at the localhost, create the database we will be using
!pip install psycopg2-binary pgvector asycpg "sqlalchemy[asyncio]" greenlet


In [None]:
import os

import psycopg2

dbname = "vector_db"
host = "localhost"
# Prefer the standard libpq PGPASSWORD variable when it is set; otherwise use the demo default.
password = os.environ.get("PGPASSWORD", "password")
port = "5432"
user = "jerry"

# Connect to the built-in "postgres" maintenance database rather than to `dbname`:
# Postgres refuses to DROP the database the current session is connected to, and
# `dbname` may not exist yet on a first run.
conn = psycopg2.connect(
    dbname="postgres",
    user=user,
    password=password,
    host=host,
    port=port,
)
# DROP/CREATE DATABASE cannot run inside a transaction block, so run each statement in autocommit mode.
conn.autocommit = True

with conn.cursor() as c:
    # Recreate the database from scratch so the notebook can be re-run end to end.
    c.execute(f"DROP DATABASE IF EXISTS {dbname}")
    c.execute(f"CREATE DATABASE {dbname}")

In [None]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Vector store backed by the Postgres database created above; connection settings
# reuse the variables from the database-creation cell.
vector_store = PGVectorStore.from_params(
    database=dbname,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="llama2_paper",
    embed_dim=384,  # output dimension of the BAAI/bge-small-en embedding model
)

In [None]:
# Build an Ingestion Pipeline from Scratch
# 1. Load Data
!mkdir data
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"

In [None]:
from pathlib import Path
from llama_index.readers.file import PyMuPDFReader

In [None]:
# Parse the downloaded paper with the PyMuPDF-based reader.
pdf_path = Path("data/llama2.pdf")
loader = PyMuPDFReader()
documents = loader.load(file_path=pdf_path)

In [None]:
# 2. Use a Text Splitter to split the documents
from llama_index.core.node_parser import SentenceSplitter


In [None]:
# Splitter configured with chunk_size=1024; the separator is left at the library default.
text_parser = SentenceSplitter(chunk_size=1024)


In [None]:
# text_chunks[i] was split from documents[doc_idxs[i]]; the parallel index list
# lets step (3) copy the source document's metadata onto each chunk.
text_chunks = []
doc_idxs = []
for doc_idx, doc in enumerate(documents):
    pieces = text_parser.split_text(doc.text)
    text_chunks += pieces
    doc_idxs += [doc_idx] * len(pieces)

In [None]:
# 3. Manually Construct Nodes from Text Chunks
from llama_index.core.schema import TextNode

nodes = []
for idx, text_chunk in enumerate(text_chunks):
    # Bind the new node to `node`; assigning it to `nodes` would overwrite the
    # accumulator list and break the append below.
    node = TextNode(text=text_chunk)
    # doc_idxs[idx] is the index of the document this chunk was split from.
    src_doc = documents[doc_idxs[idx]]
    node.metadata = src_doc.metadata
    nodes.append(node)

In [None]:
# 4. Generate Embeddings for each Node
# The text that gets embedded is the node content rendered with metadata_mode="all".
for node in nodes:
    node.embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )

In [None]:
# Load the embedded nodes into the Postgres-backed vector store.
vector_store.add(nodes)

In [None]:
# Build Retrieval Pipeline from Scratch
# Example question used to exercise the retrieval steps that follow.
query_str = "Can you tell me about the key concepts for safety finetuning"

In [None]:
# 1. Generate a Query Embedding
# Uses the same embedding model that embedded the nodes.
query_embedding = embed_model.get_query_embedding(query_str)

In [None]:
# 2. Query the Vector Database
# Construct the vector store query.
from llama_index.core.vector_stores import VectorStoreQuery

# Retrieval mode passed to the query; "sparse" and "hybrid" are the other values to try.
query_mode = "default"

vector_store_query = VectorStoreQuery(
    mode=query_mode,
    similarity_top_k=2,
    query_embedding=query_embedding,
)


In [None]:
# vector_store.query returns a VectorStoreQueryResult; show the text of its first node.
query_result = vector_store.query(vector_store_query)
top_node = query_result.nodes[0]
print(top_node.get_content())


In [None]:
# 3. Parse Result into a Set of Nodes
from llama_index.core.schema import NodeWithScore
from typing import Optional

# Pair every returned node with its similarity; the score stays None when the
# store did not report similarities.
similarities = query_result.similarities
nodes_with_scores = []
for position, node in enumerate(query_result.nodes):
    score: Optional[float] = (
        similarities[position] if similarities is not None else None
    )
    nodes_with_scores.append(NodeWithScore(node=node, score=score))

In [None]:
# Put into a Retriever
from typing import Any, List, Optional

from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore
from llama_index.core.vector_stores import VectorStoreQuery
from llama_index.vector_stores.postgres import PGVectorStore


class VectorDBRetriever(BaseRetriever):
    """Retriever over a postgres vector store.

    Embeds the incoming query, runs a similarity query against the vector
    store and returns the matching nodes together with their scores.
    """

    def __init__(
        self,
        vector_store: PGVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Store the retrieval dependencies and settings.

        Args:
            vector_store: Store that is queried for similar nodes.
            embed_model: Object exposing ``get_query_embedding(str)``.
            query_mode: Mode forwarded to ``VectorStoreQuery``.
            similarity_top_k: Number of nodes requested per query.
        """
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return the nodes most similar to ``query_bundle.query_str``.

        Uses the embed model and vector store given to ``__init__`` rather
        than notebook globals, so each instance queries its own store.
        """
        query_embedding = self._embed_model.get_query_embedding(
            query_bundle.query_str
        )
        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        query_result = self._vector_store.query(vector_store_query)

        nodes_with_scores = []
        for index, node in enumerate(query_result.nodes):
            # The score stays None when the store reports no similarities.
            score: Optional[float] = None
            if query_result.similarities is not None:
                score = query_result.similarities[index]
            nodes_with_scores.append(NodeWithScore(node=node, score=score))

        return nodes_with_scores

In [None]:
# Instantiate the custom retriever over the Postgres store and embedding model.
retriever = VectorDBRetriever(
    vector_store=vector_store,
    embed_model=embed_model,
    query_mode="default",
    similarity_top_k=2,
)

In [None]:
# Plug this into RetrieverQueryEngine to synthesize a response

from llama_index.core.query_engine import RetrieverQueryEngine

# Combine the custom retriever with the llama.cpp LLM created earlier.
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)

In [None]:
# Ask the query engine a question about the ingested paper.
query_str = "How does Llama 2 perform compared to other open-source models?"

response = query_engine.query(query_str)

In [None]:
# print() already converts its argument with str(), so the answer text is shown.
print(response)

In [None]:
# Show the content of the first source node attached to the response.
print(response.source_nodes[0].get_content())
