In [None]:
!pip install llama-index-embeddings-openai
!pip install llama-index-vector-stores-pinecone
!pip install llama-index-llms-openai
!pip install llama-index

In [None]:
!pip install python-dotenv pinecone llama-index pymupdf

## Setup
### We build an empty Pinecone Index, and define the necessary LlamaIndex wrappers/abstractions so that we can start loading data into Pinecone.

In [7]:
from pinecone import Pinecone, Index, ServerlessSpec
import os

In [8]:
api_key = os.environ["pinecone_api_key"]
pc = Pinecone(api_key=api_key)

In [9]:
index_name = "llamaindex-rag-fs"

In [10]:
# dimensions are for text-embedding-ada-002
if index_name not in pc.list_indexes().names():
    pc.create_index(
        index_name,
        dimension=1536,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [11]:
pinecone_index = pc.Index(index_name)

### Create PineconeVectorStore
Simple wrapper abstraction to use in LlamaIndex. Wrap in StorageContext so we can easily load in Nodes.

In [13]:
from llama_index.vector_stores.pinecone import PineconeVectorStore

In [14]:
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

## Build an Ingestion Pipeline from Scratch
We show how to build an ingestion pipeline as mentioned in the introduction.

Note that steps (2) and (3) can be handled via our NodeParser abstractions, which handle splitting and node creation.

For the purposes of this tutorial, we show you how to create these objects manually.

### 1. Load Data¶

In [None]:
!mkdir data
!wget --user-agent "Mozilla" "https://arxiv.org/pdf/2307.09288.pdf" -O "data/llama2.pdf"

In [15]:
import fitz

In [16]:
file_path = "./data/llama2.pdf"
doc = fitz.open(file_path)

### 2. Use a Text Splitter to Split Documents
Here we import our SentenceSplitter to split document texts into smaller chunks, while preserving paragraphs/sentences as much as possible.

In [17]:
from llama_index.core.node_parser import SentenceSplitter
text_parser = SentenceSplitter(
    chunk_size=1024,
    # separator=" ",
)

In [18]:
text_chunks = []

doc_idxs = []
for doc_idx, page in enumerate(doc):
    page_text = page.get_text("text")
    cur_text_chunks = text_parser.split_text(page_text)
    text_chunks.extend(cur_text_chunks)
    doc_idxs.extend([doc_idx] * len(cur_text_chunks))

### 3. Manually Construct Nodes from Text Chunks
We convert each chunk into a TextNode object, a low-level data abstraction in LlamaIndex that stores content but also allows defining metadata + relationships with other Nodes.

We inject metadata from the document into each node.

This essentially replicates logic in our SentenceSplitter.

In [19]:
from llama_index.core.schema import TextNode

In [20]:
nodes = []
for idx, text_chunk in enumerate(text_chunks):
    node = TextNode(
        text=text_chunk,
    )
    src_doc_idx = doc_idxs[idx]
    src_page = doc[src_doc_idx]
    nodes.append(node)

In [52]:
print(nodes[2].metadata)

{}


In [21]:
# print a sample node
print(nodes[0].get_content(metadata_mode="all"))

Llama 2: Open Foundation and Fine-Tuned Chat Models
Hugo Touvron∗
Louis Martin†
Kevin Stone†
Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra
Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen
Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller
Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou
Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev
Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich
Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra
Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi
Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang
Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang
Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic
Sergey Edunov

### [Optional] 4. Extract Metadata from each Node
We extract metadata from each Node using our Metadata extractors.

This will add more metadata to each Node.

This step — using metadata extractors like TitleExtractor and QuestionsAnsweredExtractor — is optional because it enhances the searchability and explainability of your document chunks, but is not strictly required for retrieval or embedding.


In [None]:
from llama_index.core.extractors import (
    QuestionsAnsweredExtractor,
    TitleExtractor,
)
from llama_index.core.ingestion import IngestionPipeline
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo",api_key=os.environ['openai_api_key'])

extractors = [
    TitleExtractor(nodes=5, llm=llm),
    QuestionsAnsweredExtractor(questions=3, llm=llm),
]

pipeline = IngestionPipeline(
    transformations=extractors,
)
nodes = await pipeline.arun(nodes=nodes, in_place=False)

print(nodes[0].metadata)


### 5. Generate Embeddings for each Node¶
Generate document embeddings for each Node using our OpenAI embedding model (text-embedding-ada-002).

Store these on the embedding property on each Node.

In [22]:
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding(api_key=os.environ['openai_api_key'])

In [None]:
import time 

for node in nodes:
    time.sleep(1)  
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )
    node.embedding = node_embedding

In [None]:
texts = [node.get_content(metadata_mode="all") for node in nodes]

# Split into chunks of 10
for i in range(0, len(texts), 10):
    batch = texts[i:i+10]
    embeddings = embed_model.get_text_embedding_batch(batch)
    for node, embedding in zip(nodes[i:i+10], embeddings):
        node.embedding = embedding


Retrying llama_index.embeddings.openai.base.OpenAIEmbedding._get_text_embeddings.<locals>._retryable_get_embeddings in 0.3815429388645212 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}.


### 6. Load Nodes into a Vector Store
We now insert these nodes into our PineconeVectorStore.

NOTE: We skip the VectorStoreIndex abstraction, which is a higher-level abstraction that handles ingestion as well. We use VectorStoreIndex in the next section to fast-track retrieval/querying.

In [None]:
vector_store.add(nodes)

## Retrieve and Query from the Vector Store
Now that our ingestion is complete, we can retrieve/query this vector store.

NOTE: We can use our high-level VectorStoreIndex abstraction here. See the next section to see how to define retrieval at a lower-level!

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext

In [None]:
index = VectorStoreIndex.from_vector_store(vector_store)

In [None]:
query_engine = index.as_query_engine()

In [None]:
query_str = "Can you tell me about the key concepts for safety finetuning"

In [None]:
response = query_engine.query(query_str)

In [None]:
print(str(response))