In [1]:
import os
from dotenv import load_dotenv
import pinecone
from llama_index.vector_stores import PineconeVectorStore
import fitz

load_dotenv(dotenv_path='.env')
api_key = os.environ["PINECONE_API_KEY"]
environment = os.environ["PINECONE_ENVIRONMENT"]
pinecone.init(api_key=api_key, environment=environment)

  from tqdm.autonotebook import tqdm


In [2]:
index_name = "llamaindex-rag-fs"
pinecone.delete_index(index_name)
pinecone.create_index(
    index_name, dimension=1536, metric='euclidean', pod_type='p1'
)

In [3]:
pinecone_index = pinecone.Index(index_name=index_name)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

In [4]:
file_path = "./data/llama2.pdf"
doc = fitz.open(file_path)

In [5]:
type(doc)

fitz.fitz.Document

In [6]:
for doc_idx, page in enumerate(doc):
    print(doc_idx)
    print(page)

0
page 0 of ./data/llama2.pdf
1
page 1 of ./data/llama2.pdf
2
page 2 of ./data/llama2.pdf
3
page 3 of ./data/llama2.pdf
4
page 4 of ./data/llama2.pdf
5
page 5 of ./data/llama2.pdf
6
page 6 of ./data/llama2.pdf
7
page 7 of ./data/llama2.pdf
8
page 8 of ./data/llama2.pdf
9
page 9 of ./data/llama2.pdf
10
page 10 of ./data/llama2.pdf
11
page 11 of ./data/llama2.pdf
12
page 12 of ./data/llama2.pdf
13
page 13 of ./data/llama2.pdf
14
page 14 of ./data/llama2.pdf
15
page 15 of ./data/llama2.pdf
16
page 16 of ./data/llama2.pdf
17
page 17 of ./data/llama2.pdf
18
page 18 of ./data/llama2.pdf
19
page 19 of ./data/llama2.pdf
20
page 20 of ./data/llama2.pdf
21
page 21 of ./data/llama2.pdf
22
page 22 of ./data/llama2.pdf
23
page 23 of ./data/llama2.pdf
24
page 24 of ./data/llama2.pdf
25
page 25 of ./data/llama2.pdf
26
page 26 of ./data/llama2.pdf
27
page 27 of ./data/llama2.pdf
28
page 28 of ./data/llama2.pdf
29
page 29 of ./data/llama2.pdf
30
page 30 of ./data/llama2.pdf
31
page 31 of ./data/llama2.

In [7]:
from datetime import datetime
last = datetime.now()

In [8]:
type(last)

datetime.datetime

In [9]:
current = datetime.now()
delta = current - last
print(delta)

0:00:00.023734


In [10]:
def time_diff(last:datetime, now:datetime) -> str:
    delta = now - last
    # Calculate hours, minutes, and seconds
    hours, remainder = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}h{minutes}m{seconds}s"

In [11]:
print(time_diff(last=last, now=datetime.now()))

0h0m0s


In [12]:
from llama_index.node_parser import SentenceSplitter

In [13]:
# 2. Use a Text Splitter to Split Documents
from llama_index.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(
    chunk_size=1024
)

text_chunks = []
doc_idxs = []
for doc_idx, page in enumerate(doc):
    print(f'Start process page {doc_idx + 1}')
    start_time = datetime.now()
    
    page_text = page.get_text("text")
    cur_text_chunk = text_splitter.split_text(page_text)
    text_chunks.extend(cur_text_chunk)
    doc_idxs.extend([doc_idx] * len(cur_text_chunk))
    
    print(f'Finish process page {doc_idx + 1}')
    print(f'It takes {time_diff(start_time, datetime.now())} to process page {doc_idx + 1}')

Start process page 1
Finish process page 1
It takes 0h0m0s to process page 1
Start process page 2
Finish process page 2
It takes 0h0m0s to process page 2
Start process page 3
Finish process page 3
It takes 0h0m0s to process page 3
Start process page 4
Finish process page 4
It takes 0h0m0s to process page 4
Start process page 5
Finish process page 5
It takes 0h0m0s to process page 5
Start process page 6
Finish process page 6
It takes 0h0m0s to process page 6
Start process page 7
Finish process page 7
It takes 0h0m0s to process page 7
Start process page 8
Finish process page 8
It takes 0h0m0s to process page 8
Start process page 9
Finish process page 9
It takes 0h0m0s to process page 9
Start process page 10
Finish process page 10
It takes 0h0m0s to process page 10
Start process page 11
Finish process page 11
It takes 0h0m0s to process page 11
Start process page 12
Finish process page 12
It takes 0h0m0s to process page 12
Start process page 13
Finish process page 13
It takes 0h0m0s to pro

In [14]:
# 3. Manually Construct Nodes from Text Chunks
from llama_index.schema import TextNode
nodes = []
for idx, text_chunk in enumerate(text_chunks):
    node = TextNode(
        text=text_chunk,
    )
    src_doc_idx = doc_idxs[idx]
    src_page = doc[src_doc_idx]
    nodes.append(node)

In [15]:
print(nodes[0].metadata)

{}


In [16]:
print(nodes[0].get_content(metadata_mode="all"))

Llama 2: Open Foundation and Fine-Tuned Chat Models
Hugo Touvron∗
Louis Martin†
Kevin Stone†
Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra
Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen
Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller
Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou
Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev
Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich
Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra
Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi
Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang
Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang
Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic
Sergey Edunov

In [17]:
print(nodes[1].get_content(metadata_mode="all"))

Contents
1
Introduction
3
2
Pretraining
5
2.1
Pretraining Data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
5
2.2
Training Details . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
5
2.3
Llama 2 Pretrained Model Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
7
3
Fine-tuning
8
3.1
Supervised Fine-Tuning (SFT) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
9
3.2
Reinforcement Learning with Human Feedback (RLHF)
. . . . . . . . . . . . . . . . . . . . .
9
3.3
System Message for Multi-Turn Consistency . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
16
3.4
RLHF Results
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
17
4
Safety
20
4.1
Safety in Pretraining
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
20
4.2
Safety Fine-Tuning
. . . . . . . . . . . . . . . . . . 

In [18]:
print(nodes[5].get_content(metadata_mode="all"))

Figure 3: Safety human evaluation results for Llama 2-Chat compared to other open-source and closed-
source models. Human raters judged model generations for safety violations across ~2,000 adversarial
prompts consisting of both single and multi-turn prompts. More details can be found in Section 4.4. It is
important to caveat these safety results with the inherent bias of LLM evaluations due to limitations of the
prompt set, subjectivity of the review guidelines, and subjectivity of individual raters. Additionally, these
safety evaluations are performed using content standards that are likely to be biased towards the Llama
2-Chat models.
We are releasing the following models to the general public for research and commercial use‡:
1. Llama 2, an updated version of Llama 1, trained on a new mix of publicly available data. We also
increased the size of the pretraining corpus by 40%, doubled the context length of the model, and
adopted grouped-query attention (Ainslie et al., 2023). We are

In [19]:
from llama_index.extractors import (
    QuestionsAnsweredExtractor,
    TitleExtractor,
)
from llama_index.ingestion import IngestionPipeline
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo")

extractors = [
    TitleExtractor(nodes=5, llm=llm),
    QuestionsAnsweredExtractor(questions=3, llm=llm),
]

In [20]:
pipeline = IngestionPipeline(
    transformations=extractors,
)
nodes = pipeline.run(nodes=nodes, in_place=False, show_progress=True)

RuntimeError: asyncio.run() cannot be called from a running event loop

In [21]:
import asyncio

asyncio.get_event_loop().is_running()

True