# Chunk Dreaming

In [1]:
import nest_asyncio

# Patch asyncio so event loops can be nested.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop and the LLM-backed extractors below use async calls — confirm.
nest_asyncio.apply()

import os
import openai

In [2]:
# Respect a key that is already exported in the environment; only fall back to
# the placeholder when none is set. Assigning unconditionally would clobber a
# real key with the placeholder and invites pasting a secret into the notebook.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_API_KEY_HERE")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.schema import MetadataMode

In [3]:
# Shared LLM handle passed to both metadata extractors below.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
    max_tokens=512,
)

Next, we instantiate the `SummaryExtractor` and `QuestionsAnsweredExtractor`, combine them in a `MetadataExtractor`, and set up the node parser.

In [10]:
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    SummaryExtractor,
    QuestionsAnsweredExtractor,
)
from llama_index.text_splitter import TokenTextSplitter

# Token-based splitter: chunk size 512 with an overlap of 128, splitting on spaces.
text_splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=128,
    separator=" ",
)

# Bundle the two LLM-backed extractors: three questions per node, plus
# summaries for the "prev" and "self" sections.
metadata_extractor = MetadataExtractor(
    extractors=[
        QuestionsAnsweredExtractor(llm=llm, questions=3),
        SummaryExtractor(llm=llm, summaries=["prev", "self"]),
    ]
)

# The extractor is deliberately not attached to the parser (argument left
# disabled below); it is applied to the nodes explicitly in a later cell.
node_parser = SimpleNodeParser.from_defaults(
    # metadata_extractor=metadata_extractor,
    text_splitter=text_splitter,
)

In [11]:
from llama_index import SimpleDirectoryReader

We first load part of *The Great Gatsby*.

In [12]:
# Read the single Gatsby text file into a list of documents.
docs = SimpleDirectoryReader(
    input_files=[
        "../../../examples/gatsby/data/gatsby_part.txt",
    ]
).load_data()

In [13]:
# Split the loaded documents into nodes with the parser configured earlier.
nodes = node_parser.get_nodes_from_documents(docs)

In [14]:
# Keep only the first 10 nodes so the LLM-backed extraction in the next step
# stays small while testing.
nodes = nodes[:10]

In [15]:
# Run the question and summary extractors over the selected nodes. The call's
# return value (the list of nodes with their extracted metadata) is shown as
# the cell output, and the metadata is also visible on `nodes` afterwards.
metadata_extractor.process_nodes(nodes)

Extracting questions:   0%|          | 0/10 [00:00<?, ?it/s]

Extracting summaries:   0%|          | 0/10 [00:00<?, ?it/s]

[TextNode(id_='54167618-0fe3-4870-aff9-83ca0f65c0ac', embedding=None, metadata={'questions_this_excerpt_can_answer': "1. Why did Gatsby's career as Trimalchio come to an end?\n2. Why did Gatsby dismiss all of his servants and replace them with new ones?\n3. Who are the new people that Gatsby hired as servants and why did he hire them?", 'section_summary': "In this section, the narrator, Nick Carraway, notices that the lights in Gatsby's house are not on one Saturday night, signaling the end of Gatsby's extravagant parties. When Nick goes to check on Gatsby, he is met by an unfamiliar butler who is rude and uncooperative. Nick learns from his Finn (a servant) that Gatsby had fired all his previous servants and replaced them with new ones who do not go into town and are not considered traditional servants. Gatsby later calls Nick and explains that he fired his servants to prevent gossip and that Daisy often visits him in the afternoons. Gatsby also mentions that the new people in his hou

In [24]:
# Show the second node's text together with all of its extracted metadata.
# Pass the MetadataMode enum (imported from llama_index.schema at the top of
# the notebook) instead of a raw string, matching the parameter's declared type.
print(nodes[1].get_content(metadata_mode=MetadataMode.ALL))

[Excerpt from document]
questions_this_excerpt_can_answer: 1. Why did Gatsby fire all of his servants?
2. Who are the people that Wolfshiem wanted to do something for?
3. What was the reason for Daisy's relief when she found out the narrator was coming to lunch at her house?
prev_section_summary: In this section, the narrator, Nick Carraway, notices that the lights in Gatsby's house are not on one Saturday night, signaling the end of Gatsby's extravagant parties. When Nick goes to check on Gatsby, he is met by an unfamiliar butler who is rude and uncooperative. Nick learns from his Finn (a servant) that Gatsby had fired all his previous servants and replaced them with new ones who do not go into town and are not considered traditional servants. Gatsby later calls Nick and explains that he fired his servants to prevent gossip and that Daisy often visits him in the afternoons. Gatsby also mentions that the new people in his house are siblings whom his associate, Wolfshiem, wanted to help

In [8]:
from llama_index.utils import get_tqdm_iterable

# Scratch check of the progress-bar helper: wrap ten integers in a tqdm
# iterable labelled "tmp" and print each value as it is consumed.
queue = get_tqdm_iterable(
    list(range(10)),
    show_progress=True,
    desc="tmp",
)
for value in queue:
    print(value)

tmp:   0%|          | 0/10 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
