# Chunk Dreaming

In [1]:
import nest_asyncio

# Applied up front, before the remaining imports.
# NOTE(review): presumably needed so nested asyncio event loops work inside the
# notebook kernel — confirm.
nest_asyncio.apply()

import os
import openai

In [2]:
# Never paste a real key into the notebook. Reuse OPENAI_API_KEY when it is already
# set in the environment; otherwise prompt for it without echoing the input.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.schema import MetadataMode

In [3]:
# LLM handle that both metadata extractors below receive via `llm=llm`.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
    max_tokens=512,
)

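We instantiate the `SummaryExtractor` and `QuestionsAnsweredExtractor` and bundle them into a `MetadataExtractor`. The extractor is deliberately not attached to the node parser, so that it can be run over the parsed nodes by hand (and more than once) in the cells below.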

In [4]:
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    SummaryExtractor,
    QuestionsAnsweredExtractor,
)
from llama_index.text_splitter import TokenTextSplitter

# Space-separated splitting; chunk_size / chunk_overlap are token counts
# (going by the splitter's name).
text_splitter = TokenTextSplitter(
    separator=" ",
    chunk_size=512,
    chunk_overlap=128,
)

# Bundle both extractors. Their order matters to a later cell, which reaches the
# questions extractor as `metadata_extractor.extractors[0]`.
# NOTE(review): in_place=False presumably makes process_nodes() return new nodes
# instead of modifying its input — confirm against the library.
metadata_extractor = MetadataExtractor(
    in_place=False,
    extractors=[
        QuestionsAnsweredExtractor(llm=llm, questions=3),
        SummaryExtractor(llm=llm, summaries=["prev", "self"]),
    ],
)

# The extractor is intentionally left off the parser (hence the disabled keyword
# below); later cells call metadata_extractor.process_nodes() on the nodes by hand.
node_parser = SimpleNodeParser.from_defaults(
    # metadata_extractor=metadata_extractor,
    text_splitter=text_splitter,
)

In [5]:
from llama_index import SimpleDirectoryReader

We first load an excerpt of *The Great Gatsby*.

In [6]:
# Load the single excerpt file (relative path).
docs = SimpleDirectoryReader(
    input_files=["../../../examples/gatsby/data/gatsby_part.txt"]
).load_data()

In [7]:
# Turn the loaded documents into nodes using the parser configured above.
nodes = node_parser.get_nodes_from_documents(docs)

In [8]:
# Keep only the first 8 nodes for testing: each extractor pass below runs over every
# remaining node (see the 0/8 progress bars in the later outputs).
nodes = nodes[:8]

In [13]:
# Inspect one chunk before any metadata extraction. MetadataMode.ALL (imported
# earlier in this notebook) is the enum member this parameter takes, rather than
# the bare string "all".
print(nodes[3].get_content(metadata_mode=MetadataMode.ALL))

the singing breeze of the fans.

“We can’t move,” they said together.

Jordan’s fingers, powdered white over their tan, rested for a moment
in mine.

“And Mr. Thomas Buchanan, the athlete?” I inquired.

Simultaneously I heard his voice, gruff, muffled, husky, at the hall
telephone.

Gatsby stood in the centre of the crimson carpet and gazed around with
fascinated eyes. Daisy watched him and laughed, her sweet, exciting
laugh; a tiny gust of powder rose from her bosom into the air.

“The rumour is,” whispered Jordan, “that that’s Tom’s girl on the
telephone.”

We were silent. The voice in the hall rose high with annoyance: “Very
well, then, I won’t sell you the car at all … I’m under no obligations
to you at all … and as for your bothering me about it at lunch time, I
won’t stand that at all!”

“Holding down the receiver,” said Daisy cynically.

“No, he’s not,” I assured her. “It’s a bona-fide deal. I happen to
know about it.”

Tom flung open the door, blocked out its space for a moment

In [10]:
# 1st pass: run the configured extractors (questions, then summaries) over the nodes.
# The result is bound to a new name, `nodes_1`; `nodes` itself is not rebound.
nodes_1 = metadata_extractor.process_nodes(nodes)

Extracting questions:   0%|          | 0/8 [00:00<?, ?it/s]

Extracting summaries:   0%|          | 0/8 [00:00<?, ?it/s]

In [None]:
# NOTE(review): scratch cell — it has no execution count and `tmp` is not read by any
# later cell visible here. It calls the first configured extractor (the questions
# extractor) directly on `nodes`; consider removing it or giving the result a
# descriptive name.
tmp = metadata_extractor.extractors[0].extract(nodes)

In [12]:
# Same chunk after the first extraction pass, printed together with its metadata.
# MetadataMode.ALL replaces the bare string "all" with the enum member the
# parameter takes.
print(nodes_1[3].get_content(metadata_mode=MetadataMode.ALL))

[Excerpt from document]
questions_this_excerpt_can_answer: 1. Who is the athlete referred to as Mr. Thomas Buchanan?
2. What is the rumour about Tom's girl on the telephone?
3. What is Daisy's reaction when Tom enters the room?
prev_section_summary: In this section, the narrator is on a train and overhears a conversation between the conductor and other passengers about the hot weather. The narrator receives their commutation ticket back with a stain from the conductor's hand. The scene then shifts to the Buchanan's house, where the butler informs them that it is too hot to touch the master's body. The butler directs the narrator and Gatsby to the salon, where they find Daisy and Jordan lying on a couch. The narrator asks about Mr. Buchanan, and simultaneously hears his voice on the telephone. Gatsby stands in the center of the room, fascinated, while Daisy watches him and laughs.
section_summary: In this section, the key topics include the strained relationship between Tom and Daisy, G

In [14]:
# 2nd pass: run the metadata extractor again, this time on the output of the 1st pass.

# The input nodes already carry the first-pass metadata; the result is bound to
# `nodes_2` and printed in the next cell.
nodes_2 = metadata_extractor.process_nodes(nodes_1)

Extracting questions:   0%|          | 0/8 [00:00<?, ?it/s]

Extracting summaries:   0%|          | 0/8 [00:00<?, ?it/s]

In [16]:
# Same chunk after the second extraction pass, printed together with its metadata.
# MetadataMode.ALL replaces the bare string "all" with the enum member the
# parameter takes.
print(nodes_2[3].get_content(metadata_mode=MetadataMode.ALL))

[Excerpt from document]
questions_this_excerpt_can_answer: 1. Who is the athlete referred to as Mr. Thomas Buchanan?
2. What is the rumour about Tom's girl on the telephone?
3. What is Daisy's reaction when Tom enters the room?
prev_section_summary: In this section, the narrator is on a train and overhears a conversation between the conductor and other passengers about the hot weather. The narrator receives their commutation ticket back with a stain from the conductor's hand. The scene then shifts to the Buchanan's house, where the butler informs them that it is too hot to touch the master's body. The narrator and Gatsby are then led to the salon where they find Daisy and Jordan lying on a couch. The narrator asks about Mr. Buchanan, and simultaneously hears his voice on the telephone. Gatsby is fascinated by the surroundings and Daisy laughs.
section_summary: In this section, the key topics include the strained relationship between Tom and Daisy, Gatsby's fascination with Daisy, and t