# Episode Pagination Implementation

In [1]:
def paginate_text(text, min_words=200, max_words=400):
    """
    Split text into pages made of whole paragraphs.

    Paragraphs are assumed to be separated by two newline characters.
    Paragraphs are added to the current page until adding the next one would
    push the page past ``max_words``; the page is then closed, but only if it
    already holds at least ``min_words`` words.

    Consequences of working with whole paragraphs:
      * a page can exceed ``max_words`` (a paragraph is never split, and a
        page that is still below ``min_words`` keeps growing);
      * the last page can be shorter than ``min_words``.

    Args:
        text: Input text.
        min_words: Minimum number of words a page must hold before it may be closed.
        max_words: Word count above which a sufficiently full page is closed.

    Returns:
        List of page strings (paragraphs re-joined with a blank line).
        Empty or whitespace-only input yields an empty list.
    """
    # Skip whitespace-only chunks (empty input, runs of 3+ newlines) so they
    # never produce an empty page or stray blank lines inside a page.
    paragraphs = [para for para in text.split('\n\n') if para.strip()]

    pages = []                # Finished pages
    current_page = []         # Paragraphs collected for the page being built
    current_word_count = 0    # Words collected in current_page so far

    for para in paragraphs:
        para_word_count = len(para.split())
        page_would_overflow = current_word_count + para_word_count > max_words

        # `current_page` must be non-empty: with min_words=0 an oversized first
        # paragraph would otherwise flush an empty page.
        if current_page and page_would_overflow and current_word_count >= min_words:
            pages.append('\n\n'.join(current_page))
            current_page = [para]
            current_word_count = para_word_count
        else:
            current_page.append(para)
            current_word_count += para_word_count

    # Flush whatever is left, even if it is below min_words
    if current_page:
        pages.append('\n\n'.join(current_page))
    return pages

In [2]:
# Colab-only: prompt for a file upload. The result is used below as a mapping
# whose keys are the uploaded file names.
from google.colab import files
sample = files.upload()

Saving SampleTextFile_10kb.txt to SampleTextFile_10kb.txt


In [3]:
# The upload result is keyed by file name; use the first uploaded file.
filename = next(iter(sample))
# Decode explicitly as UTF-8: the text contains non-ASCII characters, and the
# platform default encoding is not guaranteed to be UTF-8 outside Colab.
with open(filename, 'r', encoding='utf-8') as file:
    content = file.read()

In [4]:
# Paginate the uploaded text using paginate_text's defaults (min_words=200, max_words=400)
pages = paginate_text(content)

In [5]:
# Show every page under a 1-based header, with blank lines separating pages.
for i, page in enumerate(pages):
    print(f"--- Page {i+1} ---\n", page, "\n\n", sep="\n")

--- Page 1 ---

Mary Teresa Bojaxhiu (born Anjezë Gonxhe Bojaxhiu, Albanian: [aˈɲɛzə ˈɡɔndʒɛ bɔjaˈdʒi.u]; 26 August 1910 – 5 September 1997), better known as Mother Teresa or Saint Mother Teresa, was an Albanian-Indian Catholic nun, founder of the Missionaries of Charity, and a Catholic saint. Born in Skopje, then part of the Ottoman Empire, she was raised in a devoutly Catholic family. At the age of 18, she moved to Ireland to join the Sisters of Loreto and later to India, where she lived most of her life and carried out her missionary work. On 4 September 2016, she was canonised by the Catholic Church as Saint Teresa of Calcutta. The anniversary of her death, 5 September, is now observed as her feast day.

Mother Teresa founded the Missionaries of Charity, a religious congregation that was initially dedicated to serving "the poorest of the poor" in the slums of Calcutta. Over the decades, the congregation grew to operate in over 133 countries, as of 2012, with more than 4,500 nuns ma

# NarrativeQA

In [10]:
def create_narrativeqa_gists(pages):
    """
    Creates gist memories for NarrativeQA content (books/movie scripts).
    Each gist captures key story elements: characters, actions, and locations.

    The extraction is purely heuristic and works sentence by sentence
    (sentences are obtained by splitting the page on '.'):
      * character: first word of a sentence containing "'s" or " said";
      * action: a sentence containing one of the action verbs as a whole word;
      * location: last word of a sentence containing a location preposition.

    Args:
        pages: List of text chunks from pagination

    Returns:
        List of gist strings with page references (0-based page index).
        Characters are listed in order of first appearance; only the first
        detected action and location are reported.
    """
    import re  # local import so this cell does not depend on another cell's imports

    # Built once instead of once per sentence. \b restricts the match to whole
    # words, so "ran" no longer fires on names such as "Francis".
    action_pattern = re.compile(r'\b(?:went|entered|opened|ran|shouted)\b')
    location_prepositions = (' in ', ' at ', ' on ', ' near ')

    gists = []

    for i, page in enumerate(pages):
        # dict keys act as an insertion-ordered set, which keeps the output
        # deterministic (a plain set's string order varies between runs)
        characters = {}
        actions = []
        locations = []

        # Process each non-empty sentence
        sentences = [s.strip() for s in page.split('.') if s.strip()]
        for sent in sentences:
            # Character detection (e.g., "Alice said" or "Bob's hat")
            if "'s" in sent or " said" in sent:
                characters.setdefault(sent.split()[0], None)  # First word is often the name

            # Action detection (e.g., "went to" or "opened the")
            if action_pattern.search(sent):
                actions.append(sent)

            # Location detection (e.g., "in the castle" or "at home")
            if any(prep in sent for prep in location_prepositions):
                locations.append(sent.split()[-1])  # Last word often the location

        # Build the gist
        gist_parts = []
        if characters:
            gist_parts.append(f"Characters: {', '.join(characters)}")
        if actions:
            gist_parts.append(f"Action: {actions[0]}")
        if locations:
            gist_parts.append(f"Location: {locations[0]}")

        gist = ". ".join(gist_parts) if gist_parts else "Story progression"
        gists.append(f"(Page {i}) {gist}")

    return gists

# QMSum

In [11]:
def create_qmsum_gists(pages):
    """
    Build one gist per page of a QMSum meeting transcript.

    Every non-blank line is lower-cased and put into at most one bucket,
    tested in this order: decision, action item, key point. The gist reports
    the first decision and the first action item; the first key point is
    reported only when the page has neither.

    Args:
        pages: List of meeting transcript chunks from pagination

    Returns:
        List of gist strings, each prefixed with its 0-based page index
    """
    decision_cues = ('agree', 'decide', 'conclude', 'resolution')
    action_cues = ('action', 'task', 'todo', 'assign')
    key_point_cues = ('important', 'key point', 'note that')

    def mentions(text, cues):
        # True when any cue occurs as a substring of text
        return any(cue in text for cue in cues)

    summaries = []

    for page_index, page_text in enumerate(pages):
        decision_lines, action_lines, key_point_lines = [], [], []

        for raw_line in page_text.split('\n'):
            entry = raw_line.strip()
            if not entry:
                continue
            lowered = entry.lower()

            # First matching bucket wins, so a line is never counted twice
            if mentions(lowered, decision_cues):
                decision_lines.append(entry)
            elif mentions(lowered, action_cues):
                action_lines.append(entry)
            elif mentions(lowered, key_point_cues):
                key_point_lines.append(entry)

        segments = []
        if decision_lines:
            segments.append(f"Decisions: {decision_lines[0]}")
        if action_lines:
            segments.append(f"Actions: {action_lines[0]}")
        # Key points are only a fallback for pages without decisions or actions
        if key_point_lines and not decision_lines and not action_lines:
            segments.append(f"Key Point: {key_point_lines[0]}")

        if segments:
            summary = " | ".join(segments)
        else:
            summary = "Meeting discussion"
        summaries.append(f"(Page {page_index}) {summary}")

    return summaries

# QuALITY

In [12]:
def create_quality_gists(pages):
    """
    Build one gist per page of a QuALITY research article.

    Each non-blank paragraph (pages are split on blank lines) is represented
    by the text before its first '.', with a '.' appended. A paragraph is
    classified by keyword, checked in this order: thesis (only while the page
    has no thesis yet; truncated to 250 characters), evidence, conclusion.
    The gist reports the thesis, the first evidence sentence and the last
    conclusion sentence.

    Args:
        pages: List of article chunks from pagination

    Returns:
        List of gist strings, each prefixed with its 0-based page index
    """
    thesis_markers = ('this paper', 'we argue', 'hypothesize')
    evidence_markers = ('study shows', 'data suggest', ' p < ', 'n=')
    conclusion_markers = ('conclusion', 'in summary', 'results indicate')

    def contains_any(haystack, needles):
        # True when any needle occurs as a substring of haystack
        return any(needle in haystack for needle in needles)

    results = []

    for page_idx, page_text in enumerate(pages):
        thesis_line = None
        evidence_lines = []
        conclusion_lines = []

        for chunk in page_text.split('\n\n'):
            block = chunk.strip()
            if not block:
                continue

            lead_sentence = block.partition('.')[0] + '.'
            lowered = block.lower()

            if not thesis_line and contains_any(lowered, thesis_markers):
                thesis_line = lead_sentence[:250]  # cap very long opening sentences
            elif contains_any(lowered, evidence_markers):
                evidence_lines.append(lead_sentence)
            elif contains_any(lowered, conclusion_markers):
                conclusion_lines.append(lead_sentence)

        segments = []
        if thesis_line:
            segments.append(f"Thesis: {thesis_line}")
        if evidence_lines:
            segments.append(f"Evidence: {evidence_lines[0]}")
        if conclusion_lines:
            # The last conclusion found on the page is the one reported
            segments.append(f"Conclusion: {conclusion_lines[-1]}")

        if segments:
            summary = " | ".join(segments)
        else:
            summary = "Academic content"
        results.append(f"(Page {page_idx}) {summary}")

    return results

# Interactive Lookup

In [13]:
def interactive_lookup(pages, gists):
    """
    Provides an interactive CLI for users to look up a specific page and its gist.

    Commands read from standard input (case-insensitive, surrounding
    whitespace ignored):
      * a 0-based page number -> print that page and its gist
      * 'all'                 -> print every gist
      * 'exit'                -> leave the loop

    The loop also ends cleanly when input is exhausted (EOF), so the function
    does not crash when run without an interactive console.

    Args:
        pages: List of paginated text content.
        gists: List of gists corresponding to the paginated pages.

    Returns:
        None (prints content interactively).
    """
    total_pages = len(pages)

    print("\nWelcome to the Interactive Look-Up!")
    print(f"Total pages available: {total_pages}\n")
    print(f"Type a page number (0 to {total_pages - 1}) to view the content and its gist.")
    print("Type 'all' to view all gists. Type 'exit' to quit.\n")

    while True:
        try:
            user_input = input("Enter your choice: ").strip().lower()
        except EOFError:
            # No more input available (e.g. non-interactive execution)
            print("Exiting interactive look-up. Goodbye!")
            break

        if user_input == 'exit':
            print("Exiting interactive look-up. Goodbye!")
            break

        elif user_input == 'all':
            print("\n--- All Gists ---")
            for gist in gists:
                print(gist)
            print("\n")

        # isdecimal(), not isdigit(): isdigit() also accepts characters such as
        # superscript digits that int() cannot parse
        elif user_input.isdecimal():
            page_num = int(user_input)
            if 0 <= page_num < total_pages:
                print(f"\n--- Page {page_num} Content ---\n{pages[page_num]}")
                # Tolerate a gist list shorter than the page list
                if page_num < len(gists):
                    print(f"\n--- Gist ---\n{gists[page_num]}\n")
                else:
                    print("\n--- Gist ---\n(no gist available)\n")
            else:
                print("Invalid page number. Try again.\n")

        else:
            print("Invalid input. Please enter a page number, 'all', or 'exit'.\n")

In [15]:
if __name__ == "__main__":
    # Summarise the pages produced earlier in the notebook with the NarrativeQA heuristic
    gists = create_narrativeqa_gists(pages)

    # Start the prompt loop; it blocks on input() and runs until the user types 'exit'
    interactive_lookup(pages, gists)


Welcome to the Interactive Look-Up!
Total pages available: 3

Type a page number (0 to 2) to view the content and its gist.
Type 'all' to view all gists. Type 'exit' to quit.

Enter your choice: 1

--- Page 1 Content ---
She was the youngest child of Nikollë and Dranafile Bojaxhiu (Bernai). Her father, who was involved in Albanian-community politics in Ottoman Macedonia, was probably poisoned, an act attributed to Serbian agents, after he had visited Belgrade for a political meeting in 1919 when she was eight years old. He was born in Prizren (today in Kosovo), however, his family was from Mirdita (present-day Albania). Her mother may have been from a village near Gjakova, believed by her offspring to be Bishtazhin.

According to a biography by Joan Graff Clucas, Anjezë was in her early years when she became fascinated by stories of the lives of missionaries and their service in Bengal; by age 12, she was convinced that she should commit herself to religious life. Her resolve strength