In [1]:
%load_ext autoreload
%autoreload 2

import os
from dotenv import load_dotenv
load_dotenv()

from pydantic import BaseModel
from typing import List, Dict, Union
import re
import json


import sys
sys.path.append('multimodal_processing_pipeline')
sys.path.append('search')

from IPython.display import Markdown, display

from utils.file_utils import * 
from utils.text_utils import *
from utils.openai_utils import *
from utils.data_models import *
from pdf_ingestion_pipeline import *

In [4]:
def process_document(pdf_path, 
                     output_directory, 
                     multimodal_model=None, 
                     text_model=None
                     ):
    """Run the PDF ingestion pipeline end to end and return the processed document.

    Steps, in order:
      1. Build a ``PDFIngestionPipeline`` for ``pdf_path`` / ``output_directory``.
      2. Call ``process_pdf()`` to obtain the document content.
      3. Save a text twin for every page, then for the whole document.
      4. Condense every page, then the whole document.

    Args:
        pdf_path: Path of the PDF to ingest (forwarded to the pipeline).
        output_directory: Directory handed to the pipeline for its outputs.
        multimodal_model: Model info forwarded as the pipeline's
            ``multimodal_model``. When ``None`` (the default), a new
            ``MulitmodalProcessingModelInfo(model_name="o1", reasoning_efforts="high")``
            is created for this call.
        text_model: Model info forwarded as the pipeline's ``text_model``.
            When ``None`` (the default), a new
            ``TextProcessingModelnfo(model_name="o1", reasoning_efforts="high")``
            is created for this call.

    Returns:
        The object returned by ``pipeline.process_pdf()``. The save/condense
        calls receive this same object; presumably they populate fields such as
        ``condensed_full_text`` on it -- confirm against the pipeline code.
    """
    # Build the defaults per call instead of in the signature: a default written
    # in the signature is evaluated once at `def` time, so every call would share
    # one instance (and, with %autoreload, possibly an instance of a stale class).
    if multimodal_model is None:
        multimodal_model = MulitmodalProcessingModelInfo(model_name="o1", reasoning_efforts="high")
    if text_model is None:
        text_model = TextProcessingModelnfo(model_name="o1", reasoning_efforts="high")

    pipeline = PDFIngestionPipeline(
        pdf_path=pdf_path, 
        output_directory=output_directory, 
        multimodal_model=multimodal_model, 
        text_model=text_model
    )

    # Process the PDF
    document_content = pipeline.process_pdf()

    # Save text twins: every page first, then the whole document
    for page in document_content.pages:
        pipeline.save_page_text_twin(page)

    pipeline.save_text_twin(document_content)

    # Condense text twins: every page first, then the whole document
    for page in document_content.pages:
        pipeline.condense_page_text(page)

    pipeline.condense_text(document_content)

    return document_content

In [5]:
# Example run: both model-info objects are built with no arguments, so the
# pipeline uses whatever defaults those classes define (presumably GPT-4o,
# judging by the output directory name -- confirm in utils.data_models).
pdf_path = "sample_data/1_London_Brochure.pdf"
output_directory = "output_london_4o"

document_content = process_document(
    pdf_path=pdf_path,
    output_directory=output_directory,
    multimodal_model=MulitmodalProcessingModelInfo(),
    text_model=TextProcessingModelnfo(),
)

Processing page 1/2...
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\process_extracted_text_prompt.txt


Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\text\page_1.txt
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\image_description_prompt.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\images\page_1_photo_1.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\images\page_1_photo_2.txt


Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\table_description_prompt.txt


Processing page 2/2...
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\process_extracted_text_prompt.txt


Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\text\page_2.txt
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\image_description_prompt.txt


Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\table_description_prompt.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\tables\page_2_table_1.txt


Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\combined\page_1_twin.txt
Page 1 text twin saved at: output_london_4o\combined\page_1_twin.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\combined\page_2_twin.txt
Page 2 text twin saved at: output_london_4o\combined\page_2_twin.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\combined\text_twin.md
Text twin saved at: output_london_4o\combined\text_twin.md
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\document_condensation_prompt.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_4o\combined\page_1_condensed.md
Page 1 condensed text saved at: output_london_4o\combined\page_1_condensed.md
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_

In [2]:
# Example usage: process the brochure with the "o1" model at high reasoning
# effort for both the multimodal and the text processing steps.
pdf_path = "sample_data/1_London_Brochure.pdf"
output_directory = "output_london_o1"

# Go through process_document (defined earlier in this notebook) rather than
# repeating its steps inline: it builds the pipeline, processes the PDF, saves
# the page/document text twins and then condenses them, in that order.
# NOTE: unlike the previous inline version, this does not leave a `pipeline`
# variable in the notebook namespace; only `document_content` is produced.
document_content = process_document(
    pdf_path=pdf_path,
    output_directory=output_directory,
    multimodal_model=MulitmodalProcessingModelInfo(model_name="o1", reasoning_efforts="high"),
    text_model=TextProcessingModelnfo(model_name="o1", reasoning_efforts="high"),
)



Processing page 1/2...
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\process_extracted_text_prompt.txt


Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\text\page_1.txt
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\image_description_prompt.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\images\page_1_photo_1.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\images\page_1_photo_2.txt


Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\table_description_prompt.txt


Processing page 2/2...
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\process_extracted_text_prompt.txt


Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\text\page_2.txt
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\image_description_prompt.txt


Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\table_description_prompt.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\tables\page_2_table_1.txt


Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\combined\page_1_twin.txt
Page 1 text twin saved at: output_london_o1\combined\page_1_twin.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\combined\page_2_twin.txt
Page 2 text twin saved at: output_london_o1\combined\page_2_twin.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\combined\text_twin.md
Text twin saved at: output_london_o1\combined\text_twin.md
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\prompts\document_condensation_prompt.txt
Writing file to full path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\output_london_o1\combined\page_1_condensed.md
Page 1 condensed text saved at: output_london_o1\combined\page_1_condensed.md
Reading file from path: c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_

In [3]:
# Render the document's full_text as Markdown in the cell output.
display(Markdown(document_content.full_text))

[autoreload of utils.openai_utils failed: Traceback (most recent call last):
  File "c:\Users\selhousseini\.conda\envs\lightmmrag\Lib\site-packages\IPython\extensions\autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "c:\Users\selhousseini\.conda\envs\lightmmrag\Lib\site-packages\IPython\extensions\autoreload.py", line 475, in superreload
    module = reload(module)
             ^^^^^^^^^^^^^^
  File "c:\Users\selhousseini\.conda\envs\lightmmrag\Lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 621, in _exec
  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "c:\Users\selhousseini\Documents\GitHub\mm_doc_proc\multimodal_processing_pipeline\utils\openai_utils.py", line 66, in <module>
    azure_endpoint = get_azure_endpoint(os.getenv('RESOURCE_4O')),
                

##### --- Page 1 ---

# Extracted Text

## Margie’s Travel Presents…

## London
London is the capital and most populous city of England and the United Kingdom. Standing on the River Thames in the south east of the island of Great Britain, London has been a major settlement for two millennia. It was founded by the Romans, who named it Londinium. London’s ancient core, the City of London, largely retains its 1.12-square-mile medieval boundaries.

Since at least the 19th century, London has also referred to the metropolis around this core, historically split between Middlesex, Essex, Surrey, Kent, and Hertfordshire, which today largely makes up Greater London, governed by the Mayor of London and the London Assembly.

## Mostly popular for:
Leisure, Outdoors, Historical, Arts  
& Culture

## Best time to visit:
Jun-Aug

## Averag Precipitation:
1.9 in

## Average Temperature:
56-67°F

## London Hotels
Margie’s Travel offers the following accommodation options in London:

The Buckingham Hotel  
Comfortable hotel close to major sights like Buckingham Palace, Regent’s Park, and Trafalgar  
Square.

The City Hotel  
Luxury rooms in the city, within walking distance of Tower Bridge and the Tower of London..

The Kensington Hotel  
Budget accommodation near Earl’s Court.

To book your trip to London, visit www.margiestravel.com


# Embedded Images:

### - Image 0:
A color photograph showing a large arched bridge spanning a wide river, with two stone towers connected by blue metal walkways. The bridge deck appears raised in the middle, and the sky above is cloudy.

Likely included to highlight a famous architectural landmark in London, serving as a recognizable symbol of the city.

The partially raised roadway demonstrates the bridge’s functional drawbridge aspect. The water in the foreground and the overcast sky lend a characteristic London atmosphere.

### - Image 1:
A color photograph featuring a prominent fountain in a public square, with water cascading into a large circular basin. Surrounding sculptures and statues are visible, and nearby buildings form the backdrop.

Possibly included to showcase another iconic city attraction, illustrating notable sites for visitors.

The fountain’s central position draws attention to the flowing water feature. The presence of statues and architectural surroundings underscores the square’s cultural and historical significance.

<br/>
<br/>
<img src="output_london_o1\images\page_1.png" alt="Page Number 1" width="300" height="425">




##### --- Page 2 ---

# Extracted Text

| Category                  | Information                                    |
|---------------------------|-----------------------------------------------|
| Country                   | United Kingdom                                |
| Capital Of                | England                                       |
| Currency                  | Pound Sterling (GBP)                          |
| Population (2021 census)  | Approximately 8.8 million                     |
| Famous For                | Historical landmarks, museums, cultural diversity |


# Tables:

### - Table 0:

| Category                  | Information                                      |
|---------------------------|--------------------------------------------------|
| Country                   | United Kingdom                                   |
| Capital Of                | England                                          |
| Currency                  | Pound Sterling (GBP)                             |
| Population (2021 census)  | Approximately 8.8 million                        |
| Famous For                | Historical landmarks, museums, cultural diversity|


<br/>
<br/>
<img src="output_london_o1\images\page_2.png" alt="Page Number 2" width="300" height="425">





In [4]:
# Render the document's condensed_full_text as Markdown, for comparison with
# the uncondensed full_text.
display(Markdown(document_content.condensed_full_text))

Margie’s Travel Presents…

LONDON  
London is the capital and most populous city of England and the United Kingdom, standing on the River Thames in southeast Great Britain. It has been a major settlement for two millennia, founded by the Romans as Londinium. London’s ancient core, the City of London, retains its 1.12-square-mile medieval boundaries. Since at least the 19th century, “London” has referred to the surrounding metropolis, historically split among Middlesex, Essex, Surrey, Kent, and Hertfordshire, now forming Greater London, governed by the Mayor of London and the London Assembly.

Mostly popular for: Leisure, Outdoors, Historical, Arts & Culture  
Best time to visit: Jun–Aug  
Average Precipitation: 1.9 in  
Average Temperature: 56–67°F  

London Hotels (offered by Margie’s Travel):  
• The Buckingham Hotel – Comfortable, near Buckingham Palace, Regent’s Park, and Trafalgar Square.  
• The City Hotel – Luxury rooms within walking distance of Tower Bridge and the Tower of London.  
• The Kensington Hotel – Budget accommodation near Earl’s Court.  

To book your trip to London, visit www.margiestravel.com

Embedded Images:  
• Image 0: Color photograph of a large arched bridge with two stone towers connected by blue metal walkways. The roadway is partially raised over a wide river, under an overcast sky, illustrating its functional drawbridge aspect and a characteristic London atmosphere.  
• Image 1: Color photograph of a prominent fountain in a public square, water cascading into a circular basin, surrounded by statues and buildings, highlighting a culturally and historically significant site.

TABLE OF KEY DATA:  
┌───────────────────────────────┬───────────────────────────────────────────────┐
│ Category                      │ Information                                  │
├───────────────────────────────┼───────────────────────────────────────────────┤
│ Country                       │ United Kingdom                               │
│ Capital Of                    │ England                                      │
│ Currency                      │ Pound Sterling (GBP)                         │
│ Population (2021 census)      │ Approximately 8.8 million                    │
│ Famous For                    │ Historical landmarks, museums, cultural      │
│                               │ diversity                                    │
└───────────────────────────────┴───────────────────────────────────────────────┘

In [5]:
# Token counts as a (full_text, condensed_full_text) pair, to show how much
# the condensation step shrinks the document.
tuple(
    get_token_count(text)
    for text in (document_content.full_text, document_content.condensed_full_text)
)

(716, 479)