In [1]:
import base64
import os
import io
from typing import Optional, Dict, Any, List, Tuple
import google.generativeai as genai
from PIL import Image
import fitz  # PyMuPDF for PDF processing
from pathlib import Path
import concurrent.futures
import threading
from tqdm import tqdm
import time
from dataclasses import dataclass
from queue import Queue
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

@dataclass
class ProcessingConfig:
    """Configuration for parallel processing."""
    max_workers: int = 4  # Number of parallel workers
    batch_size: int = 3   # Number of pages to process in each batch
    timeout: int = 30     # Timeout for each API call in seconds
    retry_attempts: int = 3  # Number of retry attempts for failed pages

def find_all_pdf_files(input_dir: str) -> List[str]:
    """
    Find all PDF files in the input directory and its subdirectories.
    
    Args:
        input_dir: Path to the input directory
        
    Returns:
        List of paths to all PDF files found
    """
    pdf_files = []
    input_path = Path(input_dir)
    
    if not input_path.exists():
        print(f"Error: Input directory {input_dir} does not exist")
        return pdf_files
    
    # Walk through all subdirectories
    for root, dirs, files in os.walk(input_path):
        for file in files:
            if file.lower().endswith('.pdf'):
                pdf_path = os.path.join(root, file)
                pdf_files.append(pdf_path)
    
    return pdf_files

def encode_pdf(pdf_path: str) -> Optional[str]:
    """
    Encode the PDF to base64.
    
    Args:
        pdf_path: Path to the PDF file
        
    Returns:
        Base64 encoded string of the PDF or None if error
    """
    try:
        with open(pdf_path, "rb") as pdf_file:
            return base64.b64encode(pdf_file.read()).decode('utf-8')
    except FileNotFoundError:
        print(f"Error: The file {pdf_path} was not found.")
        return None
    except Exception as e:
        print(f"Error: {e}")
        return None

def process_single_page(args: Tuple[str, int, str, genai.GenerativeModel, ProcessingConfig]) -> Dict[str, Any]:
    """
    Process a single page with retry logic and error handling.
    
    Args:
        args: Tuple containing (pdf_path, page_num, api_key, model, config)
        
    Returns:
        Dictionary with page processing results
    """
    pdf_path, page_num, api_key, model, config = args
    
    # Configure Gemini for this thread
    genai.configure(api_key=api_key)
    
    try:
        # Open PDF and get specific page
        pdf_document = fitz.open(pdf_path)
        page = pdf_document[page_num]
        
        # Convert page to image with higher resolution
        pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
        img_data = pix.tobytes("png")
        
        # Create PIL Image
        image = Image.open(io.BytesIO(img_data))
        
        # Prepare prompt for Gemini
        prompt = """
        Please extract all text from this image. 
        Maintain the original formatting and structure.
        If there are tables, preserve the table structure.
        Return only the extracted text without any additional commentary.
        """
        
        # Retry logic for API calls
        for attempt in range(config.retry_attempts):
            try:
                response = model.generate_content([prompt, image], timeout=config.timeout)
                page_text = response.text
                
                pdf_document.close()
                return {
                    'page': page_num + 1,  # Convert to 1-indexed
                    'text': page_text,
                    'success': True,
                    'attempts': attempt + 1
                }
                
            except Exception as e:
                if attempt == config.retry_attempts - 1:
                    pdf_document.close()
                    return {
                        'page': page_num + 1,
                        'text': f"Error extracting text after {config.retry_attempts} attempts: {e}",
                        'success': False,
                        'attempts': config.retry_attempts
                    }
                time.sleep(1)  # Brief pause before retry
                
    except Exception as e:
        return {
            'page': page_num + 1,
            'text': f"Error processing page: {e}",
            'success': False,
            'attempts': 0
        }

def process_pdf_pages_parallel(pdf_path: str, api_key: str, start_page: int = 1, 
                             end_page: Optional[int] = None, config: ProcessingConfig = None) -> Dict[str, Any]:
    """
    Extract text from PDF using parallel processing for multiple pages.
    
    Args:
        pdf_path: Path to the PDF file
        api_key: Google API key for Gemini
        start_page: First page to extract (1-indexed, default: 1)
        end_page: Last page to extract (1-indexed, default: None for all pages)
        config: Processing configuration
        
    Returns:
        Dictionary containing extracted text and metadata
    """
    if config is None:
        config = ProcessingConfig()
    
    # Configure Gemini
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')
    
    # Open PDF to get total pages
    pdf_document = fitz.open(pdf_path)
    total_pages_in_pdf = len(pdf_document)
    pdf_document.close()
    
    # Validate page range
    if end_page is None:
        end_page = total_pages_in_pdf
    
    # Adjust for 0-indexed pages in PyMuPDF
    start_page_idx = start_page - 1
    end_page_idx = end_page - 1
    
    # Validate page range
    if start_page_idx < 0 or end_page_idx >= total_pages_in_pdf:
        print(f"Error: Page range {start_page}-{end_page} is invalid for PDF with {total_pages_in_pdf} pages")
        return None
    
    if start_page_idx > end_page_idx:
        print(f"Error: Start page ({start_page}) cannot be greater than end page ({end_page})")
        return None
    
    print(f"Starting parallel OCR extraction for {os.path.basename(pdf_path)}...")
    print(f"Total pages in PDF: {total_pages_in_pdf}")
    print(f"Extracting pages: {start_page} to {end_page}")
    print(f"Using {config.max_workers} parallel workers")
    print("-" * 50)
    
    # Create list of pages to process
    pages_to_process = list(range(start_page_idx, end_page_idx + 1))
    
    # Process pages in parallel
    extracted_text = []
    successful_pages = 0
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=config.max_workers) as executor:
        # Prepare arguments for each page
        futures_args = [
            (pdf_path, page_num, api_key, model, config) 
            for page_num in pages_to_process
        ]
        
        # Submit all tasks and track progress
        with tqdm(total=len(pages_to_process), desc=f"Processing {os.path.basename(pdf_path)}") as pbar:
            # Submit all tasks
            future_to_page = {
                executor.submit(process_single_page, args): args[1] 
                for args in futures_args
            }
            
            # Collect results as they complete
            for future in concurrent.futures.as_completed(future_to_page):
                result = future.result()
                extracted_text.append(result)
                
                if result['success']:
                    successful_pages += 1
                
                pbar.update(1)
                pbar.set_postfix({
                    'Success': f"{successful_pages}/{len(pages_to_process)}",
                    'Page': result['page']
                })
    
    # Sort results by page number
    extracted_text.sort(key=lambda x: x['page'])
    
    print("-" * 50)
    print(f"Parallel OCR extraction completed for {os.path.basename(pdf_path)}!")
    print(f"Successfully processed: {successful_pages}/{len(extracted_text)} pages")
    
    return {
        'pdf_path': pdf_path,
        'pdf_name': os.path.basename(pdf_path),
        'total_pages_extracted': len(extracted_text),
        'pages': extracted_text,
        'start_page': start_page,
        'end_page': end_page,
        'total_pages_in_pdf': total_pages_in_pdf,
        'successful_pages': successful_pages
    }

def process_multiple_pdfs_parallel(pdf_files: List[str], api_key: str, start_page: int = 1, 
                                 end_page: Optional[int] = None, config: ProcessingConfig = None) -> List[Dict[str, Any]]:
    """
    Process multiple PDF files in parallel.
    
    Args:
        pdf_files: List of PDF file paths
        api_key: Google API key for Gemini
        start_page: First page to extract (1-indexed, default: 1)
        end_page: Last page to extract (1-indexed, default: None for all pages)
        config: Processing configuration
        
    Returns:
        List of processing results for each PDF
    """
    if config is None:
        config = ProcessingConfig()
    
    results = []
    
    print(f"Processing {len(pdf_files)} PDF files in parallel...")
    print(f"Using {config.max_workers} parallel workers")
    print("=" * 60)
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(config.max_workers, len(pdf_files))) as executor:
        # Submit all PDF processing tasks
        future_to_pdf = {
            executor.submit(process_pdf_pages_parallel, pdf_path, api_key, start_page, end_page, config): pdf_path
            for pdf_path in pdf_files
        }
        
        # Collect results as they complete
        with tqdm(total=len(pdf_files), desc="Processing PDFs") as pbar:
            for future in concurrent.futures.as_completed(future_to_pdf):
                pdf_path = future_to_pdf[future]
                try:
                    result = future.result()
                    results.append(result)
                    pbar.update(1)
                    pbar.set_postfix({
                        'PDF': os.path.basename(pdf_path),
                        'Success': f"{result['successful_pages']}/{result['total_pages_extracted']}" if result else "Failed"
                    })
                except Exception as e:
                    print(f"Error processing {pdf_path}: {e}")
                    results.append(None)
                    pbar.update(1)
    
    return results

def save_extracted_text_to_file(extracted_data: Dict[str, Any], output_path: str) -> None:
    """
    Save extracted text to a file.
    
    Args:
        extracted_data: Dictionary containing extracted text
        output_path: Path where to save the output file
    """
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(f"OCR Extraction Results\n")
        f.write(f"PDF: {extracted_data['pdf_name']}\n")
        f.write(f"Pages extracted: {extracted_data['start_page']} to {extracted_data['end_page']}\n")
        f.write(f"Total pages in PDF: {extracted_data['total_pages_in_pdf']}\n")
        f.write(f"Successfully processed: {extracted_data['successful_pages']}/{extracted_data['total_pages_extracted']} pages\n")
        f.write("=" * 50 + "\n\n")
        
        for page_data in extracted_data['pages']:
            f.write(f"Page {page_data['page']}")
            if not page_data['success']:
                f.write(" (FAILED)")
            f.write("\n")
            f.write("-" * 30 + "\n")
            f.write(page_data['text'])
            f.write("\n\n")

def process_all_pdfs_in_directory_parallel(input_dir: str, api_key: str, start_page: int = 1, 
                                         end_page: Optional[int] = None, config: ProcessingConfig = None) -> None:
    """
    Process all PDF files in the input directory using parallel processing.
    
    Args:
        input_dir: Path to the input directory
        api_key: Google API key for Gemini
        start_page: First page to extract (1-indexed, default: 1)
        end_page: Last page to extract (1-indexed, default: None for all pages)
        config: Processing configuration
    """
    if config is None:
        config = ProcessingConfig()
    
    # Find all PDF files
    pdf_files = find_all_pdf_files(input_dir)
    
    if not pdf_files:
        print(f"No PDF files found in {input_dir} or its subdirectories")
        return
    
    print(f"Found {len(pdf_files)} PDF file(s):")
    for i, pdf_file in enumerate(pdf_files, 1):
        print(f"{i}. {pdf_file}")
    print("=" * 60)
    
    # Process PDFs in parallel
    results = process_multiple_pdfs_parallel(pdf_files, api_key, start_page, end_page, config)
    
    # Save results
    successful_saves = 0
    for i, result in enumerate(results):
        if result:
            try:
                # Create output filename based on PDF name
                pdf_name = os.path.splitext(os.path.basename(result['pdf_path']))[0]
                output_filename = f"{pdf_name}_extracted_text.txt"
                output_path = os.path.join("output", output_filename)
                
                # Ensure output directory exists
                os.makedirs("output", exist_ok=True)
                
                # Save to file
                save_extracted_text_to_file(result, output_path)
                print(f"✓ Results saved to {output_path}")
                successful_saves += 1
            except Exception as e:
                print(f"✗ Error saving results for {result['pdf_name']}: {e}")
        else:
            print(f"✗ No results to save for PDF {i+1}")
    
    print(f"\nProcessing completed!")
    print(f"Successfully processed: {len([r for r in results if r])}/{len(results)} PDFs")
    print(f"Successfully saved: {successful_saves}/{len(results)} files")

# Usage example with parallel processing
if __name__ == "__main__":
    # Set your API key
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        print("Please set GOOGLE_API_KEY environment variable")
        exit(1)
    
    # Path to your input directory
    input_dir = "C:/MoovMedia/ocr_exploration/input"
    
    # Configure parallel processing
    config = ProcessingConfig(
        max_workers=4,      # Number of parallel workers
        batch_size=3,       # Pages per batch
        timeout=30,         # API timeout in seconds
        retry_attempts=3    # Retry attempts for failed pages
    )
    
    # Process all PDF files with parallel processing
    process_all_pdfs_in_directory_parallel(
        input_dir, 
        api_key, 
        start_page=1, 
        end_page=None,
        config=config
    )
    
    print("\nAll PDF files have been processed with parallel processing!")

  from .autonotebook import tqdm as notebook_tqdm


Found 8 PDF file(s):
1. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Información\Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf
2. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Información\tendencias-globales-de-capital-humano-2024.pdf
3. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Información\trendwatching-fastforward-theagenteconomy.pdf
4. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Información\trendwatching-snapshot-march-2025.pdf
5. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Licitaciones\Brief_cliente.pdf
6. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Licitaciones\IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf
7. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Licitaciones\OFICIAL Licitación Mallplaza - Moov.pdf
8. C:\MoovMedia\ocr_exploration\input\[GPT PLANNING]\Presentaciones\Licitaciones\Presentación Licitación.pdf
Processing 8 PDF 

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:   0%|          | 0/28 [00:00<?, ?it/s]

[A[A


[A[A[A
[A


Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:   4%|▎         | 1/28 [00:11<02:58,  6.61s/it]
[A

[A[A


Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:   4%|▎         | 1/28 [00:11<02:58,  6.61s/it, Success=0/28, Page=3]
[A

[A[A


Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:   7%|▋         | 2/28 [00:11<02:19,  5.37s/it, Success=0/28, Page=3]
[A

[A[A


Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:   7%|▋         | 2/28 [00:11<02:19,  5.37s/it, Success=0/28, Page=4]
[A

[A[A


Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  11%|█         | 3/28 [00:11<01:21,  3.25s/it, Success=0/28, Page=4]
[A

[A[A


Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  11%|█         | 3/28 [00:11<01:21,  3.25s/it, Success=0/28, Page=2]
[A

[A[A


Proc

--------------------------------------------------
Parallel OCR extraction completed for trendwatching-fastforward-theagenteconomy.pdf!
Successfully processed: 0/11 pages
Starting parallel OCR extraction for Brief_cliente.pdf...
Total pages in PDF: 67
Extracting pages: 1 to 67
Using 4 parallel workers
--------------------------------------------------



Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  39%|███▉      | 11/28 [00:16<00:14,  1.20it/s, Success=0/28, Page=10]


[A[A[A

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  39%|███▉      | 11/28 [00:23<00:14,  1.20it/s, Success=0/28, Page=11]
[A


[A[A[A

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  43%|████▎     | 12/28 [00:29<00:37,  2.34s/it, Success=0/28, Page=11]
[A


[A[A[A

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  43%|████▎     | 12/28 [00:29<00:37,  2.34s/it, Success=0/28, Page=13]
[A


[A[A[A

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  46%|████▋     | 13/28 [00:29<00:39,  2.65s/it, Success=0/28, Page=13]
[A


[A[A[A

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  46%|████▋     | 13/28 [00:29<00:39,  2.65s/it, Success=0/28, Page=12]
[A


[A[A[A

Processing Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf:  50%|█████     | 14/

--------------------------------------------------
Parallel OCR extraction completed for Estudio_Amor-en-tiempos-de-comunicacion-liquida_.pdf!
Successfully processed: 0/28 pages
Starting parallel OCR extraction for IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf...
Total pages in PDF: 32
Extracting pages: 1 to 32
Using 4 parallel workers
--------------------------------------------------


Processing trendwatching-snapshot-march-2025.pdf: 100%|██████████| 30/30 [00:29<00:00,  1.02it/s, Success=0/30, Page=30][A[A[A






--------------------------------------------------
Parallel OCR extraction completed for trendwatching-snapshot-march-2025.pdf!
Successfully processed: 0/30 pages


Processing tendencias-globales-de-capital-humano-2024.pdf:  22%|██▏       | 27/122 [00:29<01:07,  1.40it/s, Success=0/122, Page=26][A[A[A[A
[A


[A[A[A
[A





Starting parallel OCR extraction for OFICIAL Licitación Mallplaza - Moov.pdf...
Total pages in PDF: 122
Extracting pages: 1 to 122
Using 4 parallel workers
--------------------------------------------------


Processing tendencias-globales-de-capital-humano-2024.pdf:  23%|██▎       | 28/122 [00:29<01:07,  1.39it/s, Success=0/122, Page=27][A[A[A[A
[A

[A[A


[A[A[A
Processing IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf:   3%|▎         | 1/32 [00:06<01:24,  2.74s/it]


[A[A[A

[A[A
Processing IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf:   3%|▎         | 1/32 [00:17<01:24,  2.74s/it, Success=0/32, Page=2]


[A[A[A

[A[A
Processing IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf:   6%|▋         | 2/32 [00:23<04:56,  9.88s/it, Success=0/32, Page=2]


[A[A[A

[A[A
Processing IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf:   6%|▋         | 2/32 [00:23<04:56,  9.88s/it, Success=0/32, Page=1]


[A[A[A

[A[A
Processing IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf:   9%|▉         | 3/32 [00:23<03:53,  8.04s/it, Success=0/32, Page=1]


[A[A[A

[A[A
Processing IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf:   9%|▉         | 3/32 [00:23<03:53,  8.04s/it, Success=

--------------------------------------------------
Parallel OCR extraction completed for IKEA_COCINAS_PROPUESTA MOOV_compressed (1).pdf!
Successfully processed: 0/32 pages
Starting parallel OCR extraction for Presentación Licitación.pdf...
Total pages in PDF: 82
Extracting pages: 1 to 82
Using 4 parallel workers
--------------------------------------------------


Processing Presentación Licitación.pdf:   0%|          | 0/82 [00:00<?, ?it/s]


[A[A[A

[A[A
[A


Processing Presentación Licitación.pdf:   1%|          | 1/82 [00:03<03:11,  2.36s/it]

[A[A
[A


Processing Presentación Licitación.pdf:   1%|          | 1/82 [00:03<03:11,  2.36s/it, Success=0/82, Page=3]

[A[A
[A


Processing Presentación Licitación.pdf:   2%|▏         | 2/82 [00:03<02:15,  1.69s/it, Success=0/82, Page=3]

[A[A
[A


Processing Presentación Licitación.pdf:   2%|▏         | 2/82 [00:03<02:15,  1.69s/it, Success=0/82, Page=1]

[A[A
[A


Processing Presentación Licitación.pdf:   4%|▎         | 3/82 [00:03<02:13,  1.69s/it, Success=0/82, Page=2]

[A[A
[A


Processing Presentación Licitación.pdf:   5%|▍         | 4/82 [00:03<02:12,  1.69s/it, Success=0/82, Page=4]

[A[A

[A[A


[A[A[A
[A

Processing Presentación Licitación.pdf:   6%|▌         | 5/82 [00:05<01:06,  1.16it/s, Success=0/82, Page=4]


[A[A[A
[A

Processing Presentación Licitación

--------------------------------------------------
Parallel OCR extraction completed for Brief_cliente.pdf!
Successfully processed: 0/67 pages


Processing OFICIAL Licitación Mallplaza - Moov.pdf:  46%|████▌     | 56/122 [00:49<01:12,  1.09s/it, Success=0/122, Page=56][A[A[A


Processing Presentación Licitación.pdf:  32%|███▏      | 26/82 [00:19<00:31,  1.81it/s, Success=0/82, Page=25]


Processing Presentación Licitación.pdf:  32%|███▏      | 26/82 [00:19<00:31,  1.81it/s, Success=0/82, Page=26]


Processing Presentación Licitación.pdf:  33%|███▎      | 27/82 [00:19<00:28,  1.92it/s, Success=0/82, Page=27]

Processing Presentación Licitación.pdf:  34%|███▍      | 28/82 [00:20<00:27,  1.95it/s, Success=0/82, Page=27]

Processing Presentación Licitación.pdf:  34%|███▍      | 28/82 [00:20<00:27,  1.95it/s, Success=0/82, Page=28]


[A[A[A

Processing Presentación Licitación.pdf:  35%|███▌      | 29/82 [00:22<00:30,  1.74it/s, Success=0/82, Page=28]


[A[A[A

Processing Presentación Licitación.pdf:  35%|███▌      | 29/82 [00:22<00:30,  1.74it/s, Success=0/82, Page=29]


[A[A[A

Processing Presentación Licitación.pdf:  

--------------------------------------------------
Parallel OCR extraction completed for tendencias-globales-de-capital-humano-2024.pdf!
Successfully processed: 0/122 pages


Processing OFICIAL Licitación Mallplaza - Moov.pdf:  79%|███████▊  | 96/122 [01:20<00:17,  1.50it/s, Success=0/122, Page=95][A[A[A

[A[A

[A[A

Processing Presentación Licitación.pdf:  85%|████████▌ | 70/82 [00:51<00:07,  1.60it/s, Success=0/82, Page=67]

Processing Presentación Licitación.pdf:  85%|████████▌ | 70/82 [00:51<00:07,  1.60it/s, Success=0/82, Page=72]

Processing Presentación Licitación.pdf:  87%|████████▋ | 71/82 [00:51<00:06,  1.63it/s, Success=0/82, Page=72]

Processing Presentación Licitación.pdf:  89%|████████▉ | 73/82 [00:51<00:05,  1.63it/s, Success=0/82, Page=73]

[A[A

[A[A

[A[A

[A[A

Processing Presentación Licitación.pdf:  90%|█████████ | 74/82 [00:56<00:05,  1.35it/s, Success=0/82, Page=73]

Processing Presentación Licitación.pdf:  90%|█████████ | 74/82 [00:56<00:05,  1.35it/s, Success=0/82, Page=76]

Processing Presentación Licitación.pdf:  91%|█████████▏| 75/82 [00:56<00:05,  1.19it/s, Success=0/82, Page=76]

Processing Presentación Licitaci

--------------------------------------------------
Parallel OCR extraction completed for Presentación Licitación.pdf!
Successfully processed: 0/82 pages




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

Processing OFICIAL Licitación Mallplaza - Moov.pdf: 100%|██████████| 122/122 [01:38<00:00,  1.23it/s, Success=0/122, Page=122]
Processing PDFs: 100%|██████████| 8/8 [02:09<00:00, 16.20s/it, PDF=OFICIAL Licitación Mallplaza - Moov.pdf, Success=0/122]

--------------------------------------------------
Parallel OCR extraction completed for OFICIAL Licitación Mallplaza - Moov.pdf!
Successfully processed: 0/122 pages
✓ Results saved to output\trendwatching-fastforward-theagenteconomy_extracted_text.txt
✓ Results saved to output\Estudio_Amor-en-tiempos-de-comunicacion-liquida__extracted_text.txt
✓ Results saved to output\trendwatching-snapshot-march-2025_extracted_text.txt
✓ Results saved to output\IKEA_COCINAS_PROPUESTA MOOV_compressed (1)_extracted_text.txt
✓ Results saved to output\Brief_cliente_extracted_text.txt
✓ Results saved to output\tendencias-globales-de-capital-humano-2024_extracted_text.txt
✓ Results saved to output\Presentación Licitación_extracted_text.txt
✓ Results saved to output\OFICIAL Licitación Mallplaza - Moov_extracted_text.txt

Processing completed!
Successfully processed: 8/8 PDFs
Successfully saved: 8/8 files

All PDF files have been processed with parallel processing!



