In [None]:
#!pip install PyMuPDF fpdf tqdm -q

# Install required packages
!pip install PyMuPDF fpdf Pillow tqdm -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for fpdf (setup.py) ... [?25l[?25hdone


In [None]:
#!/usr/bin/env python3
"""
PDF Text Extractor for Academic Papers - Unicode Compatible
Extracts text from PDFs with proper Unicode support for academic symbols
"""


import os
import tempfile
import fitz  # PyMuPDF
from tqdm import tqdm
from google.colab import files

def extract_text_to_txt(input_pdf, output_txt):
    """
    Extract text from a PDF and save it as a plain UTF-8 text file.

    Writing plain text avoids font encoding issues entirely.

    Args:
        input_pdf: Path of the PDF file to read.
        output_txt: Path of the text file to write (opened in 'w' mode, so an
            existing file is overwritten).

    Returns:
        True if extraction completed, False if any error occurred. Errors are
        printed rather than raised.
    """
    print(f"Processing {input_pdf}...")

    try:
        # Open the PDF with PyMuPDF. The context manager guarantees the
        # document handle is closed even if extraction or writing fails
        # part-way through.
        with fitz.open(input_pdf) as pdf_document:
            total_pages = len(pdf_document)

            with open(output_txt, 'w', encoding='utf-8') as txt_file:
                # Write a header
                txt_file.write(f"Text extracted from: {input_pdf}\n")
                txt_file.write("=" * 50 + "\n\n")

                # Process each page
                for page_num in tqdm(range(total_pages), desc="Extracting text"):
                    page = pdf_document[page_num]

                    # Extract text while preserving structure
                    text = page.get_text("text")

                    # Pages with no extractable text are skipped entirely,
                    # so they get no page marker in the output.
                    if text.strip():
                        # Add a 1-based page number for reference
                        txt_file.write(f"\n--- Page {page_num + 1} ---\n\n")
                        txt_file.write(text)
                        txt_file.write("\n")

        print(f"Successfully extracted text from {total_pages} pages")
        print(f"Text saved to {output_txt}")

    except Exception as e:
        # Best-effort notebook helper: report the failure and signal it via
        # the return value instead of propagating.
        print(f"Error processing PDF: {e}")
        return False

    return True

# Colab-friendly function to download the output text file
def download_output(output_file):
    """
    Ask Colab to send `output_file` to the user's browser.

    Any exception raised while starting the download is caught and reported
    on stdout; nothing is re-raised and nothing is returned.
    """
    try:
        files.download(output_file)
        print(f"Downloading {output_file} to your computer...")
    except Exception as err:
        # Both lines are emitted together, separated by a newline.
        print(
            f"Error downloading file: {err}",
            "If you're using Google Drive, the file has been saved there.",
            sep="\n",
        )

# Run the extractor
def run_extractor():
    """
    Interactive Colab entry point: upload a PDF, extract its text, download it.

    Prompts for an upload, writes the extracted text next to the uploaded
    file as "<name>_text.txt", triggers a browser download of that file, and
    prints a short preview. Only the first uploaded file is processed.

    Returns:
        None. Returns early if nothing was uploaded or extraction failed.
    """
    from google.colab import files

    # Number of characters shown in the preview.
    preview_chars = 1000

    print("Please upload a PDF file:")
    uploaded = files.upload()

    if not uploaded:
        print("No file uploaded. Exiting.")
        return

    # The upload result is keyed by filename; take the first one.
    input_pdf = next(iter(uploaded))

    # Set default output filename
    base_name = os.path.splitext(input_pdf)[0]
    output_txt = f"{base_name}_text.txt"

    # Extract the text; the extractor already printed the error on failure.
    if not extract_text_to_txt(input_pdf, output_txt):
        return

    # Download the output
    download_output(output_txt)

    # Read the preview, then probe one more character so the continuation
    # marker is only shown when the file really is longer than the preview.
    with open(output_txt, 'r', encoding='utf-8') as f:
        preview = f.read(preview_chars)
        has_more = bool(f.read(1))

    print("\nPreview of extracted text:")
    print("-" * 50)
    print(preview)
    print("-" * 50)
    if has_more:
        print("... (text continues)")

# Entry point: start the interactive upload/extract/download flow when this
# code is executed directly rather than imported as a module.
if __name__ == "__main__":
    run_extractor()

Please upload a PDF file:


Saving codeact.pdf to codeact.pdf
Processing codeact.pdf...


Extracting text: 100%|██████████| 25/25 [00:00<00:00, 34.34it/s]

Successfully extracted text from 25 pages
Text saved to codeact_text.txt





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading codeact_text.txt to your computer...

Preview of extracted text:
--------------------------------------------------
Text extracted from: codeact.pdf


--- Page 1 ---

Executable Code Actions Elicit Better LLM Agents
Xingyao Wang 1 Yangyi Chen 1 Lifan Yuan 1 Yizhe Zhang 2 Yunzhu Li 1 Hao Peng 1 Heng Ji 1
Abstract
Large Language Model (LLM) agents, capable
of performing a broad range of actions, such
as invoking tools and controlling robots, show
great potential in tackling real-world challenges.
LLM agents are typically prompted to produce ac-
tions by generating JSON or text in a pre-defined
format, which is usually limited by constrained
action space (e.g., the scope of pre-defined
tools) and restricted flexibility (e.g., inability to
compose multiple tools). This work proposes
to use executable Python code to consolidate
LLM agents’ actions into a unified action space
(CodeAct). Integrated with a Python interpreter,
CodeAct can execute code actions and dynam-
ically revis