In [1]:
import os
import re
from pathlib import Path


In [2]:
# --- IMPORTANT ---
# Root directory to process. Change this before running the notebook.
#
# The renaming function only looks at sub-folders laid out as
# '<root>/<YEAR>/<YEAR> Q<N>' (years 2021-2025, quarters Q1-Q4); anything else
# under the root is ignored.
#
# Absolute or relative paths both work. A relative value such as the one below
# is resolved against the kernel's current working directory.
# Example for Windows: "C:/Users/YourUser/Documents/Reports"
# Example for macOS/Linux: "/home/youruser/documents/reports"

root_directory_path = r"pdf_data"


In [3]:
def rename_pdfs_in_structured_dirs(root_directory: str, dry_run: bool = False):
    """
    Append the quarter label to every PDF stored under 'YEAR/YEAR QX' folders.

    The function looks for folders shaped like ``<root>/2023/2023 Q2`` (years
    2021-2025, quarters Q1-Q4) and renames each PDF inside them from
    ``Name.pdf`` to ``Name 2023 Q2.pdf``. The label is taken from the quarter
    folder's own name; the year folder and the quarter folder are validated
    independently, so their years are not cross-checked against each other.

    Safety rules applied to every candidate file:
      * a file whose stem already contains the label is skipped, which makes
        re-running the function harmless;
      * a file whose target name already exists is skipped, because
        ``Path.rename`` would silently replace the existing file on POSIX;
      * only regular files are touched (a directory named ``x.pdf`` is ignored).

    Progress is reported with ``print``; nothing is returned.

    Args:
        root_directory: Directory that contains the year folders.
        dry_run: When True, report what would be renamed without touching
            the filesystem. Defaults to False (perform the renames).

    Returns:
        None. If ``root_directory`` is not an existing directory an error line
        is printed and the function returns immediately.
    """
    root_path = Path(root_directory)
    if not root_path.is_dir():
        print(f"Error: Directory not found at '{root_directory}'")
        return

    print(f"Starting scan in: {root_path}\n")

    # Exact shapes accepted for the year folder and the quarter folder.
    year_pattern = re.compile(r"^202[1-5]$")
    quarter_dir_pattern = re.compile(r"^202[1-5] Q[1-4]$")

    # The glob is only a cheap pre-filter; the regexes below do the real
    # validation. Sorting makes the processing order (and the log) reproducible.
    for quarter_dir in sorted(root_path.glob('202*/* Q*')):
        if not quarter_dir.is_dir():
            continue

        year_dir = quarter_dir.parent

        # fullmatch, not match: '$' alone would also accept a trailing newline.
        if not (year_pattern.fullmatch(year_dir.name)
                and quarter_dir_pattern.fullmatch(quarter_dir.name)):
            continue

        print(f"--- Processing valid folder: {quarter_dir} ---")
        suffix_to_add = f" {quarter_dir.name}"

        # Character classes make the extension match case-insensitive on every
        # platform ('.pdf', '.PDF', ...). The listing is materialised before any
        # rename so the directory is not mutated while it is being iterated.
        pdf_paths = sorted(quarter_dir.glob('*.[pP][dD][fF]'))

        for pdf_path in pdf_paths:
            try:
                if not pdf_path.is_file():
                    continue

                # Safety check: skip files that already carry the label.
                if suffix_to_add in pdf_path.stem:
                    print(f"  Skipping (already renamed): {pdf_path.name}")
                    continue

                # with_name keeps the original extension (including its case)
                # and, unlike with_stem, does not require Python 3.9+.
                new_path = pdf_path.with_name(
                    f"{pdf_path.stem}{suffix_to_add}{pdf_path.suffix}"
                )

                # Never clobber an existing file: Path.rename overwrites
                # silently on POSIX and raises on Windows.
                if new_path.exists():
                    print(
                        f"  Skipping (target already exists): "
                        f"'{pdf_path.name}' -> '{new_path.name}'"
                    )
                    continue

                if dry_run:
                    print(f"  Would rename: '{pdf_path.name}' -> '{new_path.name}'")
                    continue

                print(f"  Renaming: '{pdf_path.name}' -> '{new_path.name}'")
                pdf_path.rename(new_path)

            except OSError as e:
                # Best effort: report the failing file and carry on with the rest.
                print(f"  An unexpected error occurred with file {pdf_path.name}: {e}")
        print("-" * (len(str(quarter_dir)) + 24))

    print("\nScan complete.")



In [4]:
# Run the renamer against the root configured above.
# NOTE: this renames files on disk in place. Files whose name already contains
# the quarter label are skipped, so re-running the cell does not add it twice.
rename_pdfs_in_structured_dirs(root_directory_path)


Starting scan in: pdf_data

--- Processing valid folder: pdf_data\2021\2021 Q1 ---
---------------------------------------------
--- Processing valid folder: pdf_data\2021\2021 Q2 ---
  Renaming: 'Tri Valley.pdf' -> 'Tri Valley 2021 Q2.pdf'
---------------------------------------------
--- Processing valid folder: pdf_data\2021\2021 Q3 ---
  Renaming: 'San Francisco Peninsula Snapshot.pdf' -> 'San Francisco Peninsula Snapshot 2021 Q3.pdf'
  Renaming: 'Tampa.pdf' -> 'Tampa 2021 Q3.pdf'
---------------------------------------------
--- Processing valid folder: pdf_data\2021\2021 Q4 ---
---------------------------------------------
--- Processing valid folder: pdf_data\2022\2022 Q1 ---
  Renaming: 'Boston.pdf' -> 'Boston 2022 Q1.pdf'
  Renaming: 'Raleigh Durham.pdf' -> 'Raleigh Durham 2022 Q1.pdf'
---------------------------------------------
--- Processing valid folder: pdf_data\2022\2022 Q2 ---
  Renaming: 'Tulsa.pdf' -> 'Tulsa 2022 Q2.pdf'
---------------------------------------------
