In [None]:
import fitz
import os

In [None]:
import string
import re

def is_valid_filename(filename):
    """Report whether ``filename`` is free of forbidden filename characters.

    Returns ``False`` as soon as any of ``< > : " / | ? *`` or a backslash
    is found in ``filename``; otherwise returns ``True``. Nothing is
    modified -- this is a pure check.
    """
    for forbidden in '<>:"/\\|?*':
        if forbidden in filename:
            return False
    return True

def sanitize_filename(filename):
    """Return ``filename`` with characters that are unsafe in file names removed.

    Removes ``< > : " / | ? *`` and the backslash, as well as ASCII control
    characters (code points 0-31), then trims leading/trailing spaces and
    dots.

    Args:
        filename: Candidate file name without a directory part.

    Returns:
        The cleaned name. May be an empty string when nothing usable remains.
    """
    invalid_chars = '<>:"/\\|?*'
    # Control characters (NUL, newline, tab, ...) are not allowed in Windows
    # file names, and an embedded NUL makes OS-level calls fail, so they are
    # dropped together with the punctuation above.
    control_chars = ''.join(map(chr, range(32)))
    removal_table = str.maketrans('', '', invalid_chars + control_chars)
    # Trim only after removal so spaces/dots exposed by it are stripped too.
    return filename.translate(removal_table).strip('. ')

# Configuration: ask which directory to scan; empty input means the current one.
directory_path = input("Enter the directory path containing PDF files (or press Enter for current directory): ").strip()
if not directory_path:
    directory_path = "."

print(f"\nScanning directory: {directory_path}\n")
print("-" * 80)

# Statistics reported in the summary at the end of the run.
total_files = 0
renamed_count = 0
skipped_count = 0
error_count = 0

# Walk the directory tree and rename every PDF after the title stored in its
# metadata. Each file is handled independently: a failure is counted and
# reported, and processing continues with the next file.
for root_dir, cur_dir, files in os.walk(directory_path):
    for name in files:
        # Only process PDF files
        if not name.lower().endswith('.pdf'):
            continue

        total_files += 1
        file_path = os.path.join(root_dir, name)
        print(f"\nProcessing: {file_path}")

        try:
            # The context manager closes the document as soon as the title has
            # been read, so it is already closed when the rename happens.
            with fitz.open(file_path) as pdf:
                # Per the PyMuPDF docs, `metadata` itself and each of its
                # values may be None, so normalise both before calling strip().
                metadata = pdf.metadata or {}
                title = (metadata.get("title") or "").strip()

            # Check if title exists
            if not title:
                print("  ⚠ Skipped: No title in metadata")
                skipped_count += 1
                continue

            # Sanitize the title so it can be used as a file name
            sanitized_title = sanitize_filename(title)

            if not sanitized_title:
                print("  ⚠ Skipped: Title contains only invalid characters")
                skipped_count += 1
                continue

            # The renamed file stays in the same directory as the original
            new_file_path = os.path.join(root_dir, sanitized_title + ".pdf")

            # Check if file already has the correct name
            if file_path == new_file_path:
                print("  ✓ Already named correctly")
                skipped_count += 1
                continue

            # Never overwrite an existing file with the target name
            if os.path.exists(new_file_path):
                print(f"  ⚠ Skipped: File already exists with name '{sanitized_title}.pdf'")
                skipped_count += 1
                continue

            # Rename the file
            os.rename(file_path, new_file_path)
            print(f"  ✓ Renamed to: {sanitized_title}.pdf")
            renamed_count += 1

        except Exception as e:
            # Per-file boundary: report the problem and keep going.
            print(f"  ✗ Error: {e}")
            error_count += 1

# Print summary
print("\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)
print(f"Total PDF files found: {total_files}")
print(f"Successfully renamed: {renamed_count}")
print(f"Skipped: {skipped_count}")
print(f"Errors: {error_count}")
print("=" * 80)