In [1]:
import os  # Used for handling file paths
import pytesseract  # OCR library for text extraction from images
from pdf2image import convert_from_path  # Converts PDF pages to images

def extract_text(pdf_path):
    """
    Run OCR over every page of a PDF and store the result beside the PDF.

    The PDF is converted to a sequence of page images, each image is passed
    through Tesseract, and the per-page results are joined with a newline
    and written (UTF-8) to 'extract.txt' in the PDF's own directory. An
    existing 'extract.txt' in that directory is overwritten.

    Args:
        pdf_path (str): The file path of the input PDF.

    Returns:
        str: The file path of the written 'extract.txt'.
    """
    # The output file always lives next to the input PDF.
    output_path = os.path.join(os.path.dirname(pdf_path), "extract.txt")

    print("🔄 Converting PDF to images...")
    page_images = convert_from_path(pdf_path)

    print("🔍 Running OCR on extracted images...")
    page_texts = []
    for page_image in page_images:
        page_texts.append(pytesseract.image_to_string(page_image))

    # Join before opening the file so a failure here never truncates the output.
    combined_text = "\n".join(page_texts)

    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(combined_text)

    print(f"✅ Extracted text saved to: {output_path}")
    return output_path

# Example usage:
# extract_text("path/to/your/contacts.pdf")


In [None]:
import os
from datetime import datetime

def _escape_vcard_text(value):
    """Escape backslash, semicolon and comma so `value` is a valid vCard 3.0 text value."""
    # Backslash must be handled first so the escapes added below are not doubled.
    return value.replace("\\", "\\\\").replace(";", "\\;").replace(",", "\\,")


def _parse_contacts(lines):
    """Group stripped, non-empty lines into contacts: a name line followed by its phone lines."""
    contacts = []
    current = None  # No contact is open until the first name line is seen

    for line in lines:
        if line[0].isalpha():  # Name line (assumption: names start with letters)
            # A name closes the previous contact; contacts without phones are dropped.
            if current is not None and current["phones"]:
                contacts.append(current)
            current = {"first_name": "X", "last_name": line, "phones": []}
        elif current is not None:  # Phone number line belonging to the open contact
            current["phones"].append(line)
        # Phone lines that appear before any name have no owner and are ignored.

    # Save the last contact if it has at least one phone number
    if current is not None and current["phones"]:
        contacts.append(current)

    return contacts


def generate_icloud_vcf(txt_path):
    """
    Converts a manually filtered text file into an iCloud-compatible .vcf file.

    The input is read line by line; blank lines are ignored. A line whose
    first character is a letter starts a new contact (the whole line is the
    name); every other line is treated as a phone number of the most recent
    contact. Contacts without any phone number are skipped. The result is
    written as 'contacts.vcf' (vCard 3.0) next to the input file,
    overwriting an existing file of that name.

    Args:
        txt_path (str): Path to the filtered extract.txt file.

    Returns:
        str: Path of the generated .vcf file.
    """
    # Function-scope import: only `datetime` is imported at module level.
    from datetime import timezone

    # Define the output VCF file in the same directory as the input text file
    vcf_path = os.path.join(os.path.dirname(txt_path), "contacts.vcf")

    print("📂 Reading extracted contacts...")
    with open(txt_path, "r", encoding="utf-8") as file:
        lines = [line.strip() for line in file if line.strip()]  # Remove empty lines

    contacts = _parse_contacts(lines)

    print(f"📄 {len(contacts)} contacts extracted. Generating iCloud-compatible VCF...")

    # One REV timestamp for the whole export; timezone-aware replacement for
    # the deprecated datetime.utcnow(), same output format.
    rev_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Writing to the VCF file
    with open(vcf_path, "w", encoding="utf-8") as vcf_file:
        for contact in contacts:
            # Names come from OCR text, so escape vCard delimiters before
            # placing them into the structured N / FN values.
            first_name = _escape_vcard_text(contact["first_name"])
            last_name = _escape_vcard_text(contact["last_name"])

            # Every parsed contact has at least one phone number.
            main_phone, *extra_phones = contact["phones"]

            vcf_file.write("BEGIN:VCARD\n")
            vcf_file.write("VERSION:3.0\n")
            vcf_file.write("PRODID:-//Apple Inc.//iOS 18.3.1//EN\n")  # Placeholder iOS version
            # NOTE(review): the name is written into the second N component,
            # which RFC 2426 defines as the given name - confirm this is the
            # layout iCloud import is expected to receive.
            vcf_file.write(f"N:;{last_name};;;\n")
            vcf_file.write(f"FN:{first_name} {last_name}\n")

            # Assign the first number as the main mobile number
            vcf_file.write(f"TEL;TYPE=CELL;TYPE=VOICE;TYPE=pref:{main_phone}\n")

            # Additional numbers as itemX.TEL entries
            for i, phone in enumerate(extra_phones, start=1):
                vcf_file.write(f"item{i}.TEL;type=pref:{phone}\n")

            # REV timestamp (last-modified time of the card)
            vcf_file.write(f"REV:{rev_time}\n")

            vcf_file.write("END:VCARD\n\n")

    print(f"✅ iCloud-compatible VCF file created: {vcf_path}")
    return vcf_path
    
# Example usage:
# generate_icloud_vcf("path/to/extract.txt")