# In [1]:
from pptx import Presentation
from docx import Document
import os
import shutil

# Input and output locations (relative to the current working directory)
input_dir, output_dir = 'pptx', 'qmd'
images_dir = os.path.join(output_dir, 'images')
# Creating the nested images directory also creates output_dir itself.
os.makedirs(images_dir, exist_ok=True)

# Function to convert PPTX to QMD
def convert_pptx_to_qmd(pptx_file, qmd_file):
    """Convert a PowerPoint deck into a Quarto Markdown (.qmd) file.

    Each slide becomes a level-2 section. The first line of the first
    non-empty text shape is used as the heading ("Untitled Slide" when the
    slide has no text); all remaining text is written as paragraphs. Embedded
    pictures are saved into the module-level ``images_dir`` and linked from
    the slide, and non-empty speaker notes are written as a ``::: {.notes}``
    block.

    Args:
        pptx_file: Path of the ``.pptx`` file to read.
        qmd_file: Path of the ``.qmd`` file to write (overwritten if present).
            Image links are written as ``images/<name>``, i.e. relative to the
            directory that contains ``images_dir``.
    """
    picture_shape_type = 13  # numeric value of MSO_SHAPE_TYPE.PICTURE
    # Image names are prefixed with the deck name so that several decks can
    # share images_dir without overwriting each other's files.
    deck_name = os.path.splitext(os.path.basename(pptx_file))[0]

    prs = Presentation(pptx_file)
    # Slide text is frequently non-ASCII; do not depend on the locale encoding.
    with open(qmd_file, 'w', encoding='utf-8') as f:
        for slide_no, slide in enumerate(prs.slides, start=1):
            # None (rather than a magic string) marks "no title found yet", so
            # a slide literally titled "Untitled Slide" is handled correctly.
            title = None
            slide_texts = []
            for shape in slide.shapes:
                if not (hasattr(shape, "text") and shape.has_text_frame):
                    continue
                text = shape.text.strip()
                if not text:
                    continue
                if title is None:
                    # First line is the heading; keep the rest as body text
                    # instead of silently dropping it.
                    first_line, _, remainder = text.partition('\n')
                    title = first_line.strip()
                    if remainder.strip():
                        slide_texts.append(remainder.strip())
                else:
                    slide_texts.append(text)

            if title is None:
                title = "Untitled Slide"
            f.write(f"## {title}\n\n")
            for text in slide_texts:
                f.write(text + "\n\n")

            # Save images
            image_no = 0
            for shape in slide.shapes:
                if shape.shape_type != picture_shape_type:
                    continue
                try:
                    image = shape.image
                except ValueError:
                    # Linked (non-embedded) pictures carry no image data.
                    continue
                image_no += 1
                # Number images per slide and keep the real file extension
                # (the blob is not necessarily PNG).
                image_filename = (
                    f"{deck_name}_slide_{slide_no}_img_{image_no}.{image.ext}"
                )
                image_path = os.path.join(images_dir, image_filename)
                with open(image_path, 'wb') as img_file:
                    img_file.write(image.blob)
                f.write(f"![Slide Image](images/{image_filename})\n\n")

            # Add slide notes. Check has_notes_slide first: merely reading
            # slide.notes_slide creates a notes slide when none exists.
            if slide.has_notes_slide:
                notes_frame = slide.notes_slide.notes_text_frame
                notes = notes_frame.text.strip() if notes_frame is not None else ""
                if notes:
                    f.write(f"::: {{.notes}}\n{notes}\n:::\n\n")

    print(f"PPTX converted: {qmd_file}")

# Function to convert DOCX to QMD
def convert_docx_to_qmd(docx_file, qmd_file):
    """Convert a Word document into a Quarto Markdown (.qmd) file.

    The output starts with a level-1 heading holding the document's file name
    (without extension). Paragraphs whose style is named ``Heading <n>``
    become Markdown headings of level ``n``; every other non-empty paragraph
    is written as plain text. Embedded images are then saved into the
    module-level ``images_dir`` and linked at the end of the file.

    Args:
        docx_file: Path of the ``.docx`` file to read.
        qmd_file: Path of the ``.qmd`` file to write (overwritten if present).
            Image links are written as ``images/<name>``, i.e. relative to the
            directory that contains ``images_dir``.
    """
    # splitext only removes the real extension; str.replace would also strip
    # a ".docx" that appears in the middle of the name.
    doc_name = os.path.splitext(os.path.basename(docx_file))[0]

    doc = Document(docx_file)
    # Document text is frequently non-ASCII; do not depend on the locale encoding.
    with open(qmd_file, 'w', encoding='utf-8') as f:
        f.write(f"# {doc_name}\n\n")

        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                # Empty paragraphs would only add stray blank lines / empty headings.
                continue
            style_name = para.style.name if para.style is not None else ""
            level_text = ""
            if style_name.startswith('Heading'):
                level_text = style_name[len('Heading'):].strip()
            # Only "Heading <digits>" is treated as a heading; a heading style
            # without a numeric suffix falls back to plain text instead of
            # raising ValueError.
            if level_text.isdigit() and int(level_text) > 0:
                f.write(f"{'#' * int(level_text)} {text}\n\n")
            else:
                f.write(text + "\n\n")

        # Save images
        for rel_id, rel in doc.part.rels.items():
            # Select by relationship type rather than by substring of the
            # target: a hyperlink whose URL contains "image" is not an image,
            # and external targets have no target_part to read from.
            if rel.is_external or not rel.reltype.endswith('/image'):
                continue
            # Keep the image's real extension (the blob is not necessarily PNG).
            extension = os.path.splitext(rel.target_ref)[1] or '.png'
            image_filename = f"{doc_name}_img_{rel_id}{extension}"
            image_path = os.path.join(images_dir, image_filename)
            with open(image_path, 'wb') as img_file:
                img_file.write(rel.target_part.blob)
            f.write(f"![Document Image](images/{image_filename})\n\n")

    print(f"DOCX converted: {qmd_file}")

# Process all PPTX and DOCX files found directly inside input_dir.
# sorted() makes the processing order (and the log output) deterministic.
for filename in sorted(os.listdir(input_dir)):
    # Office keeps "~$name.ext" lock files next to documents that are open;
    # they are not valid packages and would crash the converters.
    if filename.startswith('~$'):
        continue
    # splitext removes only the real extension; str.replace would also strip
    # ".pptx"/".docx" from the middle of a name.
    stem, ext = os.path.splitext(filename)
    ext = ext.lower()  # accept .PPTX / .DOCX as well
    file_path = os.path.join(input_dir, filename)
    qmd_file = os.path.join(output_dir, f"{stem}.qmd")
    if ext == '.pptx':
        convert_pptx_to_qmd(file_path, qmd_file)
    elif ext == '.docx':
        convert_docx_to_qmd(file_path, qmd_file)

print(f"Conversion complete! QMDs and images are in '{output_dir}'.")


# Recorded cell output: FileNotFoundError: [Errno 2] No such file or directory: ''