In [None]:
pip install pptx BeautifulSoup

In [1]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
import re
import html

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML slides to PowerPoint presentation
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    for slide in slides:
        # Add a new slide
        slide_layout = prs.slide_layouts[1]  # Using layout with title and content
        current_slide = prs.slides.add_slide(slide_layout)
        
        # Get slide title if available
        title_element = slide.find('h1') or slide.find('h2')
        if title_element:
            current_slide.shapes.title.text = title_element.text.strip()
        
        # Find all rows in the slide
        rows = slide.find_all('div', class_='row')
        
        # Track position for adding content
        top = Inches(2)  # Start below title
        
        for row in rows:
            # Create a text box for this row
            left = Inches(0.5)
            width = Inches(9)
            height = Inches(1)
            
            txBox = current_slide.shapes.add_textbox(left, top, width, height)
            tf = txBox.text_frame
            
            # Process the HTML content in this row
            process_html_content(row, tf)
            
            # Move down for next row
            top += Inches(1)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_html_content(element, text_frame):
    """Process HTML content and add to text frame with formatting"""
    # Get all text and elements within this element
    for child in element.children:
        # If it's just a string, add it directly
        if isinstance(child, str):
            if child.strip():
                p = text_frame.add_paragraph()
                p.text = child.strip()
        else:
            # Handle different HTML tags
            tag_name = child.name
            if tag_name in ['p', 'div']:
                p = text_frame.add_paragraph()
                p.text = child.get_text().strip()
            elif tag_name in ['h1', 'h2', 'h3', 'h4']:
                p = text_frame.add_paragraph()
                p.text = child.get_text().strip()
                # Make heading text larger
                size_map = {'h1': 36, 'h2': 28, 'h3': 24, 'h4': 20}
                p.font.size = Pt(size_map.get(tag_name, 18))
                p.font.bold = True
            elif tag_name == 'ul':
                for li in child.find_all('li'):
                    p = text_frame.add_paragraph()
                    p.text = "• " + li.get_text().strip()
                    p.level = 1  # Indentation level
            elif tag_name == 'ol':
                for i, li in enumerate(child.find_all('li'), 1):
                    p = text_frame.add_paragraph()
                    p.text = f"{i}. " + li.get_text().strip()
                    p.level = 1  # Indentation level
            elif tag_name == 'img':
                # Note: Adding images requires more complex handling,
                # would need to download the image or have a local path
                p = text_frame.add_paragraph()
                p.text = "[Image: " + child.get('alt', 'image') + "]"
            elif tag_name in ['b', 'strong']:
                p = text_frame.add_paragraph()
                p.text = child.get_text().strip()
                p.font.bold = True
            elif tag_name in ['i', 'em']:
                p = text_frame.add_paragraph()
                p.text = child.get_text().strip()
                p.font.italic = True
            else:
                # For other tags, just extract the text
                if child.get_text().strip():
                    p = text_frame.add_paragraph()
                    p.text = child.get_text().strip()

# Example usage
if __name__ == "__main__":
    # Sample HTML content
    sample_html = """
    <!DOCTYPE html>
    <html>
    <body>
        <div class="slide">
            <h1>Introduction to Python</h1>
            <div class="row">
                <p>Python is a high-level, interpreted programming language.</p>
            </div>
            <div class="row">
                <ul>
                    <li>Easy to learn</li>
                    <li>Versatile applications</li>
                    <li>Large community</li>
                </ul>
            </div>
        </div>
        
        <div class="slide">
            <h1>Python Features</h1>
            <div class="row">
                <h3>Key Strengths</h3>
            </div>
            <div class="row">
                <ol>
                    <li>Simple and readable syntax</li>
                    <li>Extensive standard library</li>
                    <li>Dynamic typing</li>
                </ol>
            </div>
            <div class="row">
                <p><b>Note:</b> Python is used in <i>web development</i>, data science, and more!</p>
            </div>
        </div>
    </body>
    </html>
    """
    
    # Convert HTML to PowerPoint
    html_to_pptx(sample_html, "python_presentation.pptx")

Presentation saved as python_presentation.pptx


In [2]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
import re
import html
import sys
import os

In [5]:
def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML slides to PowerPoint presentation using blank slides without placeholders
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    for slide_index, slide in enumerate(slides):
        # Use a blank slide to avoid placeholders
        slide_layout = prs.slide_layouts[6]  # Blank slide
        current_slide = prs.slides.add_slide(slide_layout)
        
        # Add title manually instead of using placeholder
        title_element = slide.find('h1') or slide.find('h2')
        if title_element:
            title_shape = current_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(1)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = title_element.text.strip()
            p.font.size = Pt(32)
            p.font.bold = True
            p.alignment = PP_ALIGN.CENTER
        
        # Add a text box for the main content
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        
        # Process the slide content
        process_slide_content_without_placeholders(slide, content_frame, css_rules)
        
        # Clean up any lingering placeholders
        clean_slide_placeholders(current_slide)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_slide_content_without_placeholders(slide, text_frame, css_rules):
    """Process slide content without using PowerPoint placeholders"""
    # Find and process all row divs
    rows = slide.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        process_content(slide, text_frame, css_rules)
    else:
        # Process each row
        for row in rows:
            # Create a paragraph separator
            p = text_frame.add_paragraph()
            
            # Apply any CSS styling from the row's class
            apply_css_to_paragraph(p, row, css_rules)
            
            # Process the content of the row
            process_content(row, text_frame, css_rules)
            
            # Add some spacing between rows
            if len(text_frame.paragraphs) > 0:
                last_p = text_frame.paragraphs[-1]
                if hasattr(last_p, 'space_after'):
                    last_p.space_after = Pt(12)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def get_slide_placeholders(slide):
    """Get a mapping of placeholder names to placeholder objects"""
    placeholders = {}
    for placeholder in slide.placeholders:
        if hasattr(placeholder, 'name'):
            placeholders[placeholder.name.lower()] = placeholder
        elif hasattr(placeholder, 'placeholder_format') and hasattr(placeholder.placeholder_format, 'type'):
            # Use type as fallback
            placeholder_type = str(placeholder.placeholder_format.type)
            placeholders[placeholder_type] = placeholder
    
    return placeholders

def process_content(element, text_frame, css_rules):
    """Process HTML content and add it to a PowerPoint text frame"""
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        process_image(element, text_frame, css_rules)
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules)

def process_text_content(element, text_frame, css_rules):
    """Process text content and add it to the text frame"""
    # Extract direct text content from the element (exclude nested elements)
    direct_text = ''
    for child in element.children:
        if isinstance(child, str):
            direct_text += child
            
    if direct_text.strip():
        p = text_frame.add_paragraph()
        p.text = direct_text.strip()
        apply_css_to_paragraph(p, element, css_rules)
    
    # Process paragraph elements
    paragraphs = element.find_all(['p', 'div', 'h3', 'h4'], recursive=False)
    for para in paragraphs:
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        apply_css_to_paragraph(p, para, css_rules)
        
        # Apply special formatting
        if para.name in ['h3', 'h4']:
            p.font.bold = True
            size_map = {'h3': 20, 'h4': 18}
            p.font.size = Pt(size_map.get(para.name, 16))
            
        # Handle text highlighting
        if para.find('span', class_='highlight'):
            # In a real implementation, you would extract the exact highlighted text
            # This is a simplification
            p.font.highlight_color = 3  # Yellow
            
        # Handle bold and italic
        if para.find(['b', 'strong']):
            p.font.bold = True
        if para.find(['i', 'em']):
            p.font.italic = True

def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)

def process_image(element, text_frame, css_rules):
    """Process images and add placeholders to the text frame"""
    img = element.find('img')
    if not img:
        return
        
    # Add an image placeholder
    p = text_frame.add_paragraph()
    img_alt = img.get('alt', 'Image')
    p.text = f"[Image: {img_alt}]"
    p.alignment = PP_ALIGN.CENTER
    
    # Add image caption if available
    caption = element.find('p', class_='caption')
    if caption:
        p = text_frame.add_paragraph()
        p.text = caption.get_text().strip()
        p.font.italic = True
        p.alignment = PP_ALIGN.CENTER

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None

def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "basic.html"          # Default input HTML file
    output_file = "presentation.ppt"   # Default output PowerPoint file
    
    # Check for command line arguments
    
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")

Presentation saved as presentation.ppt
Successfully converted basic.html to presentation.ppt


In [7]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
import re
import html
import sys
import os

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Add a text box for the main content
    content_shape = current_slide.shapes.add_textbox(
        Inches(0.5), Inches(1.5), Inches(9), Inches(5)
    )
    content_frame = content_shape.text_frame
    
    # Process the slide content
    process_slide_content_without_placeholders(slide, content_frame, css_rules)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_column_slide(slide_html, prs, slide_idx):
    """Process a slide with column-based layout"""
    # Add a new slide
    slide_layout = prs.slide_layouts[6]  # Blank layout
    slide = prs.slides.add_slide(slide_layout)
    
    # Find the left and right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')
    
    # Get column content
    left_rows = left_column.find_all('div', class_='row') if left_column else []
    right_rows = right_column.find_all('div', class_='row') if right_column else []
    
    # Determine slide dimensions and layout ratios dynamically
    slide_width = prs.slide_width
    slide_height = prs.slide_height
    
    # Calculate dynamic margins and spacing based on slide size
    margin_ratio = 0.05  # 5% of slide dimensions for margins
    h_margin = slide_width * margin_ratio
    v_margin = slide_height * margin_ratio
    spacing = min(h_margin, v_margin) * 0.5  # Spacing between elements
    
    # Determine column widths dynamically
    column_ratio = 0.5  # Default to 50% each column
    left_column_width = (slide_width - (3 * h_margin)) * column_ratio
    right_column_width = (slide_width - (3 * h_margin)) * column_ratio
    
    # Create column containers dynamically
    # Left column container
    left_container = {
        'x': h_margin,
        'y': v_margin * 2,  # Space for title
        'width': left_column_width,
        'height': slide_height - (v_margin * 3)
    }
    
    # Right column container
    right_container = {
        'x': h_margin * 2 + left_column_width,
        'y': v_margin * 2,  # Space for title
        'width': right_column_width,
        'height': slide_height - (v_margin * 3)
    }
    
    # Add containers to slide
    left_shape = slide.shapes.add_shape(
        MSO_SHAPE.RECTANGLE,
        left_container['x'],
        left_container['y'],
        left_container['width'],
        left_container['height']
    )
    left_shape.fill.solid()
    left_shape.fill.fore_color.rgb = RGBColor(245, 245, 245)  # Light gray
    left_shape.line.color.rgb = RGBColor(221, 221, 221)  # Light border
    
    right_shape = slide.shapes.add_shape(
        MSO_SHAPE.RECTANGLE,
        right_container['x'],
        right_container['y'],
        right_container['width'],
        right_container['height']
    )
    right_shape.fill.solid()
    right_shape.fill.fore_color.rgb = RGBColor(255, 255, 255)  # White
    right_shape.line.color.rgb = RGBColor(221, 221, 221)  # Light border
    
    # Add title dynamically
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"
    
    title_height = v_margin * 1.5
    title = slide.shapes.add_textbox(
        h_margin,
        v_margin / 2,
        slide_width - (h_margin * 2),
        title_height
    )
    title_text_frame = title.text_frame
    title_text_frame.text = title_text
    
    # Set title font size dynamically based on slide dimensions
    # Slide dimensions are in EMU (English Metric Units)
    # Convert to points for better calculation (1 inch = 72 points)
    slide_width_pt = slide_width / 914400 * 72  # Convert EMU to points
    slide_height_pt = slide_height / 914400 * 72  # Convert EMU to points
    
    title_font_size = int(min(slide_width_pt, slide_height_pt) * 0.06)  # 6% of min dimension
    title_font_size = max(16, min(title_font_size, 36))  # Keep between 16-36pt
    title_text_frame.paragraphs[0].font.size = Pt(title_font_size)
    title_text_frame.paragraphs[0].font.bold = True
    
    # Calculate row heights for left column dynamically
    if left_rows:
        available_height = left_container['height'] - (spacing * (len(left_rows) + 1))
        left_row_height = available_height / len(left_rows)
    else:
        left_row_height = 0
        
    # Calculate row heights for right column dynamically
    if right_rows:
        available_height = right_container['height'] - (spacing * (len(right_rows) + 1))
        right_row_height = available_height / len(right_rows)
    else:
        right_row_height = 0
    
    # Process left column rows
    for i, row in enumerate(left_rows):
        # Calculate position dynamically
        y_position = left_container['y'] + spacing + (i * (left_row_height + spacing))
        
        # Calculate inner margins for row
        row_margin = spacing / 2
        
        # Create a row shape with background color
        row_shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE,
            left_container['x'] + row_margin,
            y_position,
            left_container['width'] - (row_margin * 2),
            left_row_height
        )
        row_shape.fill.solid()
        row_shape.fill.fore_color.rgb = RGBColor(224, 224, 224)  # Light gray for rows
        row_shape.line.color.rgb = RGBColor(200, 200, 200)
        
        # Add text to the row
        left_text_box = slide.shapes.add_textbox(
            left_container['x'] + (row_margin * 2),
            y_position + row_margin,
            left_container['width'] - (row_margin * 4),
            left_row_height - (row_margin * 2)
        )
        left_text_frame = left_text_box.text_frame
        left_text_frame.word_wrap = True
        left_text_frame.text = row.get_text().strip()
        
        # Set font size dynamically based on row height
        # Convert row height from EMU to points
        row_height_pt = left_row_height / 914400 * 72  # Convert EMU to points
        font_size = int(row_height_pt * 0.4)  # 40% of row height in points
        font_size = max(8, min(font_size, 18))  # Keep between 8-18pt
        left_text_frame.paragraphs[0].font.size = Pt(font_size)
        
    # Process right column rows
    for i, row in enumerate(right_rows):
        # Calculate position dynamically
        y_position = right_container['y'] + spacing + (i * (right_row_height + spacing))
        
        # Calculate inner margins for row
        row_margin = spacing / 2
        
        # Create a row shape with background color
        row_shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE,
            right_container['x'] + row_margin,
            y_position,
            right_container['width'] - (row_margin * 2),
            right_row_height
        )
        row_shape.fill.solid()
        row_shape.fill.fore_color.rgb = RGBColor(240, 240, 240)  # Lighter gray for rows
        row_shape.line.color.rgb = RGBColor(221, 221, 221)
        
        # Add text to the row
        right_text_box = slide.shapes.add_textbox(
            right_container['x'] + (row_margin * 2),
            y_position + row_margin,
            right_container['width'] - (row_margin * 4),
            right_row_height - (row_margin * 2)
        )
        right_text_frame = right_text_box.text_frame
        right_text_frame.word_wrap = True
        right_text_frame.text = row.get_text().strip()
        
        # Set font size dynamically based on row height
        # Convert row height from EMU to points
        row_height_pt = right_row_height / 914400 * 72  # Convert EMU to points
        font_size = int(row_height_pt * 0.4)  # 40% of row height in points
        font_size = max(8, min(font_size, 18))  # Keep between 8-18pt
        right_text_frame.paragraphs[0].font.size = Pt(font_size)

def process_slide_content_without_placeholders(slide, text_frame, css_rules):
    """Process slide content without using PowerPoint placeholders"""
    # Find and process all row divs
    rows = slide.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        process_content(slide, text_frame, css_rules)
    else:
        # Process each row
        for row in rows:
            # Create a paragraph separator
            p = text_frame.add_paragraph()
            
            # Apply any CSS styling from the row's class
            apply_css_to_paragraph(p, row, css_rules)
            
            # Process the content of the row
            process_content(row, text_frame, css_rules)
            
            # Add some spacing between rows
            if len(text_frame.paragraphs) > 0:
                last_p = text_frame.paragraphs[-1]
                if hasattr(last_p, 'space_after'):
                    last_p.space_after = Pt(12)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def get_slide_placeholders(slide):
    """Get a mapping of placeholder names to placeholder objects"""
    placeholders = {}
    for placeholder in slide.placeholders:
        if hasattr(placeholder, 'name'):
            placeholders[placeholder.name.lower()] = placeholder
        elif hasattr(placeholder, 'placeholder_format') and hasattr(placeholder.placeholder_format, 'type'):
            # Use type as fallback
            placeholder_type = str(placeholder.placeholder_format.type)
            placeholders[placeholder_type] = placeholder
    
    return placeholders

def process_content(element, text_frame, css_rules):
    """Process HTML content and add it to a PowerPoint text frame"""
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        process_image(element, text_frame, css_rules)
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules)

def process_text_content(element, text_frame, css_rules):
    """Process text content and add it to the text frame"""
    # Extract direct text content from the element (exclude nested elements)
    direct_text = ''
    for child in element.children:
        if isinstance(child, str):
            direct_text += child
            
    if direct_text.strip():
        p = text_frame.add_paragraph()
        p.text = direct_text.strip()
        apply_css_to_paragraph(p, element, css_rules)
    
    # Process paragraph elements
    paragraphs = element.find_all(['p', 'div', 'h3', 'h4'], recursive=False)
    for para in paragraphs:
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        apply_css_to_paragraph(p, para, css_rules)
        
        # Apply special formatting
        if para.name in ['h3', 'h4']:
            p.font.bold = True
            size_map = {'h3': 20, 'h4': 18}
            p.font.size = Pt(size_map.get(para.name, 16))
            
        # Handle text highlighting
        if para.find('span', class_='highlight'):
            # In a real implementation, you would extract the exact highlighted text
            # This is a simplification
            p.font.highlight_color = 3  # Yellow
            
        # Handle bold and italic
        if para.find(['b', 'strong']):
            p.font.bold = True
        if para.find(['i', 'em']):
            p.font.italic = True

def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)

def process_image(element, text_frame, css_rules):
    """Process images and add placeholders to the text frame"""
    img = element.find('img')
    if not img:
        return
        
    # Add an image placeholder
    p = text_frame.add_paragraph()
    img_alt = img.get('alt', 'Image')
    p.text = f"[Image: {img_alt}]"
    p.alignment = PP_ALIGN.CENTER
    
    # Add image caption if available
    caption = element.find('p', class_='caption')
    if caption:
        p = text_frame.add_paragraph()
        p.text = caption.get_text().strip()
        p.font.italic = True
        p.alignment = PP_ALIGN.CENTER

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None

def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample.html"          # Default input HTML file
    output_file = "presentation.ppt"   # Default output PowerPoint file
    
    # Check for command line arguments
    
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")

Presentation saved as presentation.ppt
Successfully converted sample.html to presentation.ppt


In [1]:
import os
import re
import io
import requests
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
from PIL import Image

def html_to_pptx(html_content, output_filename):
    """
    Convert HTML content with slides into a PowerPoint presentation
    
    Args:
        html_content (str): HTML string with slide structure
        output_filename (str): Output PowerPoint file name
    """
    # Parse HTML
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Create a presentation
    prs = Presentation()
    
    # Find all slides
    slides = soup.find_all('div', class_='slide')
    
    for slide_html in slides:
        # Create a new slide
        slide_layout = prs.slide_layouts[1]  # Using the Title and Content layout
        slide = prs.slides.add_slide(slide_layout)
        
        # Set slide title if exists
        title_elem = slide_html.find(['h1', 'h2'])
        if title_elem:
            slide.shapes.title.text = title_elem.get_text().strip()
        
        # Process slide content
        process_slide_content(slide, slide_html)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")


def process_slide_content(slide, slide_html):
    """Process the content of a slide"""
    
    # Get content placeholder
    try:
        content_placeholder = slide.placeholders[1]
    except:
        # If there's no content placeholder, add a text box for content
        content_placeholder = slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5)
        )
    
    # Handle columns if they exist
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')
    
    if left_column and right_column:
        # Two-column layout
        left_box = slide.shapes.add_textbox(Inches(0.5), Inches(1.5), Inches(4.5), Inches(5))
        right_box = slide.shapes.add_textbox(Inches(5.5), Inches(1.5), Inches(4.5), Inches(5))
        
        process_column_content(left_box, left_column)
        process_column_content(right_box, right_column)
    else:
        # Check for two-column divs
        two_column_div = slide_html.find('div', class_='two-column')
        if two_column_div:
            columns = two_column_div.find_all('div', class_='column')
            if len(columns) >= 2:
                left_box = slide.shapes.add_textbox(Inches(0.5), Inches(1.5), Inches(4.5), Inches(5))
                right_box = slide.shapes.add_textbox(Inches(5.5), Inches(1.5), Inches(4.5), Inches(5))
                
                process_column_content(left_box, columns[0])
                process_column_content(right_box, columns[1])
            else:
                # Process rows normally if two columns aren't properly structured
                process_rows(slide, slide_html)
        else:
            # Process rows normally
            process_rows(slide, slide_html)


def process_column_content(text_box, column_html):
    """Process content within a column"""
    text_frame = text_box.text_frame
    
    # Process each row in the column
    rows = column_html.find_all('div', class_='row')
    for row in rows:
        add_paragraph_with_content(text_frame, row)


def process_rows(slide, slide_html):
    """Process rows in a regular slide layout"""
    rows = slide_html.find_all('div', class_='row')
    
    current_y = Inches(1.8)  # Starting Y position after title
    
    for row in rows:
        # Calculate height based on content (simplified approximation)
        row_height = calculate_row_height(row)
        
        # Create a shape for this row
        shape = slide.shapes.add_textbox(
            Inches(0.5),  # X position
            current_y,    # Y position
            Inches(9),    # Width
            row_height    # Height
        )
        
        # Add content to the shape
        process_row_content(shape, row)
        
        # Update Y position for next row
        current_y += row_height + Inches(0.2)  # Add spacing between rows


def calculate_row_height(row):
    """Estimate the height needed for a row based on content"""
    # Get text content length
    text_length = len(row.get_text())
    
    # Check if there's an image
    img = row.find('img')
    if img and img.get('height'):
        try:
            img_height = int(img.get('height')) / 72  # Convert pixels to inches
            return max(Inches(img_height + 0.5), Inches(1))
        except:
            pass
    
    # Check for tables
    table = row.find('table')
    if table:
        rows = table.find_all('tr')
        return Inches(len(rows) * 0.3 + 0.5)
    
    # Check for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = code_block.get_text().strip().count('\n') + 1
        return Inches(code_lines * 0.2 + 0.5)
    
    # Calculate based on text length (simple approximation)
    if text_length < 50:
        return Inches(0.6)
    elif text_length < 200:
        return Inches(1)
    elif text_length < 500:
        return Inches(2)
    else:
        return Inches(3)


def process_row_content(shape, row):
    """Process the content of a row"""
    text_frame = shape.text_frame
    
    # Handle images
    img = row.find('img')
    if img:
        try:
            img_src = img.get('src')
            img_alt = img.get('alt', 'Image')
            img_width = img.get('width')
            
            # For a real application, download and add the image
            # Here, we're adding a placeholder text
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt}]"
            p.alignment = PP_ALIGN.CENTER
            
            # If there's a caption, add it
            caption = row.find('p', class_='caption')
            if caption:
                p = text_frame.add_paragraph()
                p.text = caption.get_text().strip()
                p.alignment = PP_ALIGN.CENTER
                p.font.italic = True
                p.font.size = Pt(10)
                p.font.color.rgb = RGBColor(100, 100, 100)
            
            return
        except Exception as e:
            print(f"Error processing image: {e}")
    
    # Handle chart placeholders
    chart = row.find('div', class_='chart-placeholder')
    if chart:
        p = text_frame.add_paragraph()
        p.text = chart.get_text().strip() or "[Chart Placeholder]"
        p.alignment = PP_ALIGN.CENTER
        return
    
    # Handle tables
    table_elem = row.find('table')
    if table_elem:
        # Add a simple note about the table
        # In a real application, you would create a PowerPoint table
        p = text_frame.add_paragraph()
        p.text = "[Table: " + table_elem.get_text().strip()[:50] + "...]"
        return
    
    # Handle code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        # Add code as preformatted text
        code_text = code_block.get_text().strip()
        p = text_frame.add_paragraph()
        p.text = code_text
        p.font.name = "Courier New"
        p.font.size = Pt(10)
        return
    
    # Handle lists
    ordered_list = row.find('ol')
    unordered_list = row.find('ul')
    
    if ordered_list:
        items = ordered_list.find_all('li')
        for i, item in enumerate(items, 1):
            p = text_frame.add_paragraph()
            p.text = f"{i}. {item.get_text().strip()}"
            p.level = 1
        return
    
    if unordered_list:
        items = unordered_list.find_all('li')
        for item in items:
            p = text_frame.add_paragraph()
            p.text = f"• {item.get_text().strip()}"
            p.level = 1
        return
    
    # Handle headings
    heading = row.find(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
    if heading:
        p = text_frame.add_paragraph()
        p.text = heading.get_text().strip()
        
        # Set styling based on heading level
        heading_tag = heading.name
        if heading_tag == 'h1':
            p.font.size = Pt(24)
            p.font.bold = True
        elif heading_tag == 'h2':
            p.font.size = Pt(20)
            p.font.bold = True
        elif heading_tag == 'h3':
            p.font.size = Pt(18)
            p.font.bold = True
        else:
            p.font.size = Pt(14)
            p.font.bold = True
        
        return
    
    # Handle basic text content
    paragraphs = row.find_all('p')
    if paragraphs:
        for para in paragraphs:
            p = text_frame.add_paragraph()
            p.text = para.get_text().strip()
    else:
        # Add the row's text if no specific elements were processed
        text = row.get_text().strip()
        if text:
            p = text_frame.add_paragraph()
            p.text = text


def add_paragraph_with_content(text_frame, content_html):
    """Add a paragraph to a text frame with formatted content"""
    p = text_frame.add_paragraph()
    
    # Process different content types
    if content_html.find('img'):
        img = content_html.find('img')
        p.text = f"[Image: {img.get('alt', 'Image')}]"
        p.alignment = PP_ALIGN.CENTER
    elif content_html.find('ul'):
        items = content_html.find('ul').find_all('li')
        p.text = "• " + items[0].get_text().strip()
        for item in items[1:]:
            p_item = text_frame.add_paragraph()
            p_item.text = "• " + item.get_text().strip()
            p_item.level = 1
    elif content_html.find('ol'):
        items = content_html.find('ol').find_all('li')
        for i, item in enumerate(items, 1):
            if i == 1:
                p.text = f"{i}. {item.get_text().strip()}"
            else:
                p_item = text_frame.add_paragraph()
                p_item.text = f"{i}. {item.get_text().strip()}"
                p_item.level = 1
    elif content_html.find(['h1', 'h2', 'h3', 'h4']):
        heading = content_html.find(['h1', 'h2', 'h3', 'h4'])
        p.text = heading.get_text().strip()
        p.font.bold = True
        
        # Set size based on heading level
        tag_name = heading.name
        if tag_name == 'h1':
            p.font.size = Pt(24)
        elif tag_name == 'h2':
            p.font.size = Pt(20)
        elif tag_name == 'h3':
            p.font.size = Pt(18)
        else:
            p.font.size = Pt(16)
    else:
        # Basic text content
        p.text = content_html.get_text().strip()


if __name__ == "__main__":
    # Example usage
    html_file = "newsample.html"
    output_pptx = "presentation1.ppt"
    
    # Read HTML from file
    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()
    
    # Convert to PowerPoint
    html_to_pptx(html_content, output_pptx)

Presentation saved as presentation1.ppt


In [6]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content
    process_standard_slide_content(slide, current_slide, css_rules)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_standard_slide_content(slide, current_slide, css_rules):
    """Process content for a standard slide layout"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Find and process all row divs
    rows = slide.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide, content_frame, current_slide, css_rules, current_y)
    else:
        # Process each row
        for row in rows:
            # Estimate row height based on content
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y)
            
            # Update the vertical position for the next row
            current_y = max(current_y + row_height, new_y) + Inches(0.2) if new_y else current_y + row_height + Inches(0.2)

def estimate_row_height(row):
    """Estimate the height needed for a row based on content"""
    # Base height
    height = Inches(0.5)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height))
            except (ValueError, TypeError):
                height = max(height, Inches(2))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2))
    
    # Add height for text
    text_content = row.get_text().strip()
    text_lines = len(text_content.split('\n'))
    height = max(height, Inches(0.3 * text_lines))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows))
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        height = max(height, Inches(0.2 * code_lines))
    
    return height

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

# Replace this function in your code

def process_column_slide(slide_html, prs, slide_idx):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Get slide dimensions from the presentation
    # Note: Slides inherit dimensions from the presentation
    slide_width = prs.slide_width  # Use presentation's slide_width
    slide_height = prs.slide_height  # Use presentation's slide_height
    
    # Convert to inches for consistency with other measurements
    # 914400 EMU = 1 inch
    slide_width_inches = slide_width / 914400
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    # Process left column
    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width)

def process_column_content(column, slide, x_pos, y_pos, width):
    """Process content within a column with dynamic image handling"""
    current_y = y_pos
    
    # Process each row in the column
    for row in column.find_all('div', class_='row'):
        # Check for images
        img = row.find('img')
        if img:
            img_url = img.get('src', '')
            if img_url:
                try:
                    # Download and add image
                    response = requests.get(img_url, stream=True)
                    if response.status_code == 200:
                        img_bytes = BytesIO(response.content)
                        
                        # Get original image dimensions for aspect ratio
                        with PILImage.open(img_bytes) as pil_img:
                            img_width, img_height = pil_img.size
                            aspect_ratio = img_width / img_height
                        
                        # Reset file pointer
                        img_bytes.seek(0)
                        
                        # Calculate the image width (restricted to column width)
                        img_width = width
                        
                        # Calculate height based on aspect ratio
                        img_height = width / aspect_ratio
                        
                        # If height is too large, adjust
                        max_height = Inches(4)  # Maximum reasonable height
                        if img_height > max_height:
                            img_height = max_height
                            img_width = img_height * aspect_ratio
                        
                        # Add the image to the slide
                        picture = slide.shapes.add_picture(img_bytes, x_pos, current_y, width=img_width)
                        
                        # Update position for next element
                        current_y += picture.height + Inches(0.2)
                        
                        # Add caption if available
                        caption = row.find('p', class_='caption')
                        if caption and caption.get_text().strip():
                            caption_box = slide.shapes.add_textbox(
                                x_pos, current_y, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption.get_text().strip()
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            current_y += Inches(0.4)  # Space for caption
                    else:
                        # Fallback to text if image can't be downloaded
                        current_y = add_textbox_relative(
                            slide, current_y, x_pos, width, Inches(0.8), 
                            f"[Image: {img.get('alt', 'Image')}]",
                            font_size=14, bg_color=RGBColor(240, 240, 240)
                        )
                except Exception as e:
                    print(f"Error processing image: {e}")
                    current_y = add_textbox_relative(
                        slide, current_y, x_pos, width, Inches(0.8), 
                        f"[Image: {img.get('alt', 'Image')}]",
                        font_size=14, bg_color=RGBColor(240, 240, 240)
                    )
            else:
                # Process as text if no image URL
                text = row.get_text().strip()
                current_y = add_textbox_relative(
                    slide, current_y, x_pos, width, Inches(0.8), text,
                    font_size=14, bg_color=RGBColor(240, 240, 240)
                )
        else:
            # Process regular text content
            text = row.get_text().strip()
            # Skip empty text
            if text:
                # Calculate appropriate height based on text length
                text_lines = max(1, len(text) // 40)  # Rough estimate: 40 chars per line
                text_height = Inches(0.3 * text_lines)
                
                current_y = add_textbox_relative(
                    slide, current_y, x_pos, width, text_height, text,
                    font_size=14, bg_color=RGBColor(240, 240, 240)
                )
    
    return current_y

def process_content(element, text_frame, slide, css_rules, y_position=None):
    """Process HTML content and add it to a PowerPoint slide"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        new_y = process_image_with_download(element, text_frame, slide, css_rules, y_position)
        max_y = max(max_y, new_y) if new_y else max_y
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules)
    
    return max_y

def process_text_content(element, text_frame, css_rules):
    """Process text content and add it to the text frame"""
    # Extract direct text content from the element (exclude nested elements)
    direct_text = ''
    for child in element.children:
        if isinstance(child, str):
            direct_text += child
            
    if direct_text.strip():
        p = text_frame.add_paragraph()
        p.text = direct_text.strip()
        apply_css_to_paragraph(p, element, css_rules)
    
    # Process paragraph elements
    paragraphs = element.find_all(['p', 'div', 'h3', 'h4'], recursive=False)
    for para in paragraphs:
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        apply_css_to_paragraph(p, para, css_rules)
        
        # Apply special formatting
        if para.name in ['h3', 'h4']:
            p.font.bold = True
            size_map = {'h3': 20, 'h4': 18}
            p.font.size = Pt(size_map.get(para.name, 16))
            
        # Handle text highlighting
        if para.find('span', class_='highlight'):
            # In a real implementation, you would extract the exact highlighted text
            # This is a simplification
            p.font.highlight_color = 3  # Yellow
            
        # Handle bold and italic
        if para.find(['b', 'strong']):
            p.font.bold = True
        if para.find(['i', 'em']):
            p.font.italic = True

def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)

def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images by downloading and embedding them in the PowerPoint with dynamic sizing"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Get position information - access slide dimensions from presentation
    # Get a reference to the presentation
    prs = slide.part.parent
    
    # The standard slide dimensions in python-pptx (in EMU units)
    slide_width = prs.slide_width  # This is in EMU units (English Metric Units)
    slide_height = prs.slide_height  # This is in EMU units
    
    # Convert to inches for consistency with other measurements
    # 914400 EMU = 1 inch
    slide_width_inches = slide_width / 914400
    slide_height_inches = slide_height / 914400
    
    # Calculate content area (85% of slide width)
    content_width = Inches(slide_width_inches * 0.85)
    left = Inches((slide_width_inches - (slide_width_inches * 0.85)) / 2)  # Center the content
    top = y_position if y_position is not None else Inches(1.5)
    
    # Max height (60% of slide height)
    max_image_height = Inches(slide_height_inches * 0.6)
    
    try:
        # Download the image
        response = requests.get(img_url, stream=True)
        if response.status_code == 200:
            # Create image from content
            img_bytes = BytesIO(response.content)
            
            # Get original image dimensions
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Calculate dimensions based on various constraints
            # 1. Try to use HTML width attribute if available
            width = None
            if img.get('width'):
                try:
                    width_px = int(img.get('width'))
                    width = min(Inches(width_px / 96), content_width)  # Use specified width but cap at content width
                except (ValueError, TypeError):
                    width = None
            
            # 2. If no width attribute or parsing failed, use dynamic sizing
            if width is None:
                # Convert image width to inches (approximate)
                img_width_inches = img_width / 96  # Assuming 96 DPI
                if img_width_inches > slide_width_inches * 0.85:
                    # Image is wider than content area, scale down
                    width = content_width
                else:
                    # Use image's natural width but cap at content width
                    width = min(Inches(img_width_inches), content_width)
            
            # Calculate height based on aspect ratio, and ensure it's not too tall
            # We can't convert directly to EMU as height.inches might not exist
            calculated_height_inches = width.inches / aspect_ratio if hasattr(width, 'inches') else width / 914400 / aspect_ratio
            
            if calculated_height_inches > slide_height_inches * 0.6:
                # Image would be too tall, recalculate width to maintain aspect ratio
                height = max_image_height
                width = Inches(height.inches * aspect_ratio) if hasattr(height, 'inches') else Inches((height / 914400) * aspect_ratio)
            
            # Add image to slide with calculated dimensions
            picture = slide.shapes.add_picture(img_bytes, left, top, width=width)
            
            # Get actual height after adding to slide
            new_top = top + picture.height
            
            # Add caption if available
            caption = element.find('p', class_='caption')
            if caption:
                caption_text = caption.get_text().strip()
                
                # Only create caption if there's actual text
                if caption_text:
                    caption_box = slide.shapes.add_textbox(
                        left, new_top, width, Inches(0.3)
                    )
                    caption_frame = caption_box.text_frame
                    p = caption_frame.add_paragraph()
                    p.text = caption_text
                    p.font.italic = True
                    p.alignment = PP_ALIGN.CENTER
                    
                    new_top += caption_box.height
            
            return new_top + Inches(0.2)  # Return updated position with spacing
        else:
            # Fallback to placeholder if image can't be downloaded
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt}]"
            p.alignment = PP_ALIGN.CENTER
            
            # Add image caption if available
            caption = element.find('p', class_='caption')
            if caption and caption.get_text().strip():
                p = text_frame.add_paragraph()
                p.text = caption.get_text().strip()
                p.font.italic = True
                p.alignment = PP_ALIGN.CENTER
            
            return y_position + Inches(0.8)  # Approximate space for placeholder
    except Exception as e:
        print(f"Error processing image {img_url}: {e}")
        # Fallback to placeholder
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt}]"
        p.alignment = PP_ALIGN.CENTER
        
        # Add image caption if available
        caption = element.find('p', class_='caption')
        if caption and caption.get_text().strip():
            p = text_frame.add_paragraph()
            p.text = caption.get_text().strip()
            p.font.italic = True
            p.alignment = PP_ALIGN.CENTER
        
        return y_position + Inches(0.8)  # Approximate space for placeholder
def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None

def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")

Error: 'SlidePart' object has no attribute 'parent'
Usage: python html_to_pptx.py <html_file> [output_pptx]


In [6]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides"""
    # Calculate approximately how much text fits on one slide
    chars_per_slide = 1500  # Estimate based on font size and slide area
    
    if len(text) > chars_per_slide:
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:chars_per_slide] + "..."
        
        # Create a new slide for remaining text
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(6.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True  # Enable word wrap
        
        # Add "continued" indicator
        p = next_text_frame.add_paragraph()
        p.text = f"(Continued from slide {current_slide_index+1})"
        p.font.italic = True
        
        # Recursively handle remaining text
        remaining_text = text[chars_per_slide:]
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False
def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row
        for row in rows:
            # Estimate row height based on content
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row
            current_y = max(current_y + row_height, new_y) + Inches(0.2) if new_y else current_y + row_height + Inches(0.2)

def estimate_row_height(row):
    """Estimate the height needed for a row based on content"""
    # Base height
    height = Inches(0.5)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height))
            except (ValueError, TypeError):
                height = max(height, Inches(2))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2))
    
    # Add height for text
    text_content = row.get_text().strip()
    text_lines = len(text_content.split('\n'))
    height = max(height, Inches(0.3 * text_lines))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows))
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        height = max(height, Inches(0.2 * code_lines))
    
    return height

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)

def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process content within a column with dynamic image handling using HTML dimensions"""
    current_y = y_pos
    
    # Process each row in the column
    for row in column.find_all('div', class_='row'):
        # Calculate remaining vertical space on slide
        remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.5) - current_y  # 0.5 inch margin at bottom
        
        # Check for images
        img = row.find('img')
        if img:
            img_url = img.get('src', '')
            if img_url:
                try:
                    # Download and add image
                    response = requests.get(img_url, stream=True)
                    if response.status_code == 200:
                        img_bytes = BytesIO(response.content)
                        
                        # Get original image dimensions for aspect ratio
                        with PILImage.open(img_bytes) as pil_img:
                            img_width, img_height = pil_img.size
                            aspect_ratio = img_width / img_height
                        
                        # Reset file pointer
                        img_bytes.seek(0)
                        
                        # Get dimensions from HTML if available
                        width_specified = img.get('width')
                        height_specified = img.get('height')
                        
                        # Determine image dimensions to use in PowerPoint
                        img_width_in_inches = None
                        img_height_in_inches = None
                        
                        # Case 1: Both width and height specified in HTML
                        if width_specified and height_specified:
                            try:
                                img_width_in_inches = min(Inches(int(width_specified) / 96), width)  # Constrain to column width
                                img_height_in_inches = Inches(int(height_specified) / 96)
                            except (ValueError, TypeError):
                                # Fall back to column width
                                img_width_in_inches = width
                                img_height_in_inches = Inches(width.inches / aspect_ratio) if hasattr(width, 'inches') else Inches(float(width) / aspect_ratio)
                        
                        # Case 2: Only width specified in HTML
                        elif width_specified:
                            try:
                                img_width_in_inches = min(Inches(int(width_specified) / 96), width)  # Constrain to column width
                                img_height_in_inches = Inches(int(width_specified) / 96 / aspect_ratio)
                            except (ValueError, TypeError):
                                img_width_in_inches = width
                                img_height_in_inches = Inches(width.inches / aspect_ratio) if hasattr(width, 'inches') else Inches(float(width) / aspect_ratio)
                        
                        # Case 3: Only height specified in HTML
                        elif height_specified:
                            try:
                                img_height_in_inches = Inches(int(height_specified) / 96)
                                calculated_width = Inches(int(height_specified) / 96 * aspect_ratio)
                                img_width_in_inches = min(calculated_width, width)  # Constrain to column width
                            except (ValueError, TypeError):
                                img_width_in_inches = width
                                img_height_in_inches = Inches(width.inches / aspect_ratio) if hasattr(width, 'inches') else Inches(float(width) / aspect_ratio)
                        
                        # Case 4: No dimensions specified in HTML
                        else:
                            # Use column width and adjust height based on aspect ratio
                            img_width_in_inches = width
                            img_height_in_inches = Inches(width.inches / aspect_ratio) if hasattr(width, 'inches') else Inches(float(width) / aspect_ratio)
                        
                        # Make sure the image fits in the available vertical space
                        if img_height_in_inches > remaining_height:
                            # Scale down to fit
                            img_height_in_inches = remaining_height
                            img_width_in_inches = Inches(img_height_in_inches.inches * aspect_ratio) if hasattr(img_height_in_inches, 'inches') else Inches(float(img_height_in_inches) * aspect_ratio)
                            
                            # Ensure width doesn't exceed column width
                            if img_width_in_inches > width:
                                img_width_in_inches = width
                                img_height_in_inches = Inches(width.inches / aspect_ratio) if hasattr(width, 'inches') else Inches(float(width) / aspect_ratio)
                        
                        # Skip if too little space left
                        if img_height_in_inches < Inches(0.5):
                            break  # Not enough space for a meaningful image
                        
                        # Add the image to the slide with calculated dimensions
                        picture = slide.shapes.add_picture(
                            img_bytes, x_pos, current_y, 
                            width=img_width_in_inches, 
                            height=img_height_in_inches
                        )
                        
                        # Update position for next element
                        current_y += img_height_in_inches + Inches(0.1)
                        
                        # Add caption if available
                        caption = row.find('p', class_='caption')
                        if caption and caption.get_text().strip():
                            caption_text = caption.get_text().strip()
                            caption_height = Inches(0.3)
                            
                            if current_y + caption_height < Inches(SLIDE_HEIGHT_INCHES - 0.5):
                                caption_box = slide.shapes.add_textbox(
                                    x_pos, current_y, img_width_in_inches, caption_height
                                )
                                caption_frame = caption_box.text_frame
                                p = caption_frame.add_paragraph()
                                p.text = caption_text
                                p.font.italic = True
                                p.alignment = PP_ALIGN.CENTER
                                
                                current_y += caption_height + Inches(0.1)
                    else:
                        # Fallback to text if image can't be downloaded
                        current_y = add_textbox_relative(
                            slide, current_y, x_pos, width, Inches(0.8), 
                            f"[Image: {img.get('alt', 'Image')}]",
                            font_size=14, bg_color=RGBColor(240, 240, 240)
                        )
                except Exception as e:
                    print(f"Error processing column image: {e}")
                    current_y = add_textbox_relative(
                        slide, current_y, x_pos, width, Inches(0.8), 
                        f"[Image: {img.get('alt', 'Image')}]",
                        font_size=14, bg_color=RGBColor(240, 240, 240)
                    )
            else:
                # Process as text if no image URL
                text = row.get_text().strip()
                current_y = add_textbox_relative(
                    slide, current_y, x_pos, width, Inches(0.8), text,
                    font_size=14, bg_color=RGBColor(240, 240, 240)
                )
        else:
            # Process regular text content
            text = row.get_text().strip()
            # Skip empty text
            if text:
                # Calculate appropriate height based on text length
                text_lines = max(1, len(text) // 40)  # Rough estimate: 40 chars per line
                text_height = min(Inches(0.3 * text_lines), remaining_height - Inches(0.1))
                
                # Skip if not enough space
                if text_height < Inches(0.3):
                    break
                
                current_y = add_textbox_relative(
                    slide, current_y, x_pos, width, text_height, text,
                    font_size=14, bg_color=RGBColor(240, 240, 240)
                )
    
    return current_y

def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        new_y = process_image_with_download(element, text_frame, slide, css_rules, y_position)
        max_y = max(max_y, new_y) if new_y else max_y
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules, slide, prs, slide_index)
    
    return max_y


def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract direct text content from the element (exclude nested elements)
    direct_text = ''
    for child in element.children:
        if isinstance(child, str):
            direct_text += child
            
    if direct_text.strip():
        # Check if we need to handle text overflow
        if slide and prs and len(direct_text.strip()) > 1500:
            handle_text_overflow(direct_text.strip(), text_frame, slide, slide_index, prs)
        else:
            p = text_frame.add_paragraph()
            p.text = direct_text.strip()
            apply_css_to_paragraph(p, element, css_rules)
    
    # Process paragraph elements
    paragraphs = element.find_all(['p', 'div', 'h3', 'h4'], recursive=False)
    for para in paragraphs:
        para_text = para.get_text().strip()
        
        # Check if we need to handle text overflow for each paragraph
        if slide and prs and len(para_text) > 1500:
            # Remove the p = text_frame.add_paragraph() line
            # Just call handle_text_overflow directly
            handle_text_overflow(para_text, text_frame, slide, slide_index, prs)
        else:
            # Set a reasonable width for the text frame to ensure proper wrapping
            if hasattr(text_frame, 'width') and not text_frame.width:
                text_frame.width = Inches(7)  # Standard width for content
                
            p = text_frame.add_paragraph()
            p.text = para_text
            apply_css_to_paragraph(p, para, css_rules)
            
            # Apply special formatting
            if para.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 20, 'h4': 18}
                p.font.size = Pt(size_map.get(para.name, 16))
                
            # Handle text highlighting
            if para.find('span', class_='highlight'):
                p.font.highlight_color = 3  # Yellow
                
            # Handle bold and italic
            if para.find(['b', 'strong']):
                p.font.bold = True
            if para.find(['i', 'em']):
                p.font.italic = True

def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None

def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "newsample.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL

Presentation saved as presentation.pptx
Successfully converted newsample.html to presentation.pptx


In [10]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    apply_background_from_css(slide, current_slide, css_rules)
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)
def process_row(row, slide, css_rules, top_position, width=Inches(9), prs=None, slide_index=0):
    # Create shape for the row
    height = estimate_row_height(row)
    row_shape = slide.shapes.add_shape(
        MSO_SHAPE.RECTANGLE, 
        Inches(0.5), top_position, 
        width, height
    )
    
    # Apply background color
    apply_background_from_css(row, row_shape, css_rules)
    
    # Set default if no background was applied
    if not hasattr(row_shape.fill, 'fore_color') or not row_shape.fill.fore_color:
        row_shape.fill.solid()
        row_shape.fill.fore_color.rgb = RGBColor(255, 255, 255)  # Default white
    
    # Add text and other content
    text_shape = slide.shapes.add_textbox(
        Inches(0.5), top_position, 
        width, height
    )
    text_frame = text_shape.text_frame
    
    # Process the content
    process_content(row, text_frame, slide, css_rules, top_position, prs, slide_index)
    
    return top_position + height + Inches(0.2)
def process_standard_slide_content(slide, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Find and process all row divs
    rows = slide.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row using our new function
        for row in rows:
            current_y = process_row(row, current_slide, css_rules, current_y)
def estimate_row_height(row):
    """Estimate the height needed for a row based on content"""
    # Base height
    height = Inches(0.5)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height))
            except (ValueError, TypeError):
                height = max(height, Inches(2))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2))
    
    # Add height for text
    text_content = row.get_text().strip()
    text_lines = len(text_content.split('\n'))
    height = max(height, Inches(0.3 * text_lines))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows))
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        height = max(height, Inches(0.2 * code_lines))
    
    return height

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)
    apply_background_from_css(slide_html, slide, css_rules)
    
    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    # Process left column
    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules)
def process_column_content(column, slide, x_pos, y_pos, width, css_rules):
    """Process content within a column with dynamic image handling using HTML dimensions"""
    current_y = y_pos
    
    # Process each row in the column
    for row in column.find_all('div', class_='row'):
        # Process row with styling
        current_y = process_row(row, slide, css_rules, current_y, width)

def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        new_y = process_image_with_download(element, text_frame, slide, css_rules, y_position)
        max_y = max(max_y, new_y) if new_y else max_y
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules, slide, prs, slide_index)
    
    return max_y

def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract direct text content from the element (exclude nested elements)
    direct_text = ''
    for child in element.children:
        if isinstance(child, str):
            direct_text += child
            
    if direct_text.strip():
        # Check if we need to handle text overflow
        if slide and prs and len(direct_text.strip()) > 1500:
            handle_text_overflow(direct_text.strip(), text_frame, slide, slide_index, prs)
        else:
            p = text_frame.add_paragraph()
            p.text = direct_text.strip()
            apply_css_to_paragraph(p, element, css_rules)
    
    # Process paragraph elements
    paragraphs = element.find_all(['p', 'div', 'h3', 'h4'], recursive=False)
    for para in paragraphs:
        para_text = para.get_text().strip()
        
        # Check if we need to handle text overflow for each paragraph
        if slide and prs and len(para_text) > 1500:
            # Remove the p = text_frame.add_paragraph() line
            # Just call handle_text_overflow directly
            handle_text_overflow(para_text, text_frame, slide, slide_index, prs)
        else:
            # Set a reasonable width for the text frame to ensure proper wrapping
            if hasattr(text_frame, 'width') and not text_frame.width:
                text_frame.width = Inches(7)  # Standard width for content
                
            p = text_frame.add_paragraph()
            p.text = para_text
            apply_css_to_paragraph(p, para, css_rules)
            
            # Apply special formatting
            if para.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 20, 'h4': 18}
                p.font.size = Pt(size_map.get(para.name, 16))
                
            # Handle text highlighting
            if para.find('span', class_='highlight'):
                p.font.highlight_color = 3  # Yellow
                
            # Handle bold and italic
            if para.find(['b', 'strong']):
                p.font.bold = True
            if para.find(['i', 'em']):
                p.font.italic = True

def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None
def apply_background_from_css(element, shape, css_rules):
    """Apply background color from CSS to any PowerPoint shape or slide"""
    # Get element classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
    
    # Check element's tag name also
    element_type = getattr(element, 'name', None)
    
    # Apply background from classes
    for class_name in classes:
        if class_name in css_rules and 'background-color' in css_rules[class_name]:
            bg_color_str = css_rules[class_name]['background-color']
            rgb = extract_rgb_color(bg_color_str)
            if rgb:
                # Apply background fill to the shape or slide
                if hasattr(shape, 'background'):  # It's a slide
                    fill = shape.background.fill
                    fill.solid()
                    fill.fore_color.rgb = RGBColor(*rgb)
                else:  # It's a shape
                    shape.fill.solid()
                    shape.fill.fore_color.rgb = RGBColor(*rgb)
                return True
    
    # Check by element type
    if element_type and element_type in css_rules and 'background-color' in css_rules[element_type]:
        bg_color_str = css_rules[element_type]['background-color']
        rgb = extract_rgb_color(bg_color_str)
        if rgb:
            if hasattr(shape, 'background'):  # It's a slide
                fill = shape.background.fill
                fill.solid()
                fill.fore_color.rgb = RGBColor(*rgb)
            else:  # It's a shape
                shape.fill.solid()
                shape.fill.fore_color.rgb = RGBColor(*rgb)
            return True
            
    return False

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string with improved support"""
    if not color_str:
        return None
        
    # Handle hex colors with 6 digits
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
    
    # Handle hex colors with 3 digits (shorthand)
    hex_short_match = re.search(r'#([0-9a-fA-F]{3})', color_str)
    if hex_short_match:
        hex_value = hex_short_match.group(1)
        return (
            int(hex_value[0] + hex_value[0], 16),
            int(hex_value[1] + hex_value[1], 16),
            int(hex_value[2] + hex_value[2], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
    
    return None
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides"""
    # Calculate approximately how much text fits on one slide
    chars_per_slide = 1500  # Estimate based on font size and slide area
    
    if len(text) > chars_per_slide:
        # Check if the text frame already has content
        if not text_frame.text and len(text_frame.paragraphs) == 1 and not text_frame.paragraphs[0].text:
            # If empty, add text to this frame
            text_frame.text = text[:chars_per_slide] + "..."
        else:
            # Otherwise add as a new paragraph
            p = text_frame.add_paragraph()
            p.text = text[:chars_per_slide] + "..."
        
        # Create a new slide for remaining text
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(6.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True  # Enable word wrap
        
        # Add "continued" indicator
        p = next_text_frame.add_paragraph()
        p.text = f"(Continued from slide {current_slide_index+1})"
        p.font.italic = True
        
        # Recursively handle remaining text
        remaining_text = text[chars_per_slide:]
        if remaining_text.strip():  # Only continue if there's actual content
            handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                                current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text to the frame - no overflow
        if not text_frame.text and len(text_frame.paragraphs) == 1 and not text_frame.paragraphs[0].text:
            # If empty, add text to this frame
            text_frame.text = text
        else:
            # Otherwise add as a new paragraph
            p = text_frame.add_paragraph()
            p.text = text
    
    return False

def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "newsample.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        

Presentation saved as presentation.pptx
Successfully converted newsample.html to presentation.pptx


In [3]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides"""
    # Calculate approximately how much text fits on one slide
    chars_per_slide = 1500  # Estimate based on font size and slide area
    
    if len(text) > chars_per_slide:
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:chars_per_slide] + "..."
        
        # Create a new slide for remaining text
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(6.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True  # Enable word wrap
        
        # Add "continued" indicator
        p = next_text_frame.add_paragraph()
        p.text = f"(Continued from slide {current_slide_index+1})"
        p.font.italic = True
        
        # Recursively handle remaining text
        remaining_text = text[chars_per_slide:]
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False
def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row
        for row in rows:
            # Estimate row height based on content
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row
            current_y = max(current_y + row_height, new_y) + Inches(0.2) if new_y else current_y + row_height + Inches(0.2)

def estimate_row_height(row):
    """Estimate the height needed for a row based on content"""
    # Base height
    height = Inches(0.5)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height))
            except (ValueError, TypeError):
                height = max(height, Inches(2))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2))
    
    # Add height for text
    text_content = row.get_text().strip()
    text_lines = len(text_content.split('\n'))
    height = max(height, Inches(0.3 * text_lines))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows))
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        height = max(height, Inches(0.2 * code_lines))
    
    return height

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)

def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process content within a column with properly aligned non-overlapping unified boxes"""
    current_y = y_pos
    
    try:
        # Process each row in the column
        for row in column.find_all('div', class_='row'):
            try:
                # Calculate remaining vertical space on slide
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.5) - current_y  # 0.5 inch margin at bottom
                
                # Skip if not enough space left on slide
                if remaining_height < Inches(0.5):
                    break
                
                # Get images and text content
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # Extract text content (excluding image alt text)
                text = ""
                try:
                    # Create a temporary copy of the row to remove images for text extraction
                    row_copy = BeautifulSoup(str(row), 'html.parser')
                    for img_tag in row_copy.find_all('img'):
                        img_tag.decompose()  # Remove image tags
                    
                    # Get clean text content
                    text = row_copy.get_text().strip()
                except Exception as e:
                    print(f"Error extracting text: {e}")
                    # Fallback to simpler text extraction
                    text = row.get_text().strip()
                
                has_text = bool(text)
                
                # If both text and images are present, create a unified box
                if has_text and has_images:
                    try:
                        # Calculate space needed for text
                        font_size = 14  # in points
                        # Get width in inches (handling both Inches objects and floats)
                        width_inches = getattr(width, 'inches', float(width._val / 914400)) if hasattr(width, '_val') else float(width)
                        chars_per_line = int((width_inches - 0.2) * 72 / (font_size * 0.6))  # Approx chars per line
                        text_lines = max(1, len(text) // chars_per_line + (1 if len(text) % chars_per_line > 0 else 0))
                        text_height = Inches(0.2 + (text_lines * 0.25))  # Base height + lines
                        
                        # Calculate space needed for images (with more precision)
                        image_heights = []
                        for img in img_tags:
                            try:
                                # Try to determine the image's aspect ratio and dimensions
                                width_spec = img.get('width')
                                height_spec = img.get('height')
                                
                                if width_spec and height_spec:
                                    # If both dimensions are specified, use them directly
                                    try:
                                        img_width = min(Inches(int(width_spec) / 96), width - Inches(0.4))
                                        img_height = Inches(int(height_spec) / 96)
                                        image_heights.append(img_height)
                                    except:
                                        image_heights.append(Inches(1))  # Default if conversion fails
                                else:
                                    # Default conservative estimate
                                    image_heights.append(Inches(1))
                            except:
                                image_heights.append(Inches(1))  # Default safe value
                        
                        # Total height calculation
                        total_img_height = sum(image_heights) + (Inches(0.15) * len(image_heights))  # Images + spacing
                        box_height = text_height + total_img_height + Inches(0.4)  # Text + images + padding
                        
                        # Ensure it fits in remaining space
                        if box_height > remaining_height:
                            box_height = remaining_height
                        
                        # Skip if not enough space
                        if box_height < Inches(0.6):
                            continue
                        
                        # Create the unified box (background shape)
                        bg_shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, box_height
                        )
                        bg_shape.fill.solid()
                        bg_shape.fill.fore_color.rgb = RGBColor(240, 240, 240)
                        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                        
                        # Add text at the top of the box
                        text_box = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.1), 
                            width - Inches(0.2), 
                            text_height
                        )
                        text_frame = text_box.text_frame
                        text_frame.word_wrap = True
                        p = text_frame.add_paragraph()
                        p.text = text
                        p.font.size = Pt(font_size)
                        
                        # Starting Y position for the first image - AFTER the text
                        img_y = current_y + text_height + Inches(0.15)
                        
                        # Process each image
                        for img in img_tags:
                            try:
                                img_url = img.get('src', '')
                                if not img_url:
                                    continue
                                
                                # Check available height
                                available_height = box_height - (img_y - current_y) - Inches(0.1)
                                if available_height < Inches(0.3):
                                    break  # No more space for images
                                
                                # Download image
                                response = requests.get(img_url, stream=True, timeout=5)
                                if response.status_code != 200:
                                    continue
                                
                                # Process image
                                img_bytes = BytesIO(response.content)
                                
                                # Get dimensions and aspect ratio
                                with PILImage.open(img_bytes) as pil_img:
                                    if pil_img.width < 10 or pil_img.height < 10:
                                        img_bytes.close()
                                        continue  # Skip invalid images
                                    aspect_ratio = pil_img.width / pil_img.height
                                
                                img_bytes.seek(0)
                                
                                # Calculate dimensions for PowerPoint
                                width_spec = img.get('width')
                                height_spec = img.get('height')
                                
                                if width_spec and height_spec:
                                    try:
                                        max_img_width = width - Inches(0.3)
                                        img_width = min(Inches(int(width_spec) / 96), max_img_width)
                                        img_height = Inches(int(height_spec) / 96)
                                    except:
                                        img_width = width - Inches(0.3)
                                        img_height = Inches((getattr(width, 'inches', 4) - 0.3) / aspect_ratio)
                                elif width_spec:
                                    try:
                                        max_img_width = width - Inches(0.3)
                                        img_width = min(Inches(int(width_spec) / 96), max_img_width)
                                        img_height = Inches(img_width.inches / aspect_ratio)
                                    except:
                                        img_width = width - Inches(0.3)
                                        img_height = Inches((getattr(width, 'inches', 4) - 0.3) / aspect_ratio)
                                elif height_spec:
                                    try:
                                        img_height = Inches(int(height_spec) / 96)
                                        max_img_width = width - Inches(0.3)
                                        img_width = min(Inches(img_height.inches * aspect_ratio), max_img_width)
                                    except:
                                        img_width = width - Inches(0.3)
                                        img_height = Inches((getattr(width, 'inches', 4) - 0.3) / aspect_ratio)
                                else:
                                    img_width = width - Inches(0.3)
                                    img_height = Inches((getattr(width, 'inches', 4) - 0.3) / aspect_ratio)
                                
                                # Ensure image fits in available space
                                if img_height > available_height:
                                    img_height = available_height
                                    img_width = Inches(img_height.inches * aspect_ratio)
                                    if img_width > width - Inches(0.3):
                                        img_width = width - Inches(0.3)
                                        img_height = Inches(img_width.inches / aspect_ratio)
                                
                                # Minimum size check
                                if img_height < Inches(0.3) or img_width < Inches(0.3):
                                    img_bytes.close()
                                    continue
                                
                                # Create a new BytesIO to ensure memory is properly managed
                                img_data = BytesIO(img_bytes.getvalue())
                                img_bytes.close()
                                
                                # Add image - FIXED: Safe centering calculation without .inches attribute
                                # We'll just place it with a simple margin instead of trying to center precisely
                                image_left = x_pos + Inches(0.15)  # Simple left margin
                                
                                picture = slide.shapes.add_picture(
                                    img_data, 
                                    image_left, 
                                    img_y, 
                                    width=img_width, 
                                    height=img_height
                                )
                                
                                # Close BytesIO
                                img_data.close()
                                
                                # Update position for next image
                                img_y += img_height + Inches(0.15)
                                
                            except Exception as img_error:
                                print(f"Error processing image in unified box: {img_error}")
                                continue
                        
                        # Update position for next row
                        current_y += box_height + Inches(0.2)
                        
                    except Exception as unified_error:
                        print(f"Error creating unified box: {unified_error}")
                        # Fallback to processing separately
                        if has_text:
                            try:
                                # Simple fallback
                                text_shape = slide.shapes.add_textbox(
                                    x_pos, current_y, width, Inches(0.5)
                                )
                                text_shape.text_frame.text = text[:100] + "..." if len(text) > 100 else text
                                current_y += Inches(0.6)
                            except:
                                current_y += Inches(0.5)
                
                # Handle text-only and image-only cases
                elif has_text:
                    try:
                        text_lines = max(1, len(text) // 40)
                        text_height = min(Inches(0.3 * text_lines), remaining_height - Inches(0.1))
                        
                        if text_height >= Inches(0.3):
                            current_y = add_textbox_relative(
                                slide, current_y, x_pos, width, text_height, text,
                                font_size=14, bg_color=RGBColor(240, 240, 240)
                            )
                    except Exception as text_error:
                        print(f"Error processing text-only content: {text_error}")
                        current_y += Inches(0.5)
                
                elif has_images:
                    # Image-only processing (simplified)
                    try:
                        for img in img_tags:
                            try:
                                img_url = img.get('src', '')
                                if not img_url:
                                    continue
                                
                                response = requests.get(img_url, stream=True, timeout=5)
                                if response.status_code != 200:
                                    continue
                                
                                img_bytes = BytesIO(response.content)
                                
                                with PILImage.open(img_bytes) as pil_img:
                                    aspect_ratio = pil_img.width / pil_img.height
                                
                                img_bytes.seek(0)
                                
                                # Simple calculation
                                img_width = Inches(3)  # Fixed reasonable width
                                img_height = Inches(3 / aspect_ratio)
                                
                                # Make sure it fits
                                if img_height > remaining_height - Inches(0.2):
                                    img_height = remaining_height - Inches(0.2)
                                    img_width = Inches(img_height.inches * aspect_ratio)
                                
                                if img_height < Inches(0.3):
                                    img_bytes.close()
                                    continue
                                
                                img_data = BytesIO(img_bytes.getvalue())
                                img_bytes.close()
                                
                                picture = slide.shapes.add_picture(
                                    img_data, x_pos, current_y, 
                                    width=img_width, height=img_height
                                )
                                
                                img_data.close()
                                current_y += img_height + Inches(0.2)
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                                continue
                    except Exception as img_section_error:
                        print(f"Error in image section: {img_section_error}")
                        current_y += Inches(0.5)
                
                # Add spacing between rows
                current_y += Inches(0.05)
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y
def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        new_y = process_image_with_download(element, text_frame, slide, css_rules, y_position)
        max_y = max(max_y, new_y) if new_y else max_y
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules, slide, prs, slide_index)
    
    return max_y


def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract direct text content from the element (exclude nested elements)
    direct_text = ''
    for child in element.children:
        if isinstance(child, str):
            direct_text += child
            
    if direct_text.strip():
        # Check if we need to handle text overflow
        if slide and prs and len(direct_text.strip()) > 1500:
            handle_text_overflow(direct_text.strip(), text_frame, slide, slide_index, prs)
        else:
            p = text_frame.add_paragraph()
            p.text = direct_text.strip()
            apply_css_to_paragraph(p, element, css_rules)
    
    # Process paragraph elements
    paragraphs = element.find_all(['p', 'div', 'h3', 'h4'], recursive=False)
    for para in paragraphs:
        para_text = para.get_text().strip()
        
        # Check if we need to handle text overflow for each paragraph
        if slide and prs and len(para_text) > 1500:
            # Remove the p = text_frame.add_paragraph() line
            # Just call handle_text_overflow directly
            handle_text_overflow(para_text, text_frame, slide, slide_index, prs)
        else:
            # Set a reasonable width for the text frame to ensure proper wrapping
            if hasattr(text_frame, 'width') and not text_frame.width:
                text_frame.width = Inches(7)  # Standard width for content
                
            p = text_frame.add_paragraph()
            p.text = para_text
            apply_css_to_paragraph(p, para, css_rules)
            
            # Apply special formatting
            if para.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 20, 'h4': 18}
                p.font.size = Pt(size_map.get(para.name, 16))
                
            # Handle text highlighting
            if para.find('span', class_='highlight'):
                p.font.highlight_color = 3  # Yellow
                
            # Handle bold and italic
            if para.find(['b', 'strong']):
                p.font.bold = True
            if para.find(['i', 'em']):
                p.font.italic = True

def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None

def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "newsample.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL

Presentation saved as presentation.pptx
Successfully converted newsample.html to presentation.pptx


In [19]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function

def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process content within a column with properly aligned non-overlapping unified boxes with adaptive heights"""
    current_y = y_pos
    
    try:
        # Process each row in the column
        for row in column.find_all('div', class_='row'):
            try:
                # Calculate remaining vertical space on slide
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                # Skip if not enough space left on slide
                if remaining_height < Inches(0.5):
                    break
                
                # Get images and text content
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # Extract text content properly to include both tag content and direct text
                text = ""
                try:
                    # Create a temporary copy of the row to work with
                    row_copy = BeautifulSoup(str(row), 'html.parser')
                    for img_tag in row_copy.find_all('img'):
                        img_tag.decompose()  # Remove image tags
                    
                    # Collect all text from all nodes
                    for element in row_copy.descendants:
                        if isinstance(element, str) and element.strip():
                            text += element.strip() + " "
                    
                    text = text.strip()
                except Exception as e:
                    print(f"Error extracting text: {e}")
                    # Fallback to simpler text extraction
                    text = row.get_text().strip()
                
                has_text = bool(text)
                
                # If both text and images are present, create a unified box
                if has_text and has_images:
                    try:
                        # Calculate space needed for text - ADAPTIVE HEIGHT
                        # Estimate how many lines the text will take
                        text_length = len(text)
                        text_words = len(text.split())
                        
                        # Calculate appropriate text height based on content length
                        if text_length < 100:
                            text_height = Inches(0.6)  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0)  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5)  # Longer text
                        else:
                            text_height = Inches(2.0)  # Very long text
                        
                        # Space for images
                        image_space = Inches(1.2)  # Default space for images
                        
                        # Calculate total box height - ADAPTIVE
                        box_height = text_height + image_space + Inches(0.4)  # Text + images + padding
                        
                        # Ensure it fits in remaining space
                        if box_height > remaining_height:
                            # If there's reasonable space, use what we have
                            if remaining_height > Inches(1.5):
                                box_height = remaining_height - Inches(0.1)
                                text_height = box_height - image_space - Inches(0.4)  # Adjust text height to fit
                            else:
                                # Skip this content if not enough space - advance a little and continue
                                current_y += Inches(0.2)
                                continue
                        
                        # Create the unified box (background shape)
                        bg_shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, box_height
                        )
                        bg_shape.fill.solid()
                        bg_shape.fill.fore_color.rgb = RGBColor(240, 240, 240)
                        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                        
                        # Add text at the top of the box
                        text_box = slide.shapes.add_textbox(
                            x_pos + Inches(0.2), 
                            current_y + Inches(0.2), 
                            width - Inches(0.4), 
                            text_height
                        )
                        text_frame = text_box.text_frame
                        text_frame.word_wrap = True
                        
                        # Split text into paragraphs if very long
                        if text_length > 300:
                            # Try to split at sentences
                            sentences = re.split(r'(?<=[.!?])\s+', text)
                            
                            # Add first sentence
                            p = text_frame.add_paragraph()
                            p.text = sentences[0]
                            p.font.size = Pt(11)  # Slightly smaller font for long text
                            
                            # Add remaining sentences as separate paragraphs
                            for sentence in sentences[1:]:
                                if sentence.strip():
                                    p = text_frame.add_paragraph()
                                    p.text = sentence
                                    p.font.size = Pt(11)
                        else:
                            # Add as a single paragraph
                            p = text_frame.add_paragraph()
                            p.text = text
                            p.font.size = Pt(12)
                        
                        # Starting Y position for the first image
                        img_y = current_y + text_height + Inches(0.2)
                        
                        # Process first image only (safer)
                        if img_tags and img_y + Inches(1.0) < current_y + box_height:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # Get image dimensions with aspect ratio
                                        try:
                                            with PILImage.open(img_bytes) as pil_img:
                                                aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Calculate image dimensions based on available space
                                            available_width = width - Inches(0.4)
                                            available_height = box_height - text_height - Inches(0.4)
                                            
                                            # Default dimensions
                                            img_width = min(Inches(2.0), available_width)
                                            img_height = img_width / aspect_ratio
                                            
                                            # Adjust if height is too large
                                            if img_height > available_height:
                                                img_height = available_height
                                                img_width = img_height * aspect_ratio
                                                
                                                # Ensure width isn't too large
                                                if img_width > available_width:
                                                    img_width = available_width
                                                    img_height = img_width / aspect_ratio
                                            
                                            # Create picture with proper sizing
                                            picture = slide.shapes.add_picture(
                                                img_bytes, 
                                                x_pos + Inches(0.2), 
                                                img_y, 
                                                width=img_width, 
                                                height=img_height
                                            )
                                        except:
                                            # Fallback to fixed size if aspect ratio calculation fails
                                            img_width = min(Inches(2.0), width - Inches(0.4))
                                            img_height = Inches(1.0)
                                            
                                            picture = slide.shapes.add_picture(
                                                img_bytes, 
                                                x_pos + Inches(0.2), 
                                                img_y, 
                                                width=img_width, 
                                                height=img_height
                                            )
                                        
                                        img_bytes.close()
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                        
                        # Update position for next row
                        current_y += box_height + Inches(0.3)
                        
                    except Exception as unified_error:
                        print(f"Error creating unified box: {unified_error}")
                        # Skip to next row on error
                        current_y += Inches(0.5)
                
                # Handle text-only content with ADAPTIVE HEIGHT
                elif has_text:
                    try:
                        # Calculate appropriate text height based on content length
                        text_length = len(text)
                        
                        if text_length < 100:
                            text_height = Inches(0.6)  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0)  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5)  # Longer text
                        else:
                            text_height = Inches(2.0)  # Very long text
                        
                        # Ensure it fits in remaining space
                        if text_height > remaining_height - Inches(0.2):
                            text_height = remaining_height - Inches(0.2)
                        
                        # Create textbox with background
                        shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, text_height
                        )
                        shape.fill.solid()
                        shape.fill.fore_color.rgb = RGBColor(240, 240, 240)
                        shape.line.color.rgb = RGBColor(200, 200, 200)

                        # Add the text
                        textbox = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.1), 
                            width - Inches(0.2), 
                            text_height - Inches(0.2)
                        )
                        text_frame = textbox.text_frame
                        text_frame.word_wrap = True
                        
                        # Split text into paragraphs if very long
                        if text_length > 300:
                            # Try to split at sentences
                            sentences = re.split(r'(?<=[.!?])\s+', text)
                            
                            # Add first sentence to first paragraph
                            text_frame.text = sentences[0]
                            
                            # Add remaining sentences as separate paragraphs
                            for sentence in sentences[1:]:
                                if sentence.strip():
                                    p = text_frame.add_paragraph()
                                    p.text = sentence
                                    p.font.size = Pt(11)
                        else:
                            # Add as a single paragraph
                            text_frame.text = text
                            paragraph = text_frame.paragraphs[0]
                            paragraph.font.size = Pt(12)
                        
                        # Update position
                        current_y += text_height + Inches(0.3)
                        
                    except Exception as text_error:
                        print(f"Error processing text-only content: {text_error}")
                        current_y += Inches(0.5)
                
                # Handle image-only content with adaptive sizing
                elif has_images:
                    try:
                        # Process first image only (safer)
                        if img_tags:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # Get image dimensions with aspect ratio
                                        try:
                                            with PILImage.open(img_bytes) as pil_img:
                                                aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Adaptive sizing based on available space
                                            img_width = min(Inches(3.0), width - Inches(0.4))
                                            img_height = img_width / aspect_ratio
                                            
                                            # Make sure it fits
                                            if img_height > remaining_height - Inches(0.3):
                                                img_height = remaining_height - Inches(0.3)
                                                img_width = img_height * aspect_ratio
                                                
                                                # Ensure width isn't too large
                                                if img_width > width - Inches(0.4):
                                                    img_width = width - Inches(0.4)
                                                    img_height = img_width / aspect_ratio
                                        except:
                                            # Fallback to fixed dimensions
                                            img_width = min(Inches(2.5), width - Inches(0.4))
                                            img_height = Inches(2.0)
                                        
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            x_pos + Inches(0.2), 
                                            current_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                                        current_y += img_height + Inches(0.3)
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                                current_y += Inches(0.5)
                    except Exception as img_section_error:
                        print(f"Error in image section: {img_section_error}")
                        current_y += Inches(0.5)
                
                # Add spacing between rows
                current_y += Inches(0.15)
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y


def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with simple reliable handling"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # Simplify by just adding all text to a single paragraph
    if all_text:
        # For very long text, use overflow handling
        if slide and prs and len(all_text) > 1000:
            # Use simple overflow handler
            chars_per_slide = 1000
            first_part = all_text[:chars_per_slide] + "..."
            
            p = text_frame.add_paragraph()
            p.text = first_part
            
            # Create a new slide for remaining text
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])
            
            # Add title to continuation slide
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from previous slide"
            p.font.italic = True
            p.font.bold = True
            
            # Add content to continuation slide
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            
            p = next_text_frame.add_paragraph()
            p.text = all_text[chars_per_slide:]
        else:
            # Just add text directly
            p = text_frame.add_paragraph()
            p.text = all_text
            
            # Apply basic formatting if needed
            if element.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 18, 'h4': 16}
                p.font.size = Pt(size_map.get(element.name, 14))




# Also update the handle_text_overflow function to manage text better
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved handling"""
    # Calculate approximately how much text fits on one slide - MORE CONSERVATIVE
    chars_per_slide = 800  # Reduced from 1500 for better fit
    
    if len(text) > chars_per_slide:
        # Find a good break point - end of sentence or paragraph
        break_point = chars_per_slide
        while break_point > chars_per_slide / 2:
            if text[break_point] in '.!?' and (break_point + 1 >= len(text) or text[break_point + 1] in ' \n\r\t'):
                break_point += 1  # Include the punctuation
                break
            elif text[break_point] in ' \n\r\t' and (break_point > 0 and text[break_point - 1] in '.!?'):
                break
            break_point -= 1
        
        if break_point <= chars_per_slide / 2:
            # If no good break found, find a word boundary
            break_point = chars_per_slide
            while break_point < len(text) and text[break_point] not in ' \n\r\t':
                break_point -= 1
            if break_point <= chars_per_slide / 2:
                break_point = chars_per_slide  # Fall back to hard break
        
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:break_point].strip()
        
        # Create a new slide for remaining text with BETTER FORMATTING
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
        
        # Add a title indicating continuation
        title_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = f"Continued from Slide {current_slide_index+1}"
        p.font.italic = True
        p.font.bold = True
        p.font.size = Pt(18)
        
        # Add the content with better positioning
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True
        
        # Recursively handle remaining text
        remaining_text = text[break_point:].strip()
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False


# Also update the process_standard_slide_content function
def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better overflow handling"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Get overall text length to determine if we might need special handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it as overflow text
    if len(full_text) > 1200 and prs:  # Lower threshold for better fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for row in rows:
            # Check remaining space
            remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.7) - current_y
            if remaining_height < Inches(1.0):
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs and len(rows) > 1:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"Continued from Slide {slide_index+1}"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[rows.index(row):]:
                        row_height = estimate_row_height(next_row)
                        
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        
                        new_y = process_content(next_row, text_frame, next_slide, css_rules, next_y, prs, slide_index+1)
                        
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                        
                        # Check if we're running out of space on this slide too
                        if next_y > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            break
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row with MORE SPACE
            current_y = max(current_y + row_height, new_y) + Inches(0.3) if new_y else current_y + row_height + Inches(0.3)


# Update the estimate_row_height function for better height calculation
def estimate_row_height(row):
    """Estimate the height needed for a row based on content - MORE GENEROUS"""
    # Base height
    height = Inches(0.6)  # Increased from 0.5
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height + 0.4))  # Extra margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.4))  # Increased default if can't parse
        else:
            # Default height for images with extra margin
            height = max(height, Inches(2.4))  # Increased from 2.0
    
    # Add height for text with BETTER ESTIMATION
    text_content = row.get_text().strip()
    text_words = len(text_content.split())
    # Approximate words per line based on average word length
    words_per_line = 12  # Conservative estimate
    text_lines = max(1, int(text_words / words_per_line) + 1)
    # More generous line height
    height = max(height, Inches(0.35 * text_lines))  # Increased from 0.3
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        # More space per table row
        height = max(height, Inches(0.4 * rows))  # Increased from 0.3
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        # More space per code line
        height = max(height, Inches(0.25 * code_lines))  # Increased from 0.2
    
    # Add a bit extra to prevent tight fit
    return height + Inches(0.2)  # Extra padding

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)

def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        new_y = process_image_with_download(element, text_frame, slide, css_rules, y_position)
        max_y = max(max_y, new_y) if new_y else max_y
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules, slide, prs, slide_index)
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it








def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "basic.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL

Presentation saved as presentation.pptx
Successfully converted basic.html to presentation.pptx


In [2]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function

def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process content within a column with properly aligned non-overlapping unified boxes with adaptive heights"""
    current_y = y_pos
    
    try:
        # Process each row in the column
        for row in column.find_all('div', class_='row'):
            try:
                # Calculate remaining vertical space on slide
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                # Skip if not enough space left on slide
                if remaining_height < Inches(0.5):
                    break
                
                # Get images and text content
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # Extract text content properly to include both tag content and direct text
                text = ""
                try:
                    # Create a temporary copy of the row to work with
                    row_copy = BeautifulSoup(str(row), 'html.parser')
                    for img_tag in row_copy.find_all('img'):
                        img_tag.decompose()  # Remove image tags
                    
                    # Collect all text from all nodes
                    for element in row_copy.descendants:
                        if isinstance(element, str) and element.strip():
                            text += element.strip() + " "
                    
                    text = text.strip()
                except Exception as e:
                    print(f"Error extracting text: {e}")
                    # Fallback to simpler text extraction
                    text = row.get_text().strip()
                
                has_text = bool(text)
                
                # If both text and images are present, create a unified box
                if has_text and has_images:
                    try:
                        # Calculate space needed for text - ADAPTIVE HEIGHT
                        # Estimate how many lines the text will take
                        text_length = len(text)
                        text_words = len(text.split())
                        
                        # Calculate appropriate text height based on content length
                        if text_length < 100:
                            text_height = Inches(0.6)  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0)  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5)  # Longer text
                        else:
                            text_height = Inches(2.0)  # Very long text
                        
                        # Space for images
                        image_space = Inches(1.2)  # Default space for images
                        
                        # Calculate total box height - ADAPTIVE
                        box_height = text_height + image_space + Inches(0.4)  # Text + images + padding
                        
                        # Ensure it fits in remaining space
                        if box_height > remaining_height:
                            # If there's reasonable space, use what we have
                            if remaining_height > Inches(1.5):
                                box_height = remaining_height - Inches(0.1)
                                text_height = box_height - image_space - Inches(0.4)  # Adjust text height to fit
                            else:
                                # Skip this content if not enough space - advance a little and continue
                                current_y += Inches(0.2)
                                continue
                        
                        # Create the unified box (background shape)
                        bg_shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, box_height
                        )
                        bg_shape.fill.solid()
                        bg_shape.fill.fore_color.rgb = get_color_from_class(row)
                        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                        
                        # Add text at the top of the box
                        text_box = slide.shapes.add_textbox(
                            x_pos + Inches(0.2), 
                            current_y + Inches(0.2), 
                            width - Inches(0.4), 
                            text_height
                        )
                        text_frame = text_box.text_frame
                        text_frame.word_wrap = True
                        
                        # Split text into paragraphs if very long
                        if text_length > 300:
                            # Try to split at sentences
                            sentences = re.split(r'(?<=[.!?])\s+', text)
                            
                            # Add first sentence
                            p = text_frame.add_paragraph()
                            p.text = sentences[0]
                            p.font.size = Pt(11)  # Slightly smaller font for long text
                            
                            # Add remaining sentences as separate paragraphs
                            for sentence in sentences[1:]:
                                if sentence.strip():
                                    p = text_frame.add_paragraph()
                                    p.text = sentence
                                    p.font.size = Pt(11)
                        else:
                            # Add as a single paragraph
                            p = text_frame.add_paragraph()
                            p.text = text
                            p.font.size = Pt(12)
                        
                        # Starting Y position for the first image
                        img_y = current_y + text_height + Inches(0.2)
                        
                        # Process first image only (safer)
                        if img_tags and img_y + Inches(1.0) < current_y + box_height:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # Get image dimensions with aspect ratio
                                        try:
                                            with PILImage.open(img_bytes) as pil_img:
                                                aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Calculate image dimensions based on available space
                                            available_width = width - Inches(0.4)
                                            available_height = box_height - text_height - Inches(0.4)
                                            
                                            # Default dimensions
                                            img_width = min(Inches(2.0), available_width)
                                            img_height = img_width / aspect_ratio
                                            
                                            # Adjust if height is too large
                                            if img_height > available_height:
                                                img_height = available_height
                                                img_width = img_height * aspect_ratio
                                                
                                                # Ensure width isn't too large
                                                if img_width > available_width:
                                                    img_width = available_width
                                                    img_height = img_width / aspect_ratio
                                            
                                            # Create picture with proper sizing
                                            picture = slide.shapes.add_picture(
                                                img_bytes, 
                                                x_pos + Inches(0.2), 
                                                img_y, 
                                                width=img_width, 
                                                height=img_height
                                            )
                                        except:
                                            # Fallback to fixed size if aspect ratio calculation fails
                                            img_width = min(Inches(2.0), width - Inches(0.4))
                                            img_height = Inches(1.0)
                                            
                                            picture = slide.shapes.add_picture(
                                                img_bytes, 
                                                x_pos + Inches(0.2), 
                                                img_y, 
                                                width=img_width, 
                                                height=img_height
                                            )
                                        
                                        img_bytes.close()
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                        
                        # Update position for next row
                        current_y += box_height + Inches(0.3)
                        
                    except Exception as unified_error:
                        print(f"Error creating unified box: {unified_error}")
                        # Skip to next row on error
                        current_y += Inches(0.5)
                
                # Handle text-only content with ADAPTIVE HEIGHT
                elif has_text:
                    try:
                        # Calculate appropriate text height based on content length
                        text_length = len(text)
                        
                        if text_length < 100:
                            text_height = Inches(0.6)  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0)  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5)  # Longer text
                        else:
                            text_height = Inches(2.0)  # Very long text
                        
                        # Ensure it fits in remaining space
                        if text_height > remaining_height - Inches(0.2):
                            text_height = remaining_height - Inches(0.2)
                        
                        # Create textbox with background
                        shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, text_height
                        )
                        shape.fill.solid()
                        shape.fill.fore_color.rgb = get_color_from_class(row) 
                        shape.line.color.rgb = RGBColor(200, 200, 200)

                        # Add the text
                        textbox = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.1), 
                            width - Inches(0.2), 
                            text_height - Inches(0.2)
                        )
                        text_frame = textbox.text_frame
                        text_frame.word_wrap = True
                        
                        # Split text into paragraphs if very long
                        if text_length > 300:
                            # Try to split at sentences
                            sentences = re.split(r'(?<=[.!?])\s+', text)
                            
                            # Add first sentence to first paragraph
                            text_frame.text = sentences[0]
                            
                            # Add remaining sentences as separate paragraphs
                            for sentence in sentences[1:]:
                                if sentence.strip():
                                    p = text_frame.add_paragraph()
                                    p.text = sentence
                                    p.font.size = Pt(11)
                        else:
                            # Add as a single paragraph
                            text_frame.text = text
                            paragraph = text_frame.paragraphs[0]
                            paragraph.font.size = Pt(12)
                        
                        # Update position
                        current_y += text_height + Inches(0.3)
                        
                    except Exception as text_error:
                        print(f"Error processing text-only content: {text_error}")
                        current_y += Inches(0.5)
                
                # Handle image-only content with adaptive sizing
                elif has_images:
                    try:
                        # Process first image only (safer)
                        if img_tags:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # Get image dimensions with aspect ratio
                                        try:
                                            with PILImage.open(img_bytes) as pil_img:
                                                aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Adaptive sizing based on available space
                                            img_width = min(Inches(3.0), width - Inches(0.4))
                                            img_height = img_width / aspect_ratio
                                            
                                            # Make sure it fits
                                            if img_height > remaining_height - Inches(0.3):
                                                img_height = remaining_height - Inches(0.3)
                                                img_width = img_height * aspect_ratio
                                                
                                                # Ensure width isn't too large
                                                if img_width > width - Inches(0.4):
                                                    img_width = width - Inches(0.4)
                                                    img_height = img_width / aspect_ratio
                                        except:
                                            # Fallback to fixed dimensions
                                            img_width = min(Inches(2.5), width - Inches(0.4))
                                            img_height = Inches(2.0)
                                        
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            x_pos + Inches(0.2), 
                                            current_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                                        current_y += img_height + Inches(0.3)
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                                current_y += Inches(0.5)
                    except Exception as img_section_error:
                        print(f"Error in image section: {img_section_error}")
                        current_y += Inches(0.5)
                
                # Add spacing between rows
                current_y += Inches(0.15)
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y


def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with simple reliable handling"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # Simplify by just adding all text to a single paragraph
    if all_text:
        # For very long text, use overflow handling
        if slide and prs and len(all_text) > 1000:
            # Use simple overflow handler
            chars_per_slide = 1000
            first_part = all_text[:chars_per_slide] + "..."
            
            p = text_frame.add_paragraph()
            p.text = first_part
            
            # Create a new slide for remaining text
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])
            
            # Add title to continuation slide
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from previous slide"
            p.font.italic = True
            p.font.bold = True
            
            # Add content to continuation slide
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            
            p = next_text_frame.add_paragraph()
            p.text = all_text[chars_per_slide:]
        else:
            # Just add text directly
            p = text_frame.add_paragraph()
            p.text = all_text
            
            # Apply basic formatting if needed
            if element.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 18, 'h4': 16}
                p.font.size = Pt(size_map.get(element.name, 14))




# Also update the handle_text_overflow function to manage text better
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved handling"""
    # Calculate approximately how much text fits on one slide - MORE CONSERVATIVE
    chars_per_slide = 800  # Reduced from 1500 for better fit
    
    if len(text) > chars_per_slide:
        # Find a good break point - end of sentence or paragraph
        break_point = chars_per_slide
        while break_point > chars_per_slide / 2:
            if text[break_point] in '.!?' and (break_point + 1 >= len(text) or text[break_point + 1] in ' \n\r\t'):
                break_point += 1  # Include the punctuation
                break
            elif text[break_point] in ' \n\r\t' and (break_point > 0 and text[break_point - 1] in '.!?'):
                break
            break_point -= 1
        
        if break_point <= chars_per_slide / 2:
            # If no good break found, find a word boundary
            break_point = chars_per_slide
            while break_point < len(text) and text[break_point] not in ' \n\r\t':
                break_point -= 1
            if break_point <= chars_per_slide / 2:
                break_point = chars_per_slide  # Fall back to hard break
        
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:break_point].strip()
        
        # Create a new slide for remaining text with BETTER FORMATTING
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
        
        # Add a title indicating continuation
        title_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = f"Continued from Slide {current_slide_index+1}"
        p.font.italic = True
        p.font.bold = True
        p.font.size = Pt(18)
        
        # Add the content with better positioning
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True
        
        # Recursively handle remaining text
        remaining_text = text[break_point:].strip()
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False


# Also update the process_standard_slide_content function
def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better overflow handling"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Get overall text length to determine if we might need special handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it as overflow text
    if len(full_text) > 1200 and prs:  # Lower threshold for better fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for row in rows:
            # Check remaining space
            remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.7) - current_y
            if remaining_height < Inches(1.0):
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs and len(rows) > 1:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"Continued from Slide {slide_index+1}"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[rows.index(row):]:
                        row_height = estimate_row_height(next_row)
                        
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        
                        new_y = process_content(next_row, text_frame, next_slide, css_rules, next_y, prs, slide_index+1)
                        
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                        
                        # Check if we're running out of space on this slide too
                        if next_y > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            break
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row with MORE SPACE
            current_y = max(current_y + row_height, new_y) + Inches(0.3) if new_y else current_y + row_height + Inches(0.3)


# Update the estimate_row_height function for better height calculation
def estimate_row_height(row):
    """Estimate the height needed for a row based on content - MORE GENEROUS"""
    # Base height
    height = Inches(0.6)  # Increased from 0.5
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height + 0.4))  # Extra margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.4))  # Increased default if can't parse
        else:
            # Default height for images with extra margin
            height = max(height, Inches(2.4))  # Increased from 2.0
    
    # Add height for text with BETTER ESTIMATION
    text_content = row.get_text().strip()
    text_words = len(text_content.split())
    # Approximate words per line based on average word length
    words_per_line = 12  # Conservative estimate
    text_lines = max(1, int(text_words / words_per_line) + 1)
    # More generous line height
    height = max(height, Inches(0.35 * text_lines))  # Increased from 0.3
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        # More space per table row
        height = max(height, Inches(0.4 * rows))  # Increased from 0.3
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        # More space per code line
        height = max(height, Inches(0.25 * code_lines))  # Increased from 0.2
    
    # Add a bit extra to prevent tight fit
    return height + Inches(0.2)  # Extra padding

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)

def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Handle different content types appropriately
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    elif element.find('img'):
        new_y = process_image_with_download(element, text_frame, slide, css_rules, y_position)
        max_y = max(max_y, new_y) if new_y else max_y
    else:
        # Process text content
        process_text_content(element, text_frame, css_rules, slide, prs, slide_index)
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it

def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color






def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL

Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx


In [None]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")

def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function
def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process content within a column with properly formatted headings and content"""
    current_y = y_pos
    
    try:
        # Process each row in the column
        for row in column.find_all('div', class_='row'):
            try:
                # Calculate remaining vertical space on slide
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                # Skip if not enough space left on slide
                if remaining_height < Inches(0.5):
                    break
                
                # Get images and text content
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # IMPROVED TEXT EXTRACTION: Separate headings and paragraphs
                header_text = ""
                paragraph_text = ""
                
                # Extract headers (h1-h6)
                for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                    header_text += header.get_text().strip() + " "
                
                # Extract paragraphs
                for para in row.find_all('p'):
                    paragraph_text += para.get_text().strip() + " "
                
                # Combine with any other text
                other_text = ""
                row_copy = BeautifulSoup(str(row), 'html.parser')
                
                # Remove headers, paragraphs and images
                for tag in row_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']):
                    tag.decompose()
                
                # Get remaining text
                for element in row_copy.descendants:
                    if isinstance(element, str) and element.strip():
                        other_text += element.strip() + " "
                
                # Final combined text
                combined_text = (header_text + " " + paragraph_text + " " + other_text).strip()
                has_text = bool(combined_text)
                
                # If both text and images are present, create a unified box
                if has_text and has_images:
                    try:
                        # Calculate space needed for text - ADAPTIVE HEIGHT
                        text_length = len(combined_text)
                        
                        # Calculate appropriate text height based on content length
                        if text_length < 100:
                            text_height = Inches(0.6)  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0)  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5)  # Longer text
                        else:
                            text_height = Inches(2.0)  # Very long text
                        
                        # Get image sizes from HTML attributes
                        image_height = 0
                        image_width = 0
                        if img_tags:
                            img = img_tags[0]
                            if img.get('height') and img.get('width'):
                                try:
                                    # Convert pixels to inches (approximate)
                                    height_px = int(img.get('height'))
                                    width_px = int(img.get('width'))
                                    image_height = Inches(height_px / 96)
                                    image_width = Inches(width_px / 96)
                                except (ValueError, TypeError):
                                    # Default if conversion fails
                                    image_height = Inches(1.0)
                                    image_width = Inches(1.0)
                            else:
                                # Default sizes if not specified
                                image_height = Inches(1.0)
                                image_width = Inches(1.0)
                        
                        # Space for images - use actual height plus margin
                        image_space = image_height + Inches(0.4)
                        
                        # Calculate total box height - ADAPTIVE
                        box_height = text_height + image_space + Inches(0.2)  # Text + images + padding
                        
                        # Ensure it fits in remaining space
                        if box_height > remaining_height:
                            # If there's reasonable space, use what we have
                            if remaining_height > Inches(1.5):
                                box_height = remaining_height - Inches(0.1)
                                text_height = box_height - image_space - Inches(0.2)  # Adjust text height to fit
                            else:
                                # Skip this content if not enough space - advance a little and continue
                                current_y += Inches(0.2)
                                continue
                        
                        # Create the unified box (background shape)
                        bg_shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, box_height
                        )
                        bg_shape.fill.solid()
                        bg_shape.fill.fore_color.rgb = get_color_from_class(row)
                        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                        
                        # Add text at the top of the box - REDUCED GAP
                        text_box = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.05),  # Reduced from 0.2 to 0.05
                            width - Inches(0.2), 
                            text_height
                        )
                        text_frame = text_box.text_frame
                        text_frame.word_wrap = True
                        text_frame.margin_top = 0  # Remove top margin
                        
                        # Add header text with bold formatting if it exists
                        if header_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = header_text.strip()
                            p.font.bold = True
                            p.font.size = Pt(14)  # Slightly larger font for header
                            p.space_before = 0  # No space before first paragraph
                            p.space_after = Pt(2)  # Small space after header
                        
                        # Add paragraph text if it exists
                        if paragraph_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = paragraph_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                            if not header_text.strip():  # If this is the first paragraph
                                p.space_before = 0
                        
                        # Add other text if it exists
                        if other_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = other_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                        
                        # Calculate position for image AFTER all text
                        img_y = current_y + text_height + Inches(0.1)
                        
                        # Process first image only (safer)
                        if img_tags and img_y + image_height < current_y + box_height:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # Get image dimensions from HTML attributes
                                        img_width = image_width
                                        img_height = image_height
                                        
                                        # Use aspect ratio only if HTML dimensions are not available
                                        if img_width == 0 or img_height == 0:
                                            try:
                                                with PILImage.open(img_bytes) as pil_img:
                                                    aspect_ratio = pil_img.width / pil_img.height
                                                
                                                img_bytes.seek(0)  # Reset file pointer
                                                
                                                # Default dimensions
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            except:
                                                # Fallback to safe defaults
                                                img_width = min(Inches(1.0), width - Inches(0.4))
                                                img_height = Inches(1.0)
                                        
                                        # Center the image horizontally in the box
                                        img_x = x_pos + (width - img_width) / 2
                                        
                                        # Create picture with proper sizing
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            img_x, 
                                            img_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                        
                        # Update position for next row
                        current_y += box_height + Inches(0.2)
                        
                    except Exception as unified_error:
                        print(f"Error creating unified box: {unified_error}")
                        # Skip to next row on error
                        current_y += Inches(0.5)
                
                # Handle text-only content with ADAPTIVE HEIGHT
                elif has_text:
                    try:
                        # Calculate appropriate text height based on content length
                        text_length = len(combined_text)
                        # Add extra height if we have headers
                        header_lines = 1 if header_text.strip() else 0
                        para_lines = 1 if paragraph_text.strip() else 0
                        
                        if text_length < 100:
                            text_height = Inches(0.6 + 0.2 * (header_lines + para_lines))  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0 + 0.2 * (header_lines + para_lines))  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5 + 0.2 * (header_lines + para_lines))  # Longer text
                        else:
                            text_height = Inches(2.0 + 0.2 * (header_lines + para_lines))  # Very long text
                        
                        # Ensure it fits in remaining space
                        if text_height > remaining_height - Inches(0.2):
                            text_height = remaining_height - Inches(0.2)
                        
                        # Create textbox with background
                        shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, text_height
                        )
                        shape.fill.solid()
                        shape.fill.fore_color.rgb = get_color_from_class(row) 
                        shape.line.color.rgb = RGBColor(200, 200, 200)

                        # Add the text - REDUCED GAP
                        textbox = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.05),  # Reduced from 0.1 to 0.05
                            width - Inches(0.2), 
                            text_height - Inches(0.1)
                        )
                        text_frame = textbox.text_frame
                        text_frame.word_wrap = True
                        text_frame.margin_top = 0  # Remove top margin
                        
                        # Add header text with bold formatting if it exists
                        if header_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = header_text.strip()
                            p.font.bold = True
                            p.font.size = Pt(14)  # Slightly larger font for header
                            p.space_before = 0  # No space before first paragraph
                            p.space_after = Pt(2)  # Small space after header
                        
                        # Add paragraph text if it exists
                        if paragraph_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = paragraph_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                            if not header_text.strip():  # If this is the first paragraph
                                p.space_before = 0
                        
                        # Add other text if it exists
                        if other_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = other_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                        
                        # Update position
                        current_y += text_height + Inches(0.2)
                        
                    except Exception as text_error:
                        print(f"Error processing text-only content: {text_error}")
                        current_y += Inches(0.5)
                
                # Handle image-only content with adaptive sizing
                elif has_images:
                    try:
                        # Process first image only (safer)
                        if img_tags:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                
                                # Get image dimensions from HTML attributes
                                img_width = 0
                                img_height = 0
                                if img.get('width') and img.get('height'):
                                    try:
                                        width_px = int(img.get('width'))
                                        height_px = int(img.get('height'))
                                        img_width = Inches(width_px / 96)
                                        img_height = Inches(height_px / 96)
                                    except (ValueError, TypeError):
                                        img_width = 0
                                        img_height = 0
                                
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # If HTML dimensions not available, use aspect ratio
                                        if img_width == 0 or img_height == 0:
                                            try:
                                                with PILImage.open(img_bytes) as pil_img:
                                                    aspect_ratio = pil_img.width / pil_img.height
                                                
                                                img_bytes.seek(0)  # Reset file pointer
                                                
                                                # Adaptive sizing based on available space
                                                img_width = min(Inches(3.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            except:
                                                # Fallback to fixed dimensions
                                                img_width = min(Inches(2.5), width - Inches(0.4))
                                                img_height = Inches(2.0)
                                        
                                        # Make sure it fits
                                        if img_height > remaining_height - Inches(0.3):
                                            img_height = remaining_height - Inches(0.3)
                                            img_width = img_height * aspect_ratio
                                            
                                            # Ensure width isn't too large
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                        
                                        # Center the image horizontally in the column
                                        img_x = x_pos + (width - img_width) / 2
                                        
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            img_x, 
                                            current_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                                        current_y += img_height + Inches(0.3)
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                                current_y += Inches(0.5)
                    except Exception as img_section_error:
                        print(f"Error in image section: {img_section_error}")
                        current_y += Inches(0.5)
                
                # Add spacing between rows
                current_y += Inches(0.1)  # Reduced from 0.15
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y
def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with simple reliable handling"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # Simplify by just adding all text to a single paragraph
    if all_text:
        # For very long text, use overflow handling
        if slide and prs and len(all_text) > 1000:
            # Use simple overflow handler
            chars_per_slide = 1000
            first_part = all_text[:chars_per_slide] + "..."
            
            p = text_frame.add_paragraph()
            p.text = first_part
            
            # Create a new slide for remaining text
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])
            
            # Add title to continuation slide
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from previous slide"
            p.font.italic = True
            p.font.bold = True
            
            # Add content to continuation slide
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            
            p = next_text_frame.add_paragraph()
            p.text = all_text[chars_per_slide:]
        else:
            # Just add text directly
            p = text_frame.add_paragraph()
            p.text = all_text
            
            # Apply basic formatting if needed
            if element.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 18, 'h4': 16}
                p.font.size = Pt(size_map.get(element.name, 14))




# Also update the handle_text_overflow function to manage text better
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved handling"""
    # Calculate approximately how much text fits on one slide - MORE CONSERVATIVE
    chars_per_slide = 800  # Reduced from 1500 for better fit
    
    if len(text) > chars_per_slide:
        # Find a good break point - end of sentence or paragraph
        break_point = chars_per_slide
        while break_point > chars_per_slide / 2:
            if text[break_point] in '.!?' and (break_point + 1 >= len(text) or text[break_point + 1] in ' \n\r\t'):
                break_point += 1  # Include the punctuation
                break
            elif text[break_point] in ' \n\r\t' and (break_point > 0 and text[break_point - 1] in '.!?'):
                break
            break_point -= 1
        
        if break_point <= chars_per_slide / 2:
            # If no good break found, find a word boundary
            break_point = chars_per_slide
            while break_point < len(text) and text[break_point] not in ' \n\r\t':
                break_point -= 1
            if break_point <= chars_per_slide / 2:
                break_point = chars_per_slide  # Fall back to hard break
        
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:break_point].strip()
        
        # Create a new slide for remaining text with BETTER FORMATTING
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
        
        # Add a title indicating continuation
        title_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = f"Continued from Slide {current_slide_index+1}"
        p.font.italic = True
        p.font.bold = True
        p.font.size = Pt(18)
        
        # Add the content with better positioning
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True
        
        # Recursively handle remaining text
        remaining_text = text[break_point:].strip()
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False


# Also update the process_standard_slide_content function
def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better overflow handling"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Get overall text length to determine if we might need special handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it as overflow text
    if len(full_text) > 1200 and prs:  # Lower threshold for better fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for row in rows:
            # Check remaining space
            remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.7) - current_y
            if remaining_height < Inches(1.0):
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs and len(rows) > 1:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"Continued from Slide {slide_index+1}"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[rows.index(row):]:
                        row_height = estimate_row_height(next_row)
                        
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        
                        new_y = process_content(next_row, text_frame, next_slide, css_rules, next_y, prs, slide_index+1)
                        
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                        
                        # Check if we're running out of space on this slide too
                        if next_y > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            break
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row with MORE SPACE
            current_y = max(current_y + row_height, new_y) + Inches(0.3) if new_y else current_y + row_height + Inches(0.3)


# Update the estimate_row_height function for better height calculation
def estimate_row_height(row):
    """Estimate the height needed for a row based on content - MORE GENEROUS"""
    # Base height
    height = Inches(0.6)  # Increased from 0.5
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height + 0.4))  # Extra margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.4))  # Increased default if can't parse
        else:
            # Default height for images with extra margin
            height = max(height, Inches(2.4))  # Increased from 2.0
    
    # Add height for text with BETTER ESTIMATION
    text_content = row.get_text().strip()
    text_words = len(text_content.split())
    # Approximate words per line based on average word length
    words_per_line = 12  # Conservative estimate
    text_lines = max(1, int(text_words / words_per_line) + 1)
    # More generous line height
    height = max(height, Inches(0.35 * text_lines))  # Increased from 0.3
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        # More space per table row
        height = max(height, Inches(0.4 * rows))  # Increased from 0.3
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        # More space per code line
        height = max(height, Inches(0.25 * code_lines))  # Increased from 0.2
    
    # Add a bit extra to prevent tight fit
    return height + Inches(0.2)  # Extra padding

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and dynamic image handling"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)

def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide with custom layout"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Process headers first - they should be bold
    for header in element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        p = text_frame.add_paragraph()
        p.text = header.get_text().strip()
        p.font.bold = True
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        p.font.size = Pt(size_map.get(header.name, 14))
    
    # Process paragraphs - should not be bold, on next line
    for para in element.find_all('p'):
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        p.font.bold = False
        p.font.size = Pt(12)
    
    # Calculate approximate text height based on added paragraphs
    text_height = Inches(0.3) * len(text_frame.paragraphs)
    
    # Now handle specialized content types
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    
    # Handle images - place below text with proper centering
    img = element.find('img')
    if img:
        # Calculate the position for the image below the text
        img_top = max_y + text_height + Inches(0.2)  # Add some spacing
        
        # Get image attributes
        img_url = img.get('src', '')
        img_alt = img.get('alt', 'Image')
        
        try:
            # Download the image with timeout
            response = requests.get(img_url, stream=True, timeout=10)
            
            if response.status_code == 200:
                img_bytes = BytesIO(response.content)
                
                try:
                    # Get image dimensions with aspect ratio
                    with PILImage.open(img_bytes) as pil_img:
                        img_width, img_height = pil_img.size
                        aspect_ratio = img_width / img_height
                    
                    img_bytes.seek(0)  # Reset file pointer
                    
                    # Get dimensions from HTML
                    width_specified = img.get('width')
                    height_specified = img.get('height')
                    
                    # Default dimensions - more reasonable for content images
                    img_width = Inches(2.0)  # Default width for content images
                    img_height = img_width / aspect_ratio
                    
                    # Try to use HTML dimensions if available
                    if width_specified and height_specified:
                        try:
                            width_px = int(width_specified)
                            height_px = int(height_specified)
                            if width_px > 0 and height_px > 0:
                                img_width = Inches(width_px / 96)
                                img_height = Inches(height_px / 96)
                        except (ValueError, TypeError):
                            pass
                    
                    # Calculate center position
                    slide_width = Inches(SLIDE_WIDTH_INCHES)
                    left_position = (slide_width - img_width) / 2  # Center horizontally
                    
                    # Ensure not too large
                    if img_width > Inches(6):
                        img_width = Inches(6)
                        img_height = img_width / aspect_ratio
                    
                    # Add the image with proper centering
                    picture = slide.shapes.add_picture(
                        img_bytes, 
                        left_position, 
                        img_top, 
                        width=img_width, 
                        height=img_height
                    )
                    
                    # Update the maximum y position
                    max_y = max(max_y, img_top + img_height + Inches(0.2))
                    
                except Exception as img_error:
                    print(f"Error processing image: {img_error}")
                    # Add error text to the slide
                    p = text_frame.add_paragraph()
                    p.text = f"[Image Error: {img_alt}]"
            else:
                # Failed to download image
                p = text_frame.add_paragraph()
                p.text = f"[Image not available: {img_alt}]"
                
        except Exception as request_error:
            print(f"Error downloading image: {request_error}")
            p = text_frame.add_paragraph()
            p.text = f"[Image download error: {img_alt}]"
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it

def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color






def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename
def apply_slide_background_color(slide_html, current_slide):
    """Apply background color to the entire slide based on color classes"""
    # Get the background color from the slide's class
    bg_color = get_color_from_class(slide_html)
    
    # Only apply if a color was found (not the default white)
    if bg_color.rgb != (255, 255, 255):
        # Add a background shape that covers the entire slide
        left = top = 0
        width = Inches(SLIDE_WIDTH_INCHES)
        height = Inches(SLIDE_HEIGHT_INCHES)
        
        # Create a rectangle that covers the entire slide
        bg_shape = current_slide.shapes.add_shape(
            MSO_SHAPE.RECTANGLE, 
            left, top, width, height
        )
        bg_shape.fill.solid()
        bg_shape.fill.fore_color.rgb = bg_color
        
        # No border
        bg_shape.line.fill.background()
        
        # Send to back so it doesn't cover other content
        current_slide.shapes._spTree.remove(bg_shape._element)
        current_slide.shapes._spTree.insert(2, bg_shape._element)

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL

Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx


In [None]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")


def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout and apply background color if specified"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Apply background color if the slide has a color class
    apply_slide_background_color(slide, current_slide)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and apply background color if specified"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Apply background color if the slide has a color class
    apply_slide_background_color(slide_html, slide)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function
def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process content within a column with properly formatted headings and content"""
    current_y = y_pos
    
    try:
        # Process each row in the column
        for row in column.find_all('div', class_='row'):
            try:
                # Calculate remaining vertical space on slide
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                # Skip if not enough space left on slide
                if remaining_height < Inches(0.5):
                    break
                
                # Get images and text content
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # IMPROVED TEXT EXTRACTION: Separate headings and paragraphs
                header_text = ""
                paragraph_text = ""
                
                # Extract headers (h1-h6)
                for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                    header_text += header.get_text().strip() + " "
                
                # Extract paragraphs
                for para in row.find_all('p'):
                    paragraph_text += para.get_text().strip() + " "
                
                # Combine with any other text
                other_text = ""
                row_copy = BeautifulSoup(str(row), 'html.parser')
                
                # Remove headers, paragraphs and images
                for tag in row_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']):
                    tag.decompose()
                
                # Get remaining text
                for element in row_copy.descendants:
                    if isinstance(element, str) and element.strip():
                        other_text += element.strip() + " "
                
                # Final combined text
                combined_text = (header_text + " " + paragraph_text + " " + other_text).strip()
                has_text = bool(combined_text)
                
                # If both text and images are present, create a unified box
                if has_text and has_images:
                    try:
                        # Calculate space needed for text - ADAPTIVE HEIGHT
                        text_length = len(combined_text)
                        
                        # Calculate appropriate text height based on content length
                        if text_length < 100:
                            text_height = Inches(0.6)  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0)  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5)  # Longer text
                        else:
                            text_height = Inches(2.0)  # Very long text
                        
                        # Get image sizes from HTML attributes
                        image_height = 0
                        image_width = 0
                        if img_tags:
                            img = img_tags[0]
                            if img.get('height') and img.get('width'):
                                try:
                                    # Convert pixels to inches (approximate)
                                    height_px = int(img.get('height'))
                                    width_px = int(img.get('width'))
                                    image_height = Inches(height_px / 96)
                                    image_width = Inches(width_px / 96)
                                except (ValueError, TypeError):
                                    # Default if conversion fails
                                    image_height = Inches(1.0)
                                    image_width = Inches(1.0)
                            else:
                                # Default sizes if not specified
                                image_height = Inches(1.0)
                                image_width = Inches(1.0)
                        
                        # Space for images - use actual height plus margin
                        image_space = image_height + Inches(0.4)
                        
                        # Calculate total box height - ADAPTIVE
                        box_height = text_height + image_space + Inches(0.2)  # Text + images + padding
                        
                        # Ensure it fits in remaining space
                        if box_height > remaining_height:
                            # If there's reasonable space, use what we have
                            if remaining_height > Inches(1.5):
                                box_height = remaining_height - Inches(0.1)
                                text_height = box_height - image_space - Inches(0.2)  # Adjust text height to fit
                            else:
                                # Skip this content if not enough space - advance a little and continue
                                current_y += Inches(0.2)
                                continue
                        
                        # Create the unified box (background shape)
                        bg_shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, box_height
                        )
                        bg_shape.fill.solid()
                        bg_shape.fill.fore_color.rgb = get_color_from_class(row)
                        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                        
                        # Add text at the top of the box - REDUCED GAP
                        text_box = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.05),  # Reduced from 0.2 to 0.05
                            width - Inches(0.2), 
                            text_height
                        )
                        text_frame = text_box.text_frame
                        text_frame.word_wrap = True
                        text_frame.margin_top = 0  # Remove top margin
                        
                        # Add header text with bold formatting if it exists
                        if header_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = header_text.strip()
                            p.font.bold = True
                            p.font.size = Pt(14)  # Slightly larger font for header
                            p.space_before = 0  # No space before first paragraph
                            p.space_after = Pt(2)  # Small space after header
                        
                        # Add paragraph text if it exists
                        if paragraph_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = paragraph_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                            if not header_text.strip():  # If this is the first paragraph
                                p.space_before = 0
                        
                        # Add other text if it exists
                        if other_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = other_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                        
                        # Calculate position for image AFTER all text
                        img_y = current_y + text_height + Inches(0.1)
                        
                        # Process first image only (safer)
                        if img_tags and img_y + image_height < current_y + box_height:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # Get image dimensions from HTML attributes
                                        img_width = image_width
                                        img_height = image_height
                                        
                                        # Use aspect ratio only if HTML dimensions are not available
                                        if img_width == 0 or img_height == 0:
                                            try:
                                                with PILImage.open(img_bytes) as pil_img:
                                                    aspect_ratio = pil_img.width / pil_img.height
                                                
                                                img_bytes.seek(0)  # Reset file pointer
                                                
                                                # Default dimensions
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            except:
                                                # Fallback to safe defaults
                                                img_width = min(Inches(1.0), width - Inches(0.4))
                                                img_height = Inches(1.0)
                                        
                                        # Center the image horizontally in the box
                                        img_x = x_pos + (width - img_width) / 2
                                        
                                        # Create picture with proper sizing
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            img_x, 
                                            img_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                        
                        # Update position for next row
                        current_y += box_height + Inches(0.2)
                        
                    except Exception as unified_error:
                        print(f"Error creating unified box: {unified_error}")
                        # Skip to next row on error
                        current_y += Inches(0.5)
                
                # Handle text-only content with ADAPTIVE HEIGHT
                elif has_text:
                    try:
                        # Calculate appropriate text height based on content length
                        text_length = len(combined_text)
                        # Add extra height if we have headers
                        header_lines = 1 if header_text.strip() else 0
                        para_lines = 1 if paragraph_text.strip() else 0
                        
                        if text_length < 100:
                            text_height = Inches(0.6 + 0.2 * (header_lines + para_lines))  # Short text
                        elif text_length < 250:
                            text_height = Inches(1.0 + 0.2 * (header_lines + para_lines))  # Medium text
                        elif text_length < 500:
                            text_height = Inches(1.5 + 0.2 * (header_lines + para_lines))  # Longer text
                        else:
                            text_height = Inches(2.0 + 0.2 * (header_lines + para_lines))  # Very long text
                        
                        # Ensure it fits in remaining space
                        if text_height > remaining_height - Inches(0.2):
                            text_height = remaining_height - Inches(0.2)
                        
                        # Create textbox with background
                        shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, text_height
                        )
                        shape.fill.solid()
                        shape.fill.fore_color.rgb = get_color_from_class(row) 
                        shape.line.color.rgb = RGBColor(200, 200, 200)

                        # Add the text - REDUCED GAP
                        textbox = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.05),  # Reduced from 0.1 to 0.05
                            width - Inches(0.2), 
                            text_height - Inches(0.1)
                        )
                        text_frame = textbox.text_frame
                        text_frame.word_wrap = True
                        text_frame.margin_top = 0  # Remove top margin
                        
                        # Add header text with bold formatting if it exists
                        if header_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = header_text.strip()
                            p.font.bold = True
                            p.font.size = Pt(14)  # Slightly larger font for header
                            p.space_before = 0  # No space before first paragraph
                            p.space_after = Pt(2)  # Small space after header
                        
                        # Add paragraph text if it exists
                        if paragraph_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = paragraph_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                            if not header_text.strip():  # If this is the first paragraph
                                p.space_before = 0
                        
                        # Add other text if it exists
                        if other_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = other_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                        
                        # Update position
                        current_y += text_height + Inches(0.2)
                        
                    except Exception as text_error:
                        print(f"Error processing text-only content: {text_error}")
                        current_y += Inches(0.5)
                
                # Handle image-only content with adaptive sizing
                elif has_images:
                    try:
                        # Process first image only (safer)
                        if img_tags:
                            try:
                                img = img_tags[0]  # Just process the first image
                                img_url = img.get('src', '')
                                
                                # Get image dimensions from HTML attributes
                                img_width = 0
                                img_height = 0
                                if img.get('width') and img.get('height'):
                                    try:
                                        width_px = int(img.get('width'))
                                        height_px = int(img.get('height'))
                                        img_width = Inches(width_px / 96)
                                        img_height = Inches(height_px / 96)
                                    except (ValueError, TypeError):
                                        img_width = 0
                                        img_height = 0
                                
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        # If HTML dimensions not available, use aspect ratio
                                        if img_width == 0 or img_height == 0:
                                            try:
                                                with PILImage.open(img_bytes) as pil_img:
                                                    aspect_ratio = pil_img.width / pil_img.height
                                                
                                                img_bytes.seek(0)  # Reset file pointer
                                                
                                                # Adaptive sizing based on available space
                                                img_width = min(Inches(3.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            except:
                                                # Fallback to fixed dimensions
                                                img_width = min(Inches(2.5), width - Inches(0.4))
                                                img_height = Inches(2.0)
                                        
                                        # Make sure it fits
                                        if img_height > remaining_height - Inches(0.3):
                                            img_height = remaining_height - Inches(0.3)
                                            img_width = img_height * aspect_ratio
                                            
                                            # Ensure width isn't too large
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                        
                                        # Center the image horizontally in the column
                                        img_x = x_pos + (width - img_width) / 2
                                        
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            img_x, 
                                            current_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                                        current_y += img_height + Inches(0.3)
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                                current_y += Inches(0.5)
                    except Exception as img_section_error:
                        print(f"Error in image section: {img_section_error}")
                        current_y += Inches(0.5)
                
                # Add spacing between rows
                current_y += Inches(0.1)  # Reduced from 0.15
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y
def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with simple reliable handling"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # Simplify by just adding all text to a single paragraph
    if all_text:
        # For very long text, use overflow handling
        if slide and prs and len(all_text) > 1000:
            # Use simple overflow handler
            chars_per_slide = 1000
            first_part = all_text[:chars_per_slide] + "..."
            
            p = text_frame.add_paragraph()
            p.text = first_part
            
            # Create a new slide for remaining text
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])
            
            # Add title to continuation slide
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from previous slide"
            p.font.italic = True
            p.font.bold = True
            
            # Add content to continuation slide
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            
            p = next_text_frame.add_paragraph()
            p.text = all_text[chars_per_slide:]
        else:
            # Just add text directly
            p = text_frame.add_paragraph()
            p.text = all_text
            
            # Apply basic formatting if needed
            if element.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 18, 'h4': 16}
                p.font.size = Pt(size_map.get(element.name, 14))




# Also update the handle_text_overflow function to manage text better
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved handling"""
    # Calculate approximately how much text fits on one slide - MORE CONSERVATIVE
    chars_per_slide = 800  # Reduced from 1500 for better fit
    
    if len(text) > chars_per_slide:
        # Find a good break point - end of sentence or paragraph
        break_point = chars_per_slide
        while break_point > chars_per_slide / 2:
            if text[break_point] in '.!?' and (break_point + 1 >= len(text) or text[break_point + 1] in ' \n\r\t'):
                break_point += 1  # Include the punctuation
                break
            elif text[break_point] in ' \n\r\t' and (break_point > 0 and text[break_point - 1] in '.!?'):
                break
            break_point -= 1
        
        if break_point <= chars_per_slide / 2:
            # If no good break found, find a word boundary
            break_point = chars_per_slide
            while break_point < len(text) and text[break_point] not in ' \n\r\t':
                break_point -= 1
            if break_point <= chars_per_slide / 2:
                break_point = chars_per_slide  # Fall back to hard break
        
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:break_point].strip()
        
        # Create a new slide for remaining text with BETTER FORMATTING
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
        
        # Add a title indicating continuation
        title_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = f"Continued from Slide {current_slide_index+1}"
        p.font.italic = True
        p.font.bold = True
        p.font.size = Pt(18)
        
        # Add the content with better positioning
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True
        
        # Recursively handle remaining text
        remaining_text = text[break_point:].strip()
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False


# Also update the process_standard_slide_content function
def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better overflow handling"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Get overall text length to determine if we might need special handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it as overflow text
    if len(full_text) > 1200 and prs:  # Lower threshold for better fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for row in rows:
            # Check remaining space
            remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.7) - current_y
            if remaining_height < Inches(1.0):
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs and len(rows) > 1:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"Continued from Slide {slide_index+1}"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[rows.index(row):]:
                        row_height = estimate_row_height(next_row)
                        
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        
                        new_y = process_content(next_row, text_frame, next_slide, css_rules, next_y, prs, slide_index+1)
                        
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                        
                        # Check if we're running out of space on this slide too
                        if next_y > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            break
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row with MORE SPACE
            current_y = max(current_y + row_height, new_y) + Inches(0.3) if new_y else current_y + row_height + Inches(0.3)


# Update the estimate_row_height function for better height calculation
def estimate_row_height(row):
    """Estimate the height needed for a row based on content - MORE GENEROUS"""
    # Base height
    height = Inches(0.6)  # Increased from 0.5
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height + 0.4))  # Extra margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.4))  # Increased default if can't parse
        else:
            # Default height for images with extra margin
            height = max(height, Inches(2.4))  # Increased from 2.0
    
    # Add height for text with BETTER ESTIMATION
    text_content = row.get_text().strip()
    text_words = len(text_content.split())
    # Approximate words per line based on average word length
    words_per_line = 12  # Conservative estimate
    text_lines = max(1, int(text_words / words_per_line) + 1)
    # More generous line height
    height = max(height, Inches(0.35 * text_lines))  # Increased from 0.3
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        # More space per table row
        height = max(height, Inches(0.4 * rows))  # Increased from 0.3
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        # More space per code line
        height = max(height, Inches(0.25 * code_lines))  # Increased from 0.2
    
    # Add a bit extra to prevent tight fit
    return height + Inches(0.2)  # Extra padding

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position



def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    """Process HTML content and add it to a PowerPoint slide with custom layout"""
    # Keep track of the vertical position
    max_y = y_position if y_position is not None else Inches(1.5)
    
    # Process headers first - they should be bold
    for header in element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        p = text_frame.add_paragraph()
        p.text = header.get_text().strip()
        p.font.bold = True
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        p.font.size = Pt(size_map.get(header.name, 14))
    
    # Process paragraphs - should not be bold, on next line
    for para in element.find_all('p'):
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        p.font.bold = False
        p.font.size = Pt(12)
    
    # Calculate approximate text height based on added paragraphs
    text_height = Inches(0.3) * len(text_frame.paragraphs)
    
    # Now handle specialized content types
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    
    # Handle images - place below text with proper centering
    img = element.find('img')
    if img:
        # Calculate the position for the image below the text
        img_top = max_y + text_height + Inches(0.2)  # Add some spacing
        
        # Get image attributes
        img_url = img.get('src', '')
        img_alt = img.get('alt', 'Image')
        
        try:
            # Download the image with timeout
            response = requests.get(img_url, stream=True, timeout=10)
            
            if response.status_code == 200:
                img_bytes = BytesIO(response.content)
                
                try:
                    # Get image dimensions with aspect ratio
                    with PILImage.open(img_bytes) as pil_img:
                        img_width, img_height = pil_img.size
                        aspect_ratio = img_width / img_height
                    
                    img_bytes.seek(0)  # Reset file pointer
                    
                    # Get dimensions from HTML
                    width_specified = img.get('width')
                    height_specified = img.get('height')
                    
                    # Default dimensions - more reasonable for content images
                    img_width = Inches(2.0)  # Default width for content images
                    img_height = img_width / aspect_ratio
                    
                    # Try to use HTML dimensions if available
                    if width_specified and height_specified:
                        try:
                            width_px = int(width_specified)
                            height_px = int(height_specified)
                            if width_px > 0 and height_px > 0:
                                img_width = Inches(width_px / 96)
                                img_height = Inches(height_px / 96)
                        except (ValueError, TypeError):
                            pass
                    
                    # Calculate center position
                    slide_width = Inches(SLIDE_WIDTH_INCHES)
                    left_position = (slide_width - img_width) / 2  # Center horizontally
                    
                    # Ensure not too large
                    if img_width > Inches(6):
                        img_width = Inches(6)
                        img_height = img_width / aspect_ratio
                    
                    # Add the image with proper centering
                    picture = slide.shapes.add_picture(
                        img_bytes, 
                        left_position, 
                        img_top, 
                        width=img_width, 
                        height=img_height
                    )
                    
                    # Update the maximum y position
                    max_y = max(max_y, img_top + img_height + Inches(0.2))
                    
                except Exception as img_error:
                    print(f"Error processing image: {img_error}")
                    # Add error text to the slide
                    p = text_frame.add_paragraph()
                    p.text = f"[Image Error: {img_alt}]"
            else:
                # Failed to download image
                p = text_frame.add_paragraph()
                p.text = f"[Image not available: {img_alt}]"
                
        except Exception as request_error:
            print(f"Error downloading image: {request_error}")
            p = text_frame.add_paragraph()
            p.text = f"[Image download error: {img_alt}]"
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it

def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color






def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename
def apply_slide_background_color(slide_html, current_slide):
    """Apply background color to the entire slide based on color classes"""
    try:
        # Get the background color from the slide's class
        bg_color = get_color_from_class(slide_html)
        
        # Get the RGB values - RGBColor objects store RGB values directly in rgb attribute
        default_color = RGBColor(255, 255, 255)
        
        # Direct comparison of RGBColor objects
        if bg_color != default_color:
            # Add a background shape that covers the entire slide
            left = top = 0
            width = Inches(SLIDE_WIDTH_INCHES)
            height = Inches(SLIDE_HEIGHT_INCHES)
            
            # Create a rectangle that covers the entire slide
            bg_shape = current_slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE, 
                left, top, width, height
            )
            bg_shape.fill.solid()
            bg_shape.fill.fore_color.rgb = bg_color
            
            # No border
            bg_shape.line.width = 0
            
            # Send to back so it doesn't cover other content
            try:
                # Get all shapes and reorder
                shapes = list(current_slide.shapes)
                if len(shapes) > 1:
                    # Move background to first position (back)
                    last_idx = len(shapes) - 1
                    bg_idx = last_idx  # Assuming it's the last one added
                    
                    try:
                        current_slide.shapes._spTree.remove(bg_shape._element)
                        current_slide.shapes._spTree.insert(0, bg_shape._element)
                        print(f"Successfully applied {get_color_name(bg_color)} background to slide")
                    except:
                        print(f"Could not reorder slide background but color was applied")
            except:
                print(f"Applied {get_color_name(bg_color)} background, but couldn't reorder it")
    except Exception as e:
        # If background color application fails, log it but don't crash
        print(f"Warning: Could not apply slide background color: {e}")

def get_color_name(color):
    """Get a color name from an RGBColor object by comparing values"""
    # Map RGB tuples to color names
    color_map_reverse = {
        (255, 200, 200): "red",
        (200, 200, 255): "blue", 
        (200, 255, 200): "green",
        (255, 255, 200): "yellow",
        (255, 225, 180): "orange",
        (230, 200, 255): "purple",
        (220, 220, 220): "grey",
        (255, 200, 230): "pink",
        (180, 240, 240): "teal"
    }
    
    # Try to find the color by direct comparison
    for rgb_tuple, name in color_map_reverse.items():
        rgb_color = RGBColor(*rgb_tuple)
        if str(color.rgb) == str(rgb_color.rgb):
            return name
    
    # If no match, return generic description
    return "custom"

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL  you can push it to git hub

Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx


In [26]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")


def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout and apply background color if specified"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Apply background color if the slide has a color class
    apply_slide_background_color(slide, current_slide)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and apply background color if specified"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Apply background color if the slide has a color class
    apply_slide_background_color(slide_html, slide)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function

def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with simple reliable handling"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # Simplify by just adding all text to a single paragraph
    if all_text:
        # For very long text, use overflow handling
        if slide and prs and len(all_text) > 1000:
            # Use simple overflow handler
            chars_per_slide = 1000
            first_part = all_text[:chars_per_slide] + "..."
            
            p = text_frame.add_paragraph()
            p.text = first_part
            
            # Create a new slide for remaining text
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])
            
            # Add title to continuation slide
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from previous slide"
            p.font.italic = True
            p.font.bold = True
            
            # Add content to continuation slide
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            
            p = next_text_frame.add_paragraph()
            p.text = all_text[chars_per_slide:]
        else:
            # Just add text directly
            p = text_frame.add_paragraph()
            p.text = all_text
            
            # Apply basic formatting if needed
            if element.name in ['h3', 'h4']:
                p.font.bold = True
                size_map = {'h3': 18, 'h4': 16}
                p.font.size = Pt(size_map.get(element.name, 14))
def process_paragraphs_with_color(element, text_frame, css_rules):
    """Process paragraphs with proper color styling"""
    for para in element.find_all('p'):
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        p.font.bold = False
        p.font.size = Pt(12)
        
        # Try to get color from paragraph itself
        para_color = get_color_from_class(para)
        if para_color != RGBColor(255, 255, 255):  # If color found
            p.font.color.rgb = para_color
        else:
            # Look for parent div with color class
            parent_div = para.find_parent('div')
            if parent_div:
                parent_color = get_color_from_class(parent_div)
                if parent_color != RGBColor(255, 255, 255):  # If color found
                    p.font.color.rgb = parent_color
def process_headers_with_color(element, text_frame, css_rules):
    """Process headers with proper color styling"""
    for header in element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        p = text_frame.add_paragraph()
        p.text = header.get_text().strip()
        p.font.bold = True
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        p.font.size = Pt(size_map.get(header.name, 14))
        
        # Try to get color from header itself
        header_color = get_color_from_class(header)
        if header_color != RGBColor(255, 255, 255):  # If color found
            p.font.color.rgb = header_color
        else:
            # Look for parent div with color class
            parent_div = header.find_parent('div')
            if parent_div:
                parent_color = get_color_from_class(parent_div)
                if parent_color != RGBColor(255, 255, 255):  # If color found
                    p.font.color.rgb = parent_color

def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color


# Also update the handle_text_overflow function to manage text better
def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved handling"""
    # Calculate approximately how much text fits on one slide - MORE CONSERVATIVE
    chars_per_slide = 800  # Reduced from 1500 for better fit
    
    if len(text) > chars_per_slide:
        # Find a good break point - end of sentence or paragraph
        break_point = chars_per_slide
        while break_point > chars_per_slide / 2:
            if text[break_point] in '.!?' and (break_point + 1 >= len(text) or text[break_point + 1] in ' \n\r\t'):
                break_point += 1  # Include the punctuation
                break
            elif text[break_point] in ' \n\r\t' and (break_point > 0 and text[break_point - 1] in '.!?'):
                break
            break_point -= 1
        
        if break_point <= chars_per_slide / 2:
            # If no good break found, find a word boundary
            break_point = chars_per_slide
            while break_point < len(text) and text[break_point] not in ' \n\r\t':
                break_point -= 1
            if break_point <= chars_per_slide / 2:
                break_point = chars_per_slide  # Fall back to hard break
        
        # Add text that fits to current slide
        p = text_frame.add_paragraph()
        p.text = text[:break_point].strip()
        
        # Create a new slide for remaining text with BETTER FORMATTING
        next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
        
        # Add a title indicating continuation
        title_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = f"Continued from Slide {current_slide_index+1}"
        p.font.italic = True
        p.font.bold = True
        p.font.size = Pt(18)
        
        # Add the content with better positioning
        next_text_shape = next_slide.shapes.add_textbox(
            Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
        )
        next_text_frame = next_text_shape.text_frame
        next_text_frame.word_wrap = True
        
        # Recursively handle remaining text
        remaining_text = text[break_point:].strip()
        handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                            current_slide_index+1, prs)
        
        return True
    else:
        # Just add the text as a paragraph - no overflow
        p = text_frame.add_paragraph()
        p.text = text
        return False


# Also update the process_standard_slide_content function
def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better overflow handling"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Get overall text length to determine if we might need special handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it as overflow text
    if len(full_text) > 1200 and prs:  # Lower threshold for better fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for row in rows:
            # Check remaining space
            remaining_height = Inches(SLIDE_HEIGHT_INCHES - 0.7) - current_y
            if remaining_height < Inches(1.0):
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs and len(rows) > 1:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"Continued from Slide {slide_index+1}"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[rows.index(row):]:
                        row_height = estimate_row_height(next_row)
                        
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        
                        new_y = process_content(next_row, text_frame, next_slide, css_rules, next_y, prs, slide_index+1)
                        
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                        
                        # Check if we're running out of space on this slide too
                        if next_y > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            break
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row with MORE SPACE
            current_y = max(current_y + row_height, new_y) + Inches(0.3) if new_y else current_y + row_height + Inches(0.3)


# Update the estimate_row_height function for better height calculation
def estimate_row_height(row):
    """Estimate the height needed for a row based on content - MORE GENEROUS"""
    # Base height
    height = Inches(0.6)  # Increased from 0.5
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches (approximate)
                height = max(height, Inches(img_height + 0.4))  # Extra margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.4))  # Increased default if can't parse
        else:
            # Default height for images with extra margin
            height = max(height, Inches(2.4))  # Increased from 2.0
    
    # Add height for text with BETTER ESTIMATION
    text_content = row.get_text().strip()
    text_words = len(text_content.split())
    # Approximate words per line based on average word length
    words_per_line = 12  # Conservative estimate
    text_lines = max(1, int(text_words / words_per_line) + 1)
    # More generous line height
    height = max(height, Inches(0.35 * text_lines))  # Increased from 0.3
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        # More space per table row
        height = max(height, Inches(0.4 * rows))  # Increased from 0.3
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block')
    if code_block:
        code_lines = len(code_block.get_text().strip().split('\n'))
        # More space per code line
        height = max(height, Inches(0.25 * code_lines))  # Increased from 0.2
    
    # Add a bit extra to prevent tight fit
    return height + Inches(0.2)  # Extra padding

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position



def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    max_y = y_position if y_position is not None else Inches(1.5)
    
    process_headers_with_color(element, text_frame, css_rules)
    process_paragraphs_with_color(element, text_frame, css_rules)
    
    text_height = Inches(0.3) * len(text_frame.paragraphs)
    
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    
    img = element.find('img')
    if img:
        img_top = max_y + text_height + Inches(0.2)
        
        img_url = img.get('src', '')
        img_alt = img.get('alt', 'Image')
        
        try:
            response = requests.get(img_url, stream=True, timeout=10)
            
            if response.status_code == 200:
                img_bytes = BytesIO(response.content)
                
                try:
                    with PILImage.open(img_bytes) as pil_img:
                        img_width, img_height = pil_img.size
                        aspect_ratio = img_width / img_height
                    
                    img_bytes.seek(0)
                    
                    width_specified = img.get('width')
                    height_specified = img.get('height')
                    
                    img_width = Inches(2.0)
                    img_height = img_width / aspect_ratio
                    
                    if width_specified and height_specified:
                        try:
                            width_px = int(width_specified)
                            height_px = int(height_specified)
                            if width_px > 0 and height_px > 0:
                                img_width = Inches(width_px / 96)
                                img_height = Inches(height_px / 96)
                        except (ValueError, TypeError):
                            pass
                    
                    slide_width = Inches(SLIDE_WIDTH_INCHES)
                    left_position = (slide_width - img_width) / 2
                    
                    if img_width > Inches(6):
                        img_width = Inches(6)
                        img_height = img_width / aspect_ratio
                    
                    picture = slide.shapes.add_picture(
                        img_bytes, 
                        left_position, 
                        img_top, 
                        width=img_width, 
                        height=img_height
                    )
                    
                    max_y = max(max_y, img_top + img_height + Inches(0.2))
                    
                except Exception as img_error:
                    print(f"Error processing image: {img_error}")
                    p = text_frame.add_paragraph()
                    p.text = f"[Image Error: {img_alt}]"
            else:
                p = text_frame.add_paragraph()
                p.text = f"[Image not available: {img_alt}]"
                
        except Exception as request_error:
            print(f"Error downloading image: {request_error}")
            p = text_frame.add_paragraph()
            p.text = f"[Image download error: {img_alt}]"
    
    return max_y
def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    current_y = y_pos
    
    try:
        for row in column.find_all('div', class_='row'):
            try:
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                if remaining_height < Inches(0.5):
                    break
                
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                header_text = ""
                paragraph_text = ""
                
                for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                    header_text += header.get_text().strip() + " "
                
                for para in row.find_all('p'):
                    paragraph_text += para.get_text().strip() + " "
                
                other_text = ""
                row_copy = BeautifulSoup(str(row), 'html.parser')
                
                for tag in row_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']):
                    tag.decompose()
                
                for element in row_copy.descendants:
                    if isinstance(element, str) and element.strip():
                        other_text += element.strip() + " "
                
                combined_text = (header_text + " " + paragraph_text + " " + other_text).strip()
                has_text = bool(combined_text)
                
                if has_text and has_images:
                    try:
                        text_length = len(combined_text)
                        
                        if text_length < 100:
                            text_height = Inches(0.6)
                        elif text_length < 250:
                            text_height = Inches(1.0)
                        elif text_length < 500:
                            text_height = Inches(1.5)
                        else:
                            text_height = Inches(2.0)
                        
                        image_height = 0
                        image_width = 0
                        if img_tags:
                            img = img_tags[0]
                            if img.get('height') and img.get('width'):
                                try:
                                    height_px = int(img.get('height'))
                                    width_px = int(img.get('width'))
                                    image_height = Inches(height_px / 96)
                                    image_width = Inches(width_px / 96)
                                except (ValueError, TypeError):
                                    image_height = Inches(1.0)
                                    image_width = Inches(1.0)
                            else:
                                image_height = Inches(1.0)
                                image_width = Inches(1.0)
                        
                        image_space = image_height + Inches(0.4)
                        
                        box_height = text_height + image_space + Inches(0.2)
                        
                        if box_height > remaining_height:
                            if remaining_height > Inches(1.5):
                                box_height = remaining_height - Inches(0.1)
                                text_height = box_height - image_space - Inches(0.2)
                            else:
                                current_y += Inches(0.2)
                                continue
                        
                        row_color = get_color_from_class(row)
                        
                        bg_shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, box_height
                        )
                        bg_shape.fill.solid()
                        bg_shape.fill.fore_color.rgb = row_color
                        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                        
                        text_box = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.05),
                            width - Inches(0.2), 
                            text_height
                        )
                        text_frame = text_box.text_frame
                        text_frame.word_wrap = True
                        text_frame.margin_top = 0
                        
                        if header_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = header_text.strip()
                            p.font.bold = True
                            p.font.size = Pt(14)
                            p.space_before = 0
                            p.space_after = Pt(2)
                            
                            for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                                header_color = get_color_from_class(header)
                                if header_color != RGBColor(255, 255, 255):
                                    p.font.color.rgb = header_color
                                    break
                                parent_div = header.find_parent('div')
                                if parent_div and parent_div != row:
                                    parent_color = get_color_from_class(parent_div)
                                    if parent_color != RGBColor(255, 255, 255):
                                        p.font.color.rgb = parent_color
                                        break
                        
                        if paragraph_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = paragraph_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                            if not header_text.strip():
                                p.space_before = 0
                                
                            for para in row.find_all('p'):
                                para_color = get_color_from_class(para)
                                if para_color != RGBColor(255, 255, 255):
                                    p.font.color.rgb = para_color
                                    break
                                parent_div = para.find_parent('div')
                                if parent_div and parent_div != row:
                                    parent_color = get_color_from_class(parent_div)
                                    if parent_color != RGBColor(255, 255, 255):
                                        p.font.color.rgb = parent_color
                                        break
                        
                        if other_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = other_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                        
                        img_y = current_y + text_height + Inches(0.1)
                        
                        if img_tags and img_y + image_height < current_y + box_height:
                            try:
                                img = img_tags[0]
                                img_url = img.get('src', '')
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        img_width = image_width
                                        img_height = image_height
                                        
                                        if img_width == 0 or img_height == 0:
                                            try:
                                                with PILImage.open(img_bytes) as pil_img:
                                                    aspect_ratio = pil_img.width / pil_img.height
                                                
                                                img_bytes.seek(0)
                                                
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            except:
                                                img_width = min(Inches(1.0), width - Inches(0.4))
                                                img_height = Inches(1.0)
                                        
                                        img_x = x_pos + (width - img_width) / 2
                                        
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            img_x, 
                                            img_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                        
                        current_y += box_height + Inches(0.2)
                        
                    except Exception as unified_error:
                        print(f"Error creating unified box: {unified_error}")
                        current_y += Inches(0.5)
                
                elif has_text:
                    try:
                        text_length = len(combined_text)
                        header_lines = 1 if header_text.strip() else 0
                        para_lines = 1 if paragraph_text.strip() else 0
                        
                        if text_length < 100:
                            text_height = Inches(0.6 + 0.2 * (header_lines + para_lines))
                        elif text_length < 250:
                            text_height = Inches(1.0 + 0.2 * (header_lines + para_lines))
                        elif text_length < 500:
                            text_height = Inches(1.5 + 0.2 * (header_lines + para_lines))
                        else:
                            text_height = Inches(2.0 + 0.2 * (header_lines + para_lines))
                        
                        if text_height > remaining_height - Inches(0.2):
                            text_height = remaining_height - Inches(0.2)
                        
                        row_color = get_color_from_class(row)
                        
                        shape = slide.shapes.add_shape(
                            MSO_SHAPE.ROUNDED_RECTANGLE, 
                            x_pos, current_y, 
                            width, text_height
                        )
                        shape.fill.solid()
                        shape.fill.fore_color.rgb = row_color
                        shape.line.color.rgb = RGBColor(200, 200, 200)

                        textbox = slide.shapes.add_textbox(
                            x_pos + Inches(0.1), 
                            current_y + Inches(0.05),
                            width - Inches(0.2), 
                            text_height - Inches(0.1)
                        )
                        text_frame = textbox.text_frame
                        text_frame.word_wrap = True
                        text_frame.margin_top = 0
                        
                        if header_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = header_text.strip()
                            p.font.bold = True
                            p.font.size = Pt(14)
                            p.space_before = 0
                            p.space_after = Pt(2)
                            
                            for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                                header_color = get_color_from_class(header)
                                if header_color != RGBColor(255, 255, 255):
                                    p.font.color.rgb = header_color
                                    break
                                parent_div = header.find_parent('div')
                                if parent_div and parent_div != row:
                                    parent_color = get_color_from_class(parent_div)
                                    if parent_color != RGBColor(255, 255, 255):
                                        p.font.color.rgb = parent_color
                                        break
                        
                        if paragraph_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = paragraph_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                            if not header_text.strip():
                                p.space_before = 0
                                
                            for para in row.find_all('p'):
                                para_color = get_color_from_class(para)
                                if para_color != RGBColor(255, 255, 255):
                                    p.font.color.rgb = para_color
                                    break
                                parent_div = para.find_parent('div')
                                if parent_div and parent_div != row:
                                    parent_color = get_color_from_class(parent_div)
                                    if parent_color != RGBColor(255, 255, 255):
                                        p.font.color.rgb = parent_color
                                        break
                        
                        if other_text.strip():
                            p = text_frame.add_paragraph()
                            p.text = other_text.strip()
                            p.font.bold = False
                            p.font.size = Pt(12)
                        
                        current_y += text_height + Inches(0.2)
                        
                    except Exception as text_error:
                        print(f"Error processing text-only content: {text_error}")
                        current_y += Inches(0.5)
                
                elif has_images:
                    try:
                        if img_tags:
                            try:
                                img = img_tags[0]
                                img_url = img.get('src', '')
                                
                                img_width = 0
                                img_height = 0
                                if img.get('width') and img.get('height'):
                                    try:
                                        width_px = int(img.get('width'))
                                        height_px = int(img.get('height'))
                                        img_width = Inches(width_px / 96)
                                        img_height = Inches(height_px / 96)
                                    except (ValueError, TypeError):
                                        img_width = 0
                                        img_height = 0
                                
                                if img_url:
                                    response = requests.get(img_url, stream=True, timeout=5)
                                    if response.status_code == 200:
                                        img_bytes = BytesIO(response.content)
                                        
                                        if img_width == 0 or img_height == 0:
                                            try:
                                                with PILImage.open(img_bytes) as pil_img:
                                                    aspect_ratio = pil_img.width / pil_img.height
                                                
                                                img_bytes.seek(0)
                                                
                                                img_width = min(Inches(3.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            except:
                                                img_width = min(Inches(2.5), width - Inches(0.4))
                                                img_height = Inches(2.0)
                                        
                                        if img_height > remaining_height - Inches(0.3):
                                            img_height = remaining_height - Inches(0.3)
                                            img_width = img_height * aspect_ratio
                                            
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                        
                                        img_x = x_pos + (width - img_width) / 2
                                        
                                        picture = slide.shapes.add_picture(
                                            img_bytes, 
                                            img_x, 
                                            current_y, 
                                            width=img_width, 
                                            height=img_height
                                        )
                                        
                                        img_bytes.close()
                                        current_y += img_height + Inches(0.3)
                            except Exception as img_error:
                                print(f"Error with image: {img_error}")
                                current_y += Inches(0.5)
                    except Exception as img_section_error:
                        print(f"Error in image section: {img_section_error}")
                        current_y += Inches(0.5)
                
                current_y += Inches(0.1)
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it





def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename
def apply_slide_background_color(slide_html, current_slide):
    """Apply background color to the entire slide based on color classes"""
    try:
        # Get the background color from the slide's class
        bg_color = get_color_from_class(slide_html)
        
        # Get the RGB values - RGBColor objects store RGB values directly in rgb attribute
        default_color = RGBColor(255, 255, 255)
        
        # Direct comparison of RGBColor objects
        if bg_color != default_color:
            # Add a background shape that covers the entire slide
            left = top = 0
            width = Inches(SLIDE_WIDTH_INCHES)
            height = Inches(SLIDE_HEIGHT_INCHES)
            
            # Create a rectangle that covers the entire slide
            bg_shape = current_slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE, 
                left, top, width, height
            )
            bg_shape.fill.solid()
            bg_shape.fill.fore_color.rgb = bg_color
            
            # No border
            bg_shape.line.width = 0
            
            # Send to back so it doesn't cover other content
            try:
                # Get all shapes and reorder
                shapes = list(current_slide.shapes)
                if len(shapes) > 1:
                    # Move background to first position (back)
                    last_idx = len(shapes) - 1
                    bg_idx = last_idx  # Assuming it's the last one added
                    
                    try:
                        current_slide.shapes._spTree.remove(bg_shape._element)
                        current_slide.shapes._spTree.insert(0, bg_shape._element)
                        print(f"Successfully applied {get_color_name(bg_color)} background to slide")
                    except:
                        print(f"Could not reorder slide background but color was applied")
            except:
                print(f"Applied {get_color_name(bg_color)} background, but couldn't reorder it")
    except Exception as e:
        # If background color application fails, log it but don't crash
        print(f"Warning: Could not apply slide background color: {e}")

def get_color_name(color):
    """Get a color name from an RGBColor object by comparing values"""
    # Map RGB tuples to color names
    color_map_reverse = {
        (255, 200, 200): "red",
        (200, 200, 255): "blue", 
        (200, 255, 200): "green",
        (255, 255, 200): "yellow",
        (255, 225, 180): "orange",
        (230, 200, 255): "purple",
        (220, 220, 220): "grey",
        (255, 200, 230): "pink",
        (180, 240, 240): "teal"
    }
    
    # Try to find the color by direct comparison
    for rgb_tuple, name in color_map_reverse.items():
        rgb_color = RGBColor(*rgb_tuple)
        if str(color.rgb) == str(rgb_color.rgb):
            return name
    
    # If no match, return generic description
    return "custom"

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL  you can push it to git hub

Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx


In [28]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")


def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout and apply background color if specified"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Apply background color if the slide has a color class
    apply_slide_background_color(slide, current_slide)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and apply background color if specified"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Apply background color if the slide has a color class
    apply_slide_background_color(slide_html, slide)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function


def process_paragraphs_with_color(element, text_frame, css_rules):
    """Process paragraphs with proper color styling"""
    for para in element.find_all('p'):
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        p.font.bold = False
        p.font.size = Pt(12)
        
        # Try to get color from paragraph itself
        para_color = get_color_from_class(para)
        if para_color != RGBColor(255, 255, 255):  # If color found
            p.font.color.rgb = para_color
        else:
            # Look for parent div with color class
            parent_div = para.find_parent('div')
            if parent_div:
                parent_color = get_color_from_class(parent_div)
                if parent_color != RGBColor(255, 255, 255):  # If color found
                    p.font.color.rgb = parent_color
def process_headers_with_color(element, text_frame, css_rules):
    """Process headers with proper color styling"""
    for header in element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        p = text_frame.add_paragraph()
        p.text = header.get_text().strip()
        p.font.bold = True
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        p.font.size = Pt(size_map.get(header.name, 14))
        
        # Try to get color from header itself
        header_color = get_color_from_class(header)
        if header_color != RGBColor(255, 255, 255):  # If color found
            p.font.color.rgb = header_color
        else:
            # Look for parent div with color class
            parent_div = header.find_parent('div')
            if parent_div:
                parent_color = get_color_from_class(parent_div)
                if parent_color != RGBColor(255, 255, 255):  # If color found
                    p.font.color.rgb = parent_color

def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color


# Also update the handle_text_overflow function to manage text better


# Modified text handling functions to properly wrap text and prevent slide overflow

def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with improved text wrapping"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Set appropriate text frame margins
    text_frame.margin_left = 0
    text_frame.margin_right = 0
    text_frame.margin_top = 0
    text_frame.margin_bottom = 0
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # If no text, return early
    if not all_text:
        return
    
    # Calculate how much text might fit based on the text frame dimensions
    # This is a simplified estimate - in practice, PowerPoint handles wrapping
    if slide and prs and len(all_text) > 800:  # Reduced from 1000 for better fit
        # Use the text overflow handler for long text
        handle_text_overflow(all_text, text_frame, slide, slide_index, prs)
    else:
        # Use smart paragraph splitting for better text flow
        paragraphs = all_text.split('\n')
        for para_text in paragraphs:
            if not para_text.strip():
                continue
                
            p = text_frame.add_paragraph()
            p.text = para_text.strip()
            
            # Apply basic formatting if needed
            if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                p.font.bold = True
                size_map = {'h1': 28, 'h2': 24, 'h3': 20, 'h4': 18, 'h5': 16, 'h6': 14}
                p.font.size = Pt(size_map.get(element.name, 14))
            elif element.name in ['strong', 'b']:
                p.font.bold = True
            elif element.name in ['em', 'i']:
                p.font.italic = True


def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved text wrapping"""
    # Use a more conservative character count to ensure text fits
    chars_per_slide = 600  # Even more conservative than before
    
    # Split text into paragraphs first for better formatting
    paragraphs = text.split('\n')
    
    current_chars = 0
    current_para_index = 0
    
    # Add paragraphs until we hit the character limit
    while current_para_index < len(paragraphs):
        para_text = paragraphs[current_para_index].strip()
        
        # Skip empty paragraphs
        if not para_text:
            current_para_index += 1
            continue
        
        # If adding this paragraph would exceed our limit, create a continuation slide
        if current_chars + len(para_text) > chars_per_slide and current_chars > 0:
            # We need to continue on a new slide
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
            
            # Add a title indicating continuation
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from Slide {current_slide_index+1}"
            p.font.italic = True
            p.font.bold = True
            p.font.size = Pt(18)
            
            # Add the content with better positioning
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            next_text_frame.margin_left = 0
            next_text_frame.margin_right = 0
            next_text_frame.margin_top = 0
            next_text_frame.margin_bottom = 0
            
            # Recursively handle remaining paragraphs
            remaining_paras = paragraphs[current_para_index:]
            remaining_text = '\n'.join(remaining_paras)
            handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                               current_slide_index+1, prs)
            return True
        
        # If we get here, we can add this paragraph to the current slide
        p = text_frame.add_paragraph()
        p.text = para_text
        
        current_chars += len(para_text)
        current_para_index += 1
    
    return False


def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better content fitting"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Calculate maximum content height
    max_y = Inches(SLIDE_HEIGHT_INCHES - 0.7)  # 0.7 inch bottom margin
    
    # Get overall text length to determine if we need overflow handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it specially
    if len(full_text) > 1000 and prs:  # Lower threshold for better content fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        content_frame.margin_left = 0
        content_frame.margin_right = 0
        content_frame.margin_top = 0
        content_frame.margin_bottom = 0
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for i, row in enumerate(rows):
            # Check remaining space
            remaining_height = max_y - current_y
            if remaining_height < Inches(1.0) and i < len(rows) - 1:
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_element = slide_html.find('h1') or slide_html.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index+1}"
                    
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[i:]:
                        # Calculate content height
                        row_height = estimate_row_height(next_row)
                        
                        # Check if it fits on the continuation slide
                        if next_y + row_height > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            # Still too much content, need another slide
                            continue_index = rows.index(next_row)
                            if continue_index < len(rows) - 1:
                                # Recursively handle remaining content
                                remaining_rows_html = BeautifulSoup('<div></div>', 'html.parser').div
                                for r in rows[continue_index:]:
                                    remaining_rows_html.append(r.copy())
                                
                                process_standard_slide_content(
                                    remaining_rows_html, next_slide, css_rules, prs, slide_index+1
                                )
                                break
                        
                        # Create a text frame for this row
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        text_frame.word_wrap = True
                        
                        # Process the content of the row
                        new_y = process_content(next_row, text_frame, next_slide, 
                                              css_rules, next_y, prs, slide_index+1)
                        
                        # Update position for next row
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Adjust height if remaining space is limited
            if current_y + row_height > max_y:
                row_height = max_y - current_y - Inches(0.1)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            text_frame.word_wrap = True
            text_frame.margin_left = 0
            text_frame.margin_right = 0
            text_frame.margin_top = 0
            text_frame.margin_bottom = 0
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row
            current_y = max(current_y + row_height, new_y) + Inches(0.2) if new_y else current_y + row_height + Inches(0.2)
            
            # Check if we're running out of space
            if current_y >= max_y and i < len(rows) - 1:
                # Create a new slide for remaining content
                if prs:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_element = slide_html.find('h1') or slide_html.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index+1}"
                    
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Recursively process remaining rows on new slide
                    remaining_rows_html = BeautifulSoup('<div></div>', 'html.parser').div
                    for r in rows[i+1:]:
                        remaining_rows_html.append(r.copy())
                    
                    process_standard_slide_content(
                        remaining_rows_html, next_slide, css_rules, prs, slide_index+1
                    )
                break


def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    """Process column content with improved text wrapping and overflow handling"""
    current_y = y_pos
    max_y = Inches(SLIDE_HEIGHT_INCHES - 0.7)  # Maximum usable Y position
    
    # Process each row in the column
    rows = column.find_all('div', class_='row')
    for i, row in enumerate(rows):
        # Calculate remaining space
        remaining_height = max_y - current_y
        
        # Check if enough space remains
        if remaining_height < Inches(0.8) and i < len(rows) - 1:
            # Not enough space for meaningful content
            if prs:
                # Create continuation slide
                next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                
                # Add continuation title
                title_element = column.parent.find('h1') or column.parent.find('h2')
                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                
                title_box = next_slide.shapes.add_textbox(
                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                )
                title_frame = title_box.text_frame
                p = title_frame.add_paragraph()
                p.text = f"{title_text} (Continued)"
                p.font.size = Pt(24)
                p.font.bold = True
                
                # Process remaining rows on the new slide
                new_column = BeautifulSoup("<div></div>", "html.parser").div
                for r in rows[i:]:
                    new_column.append(r.copy())
                
                process_column_content(new_column, next_slide, x_pos, Inches(1.5), 
                                      width, css_rules, slide_index + 1, prs)
                return current_y
            else:
                # Skip if no presentation object provided
                break
        
        # Extract row content
        text_content = row.get_text().strip()
        img_tags = row.find_all('img')
        has_images = len(img_tags) > 0
        
        # Get row background color
        row_color = get_color_from_class(row)
        
        # Estimate appropriate height for this row content
        text_length = len(text_content)
        
        # Set text height based on content length, but keep it reasonable
        if text_length < 50:
            text_height = Inches(0.5)
        elif text_length < 100:
            text_height = Inches(0.8)
        elif text_length < 200:
            text_height = Inches(1.2)
        elif text_length < 400:
            text_height = Inches(1.8)
        else:
            text_height = Inches(2.5)
        
        # Ensure text height doesn't overflow the slide
        if current_y + text_height > max_y:
            if current_y < max_y - Inches(0.5):
                # Adjust height to fit
                text_height = max_y - current_y - Inches(0.1)
            else:
                # Move to next slide if we can
                if prs and i < len(rows) - 1:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add continuation title
                    title_element = column.parent.find('h1') or column.parent.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                    
                    title_box = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                    )
                    title_frame = title_box.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.size = Pt(24)
                    p.font.bold = True
                    
                    # Process remaining rows on new slide
                    new_column = BeautifulSoup("<div></div>", "html.parser").div
                    for r in rows[i:]:
                        new_column.append(r.copy())
                    
                    process_column_content(new_column, next_slide, x_pos, Inches(1.5), 
                                          width, css_rules, slide_index + 1, prs)
                    return current_y
                else:
                    # Skip if we can't move to a new slide
                    continue
        
        # Create content box
        box_height = text_height + Inches(0.2)  # Add padding
        
        # Create background shape
        bg_shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, 
            x_pos, current_y, 
            width, box_height
        )
        bg_shape.fill.solid()
        bg_shape.fill.fore_color.rgb = row_color
        bg_shape.line.color.rgb = RGBColor(200, 200, 200)
        
        # Create text box for content
        text_box = slide.shapes.add_textbox(
            x_pos + Inches(0.1), 
            current_y + Inches(0.1), 
            width - Inches(0.2), 
            box_height - Inches(0.2)
        )
        text_frame = text_box.text_frame
        text_frame.word_wrap = True
        text_frame.margin_top = 0
        text_frame.margin_bottom = 0
        text_frame.margin_left = 0
        text_frame.margin_right = 0
        
        # Process headers
        for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
            p = text_frame.add_paragraph()
            p.text = header.get_text().strip()
            p.font.bold = True
            size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
            p.font.size = Pt(size_map.get(header.name, 14))
        
        # Process paragraphs
        for para in row.find_all('p'):
            p = text_frame.add_paragraph()
            p.text = para.get_text().strip()
            p.font.size = Pt(12)
        
        # Process any remaining text not in p or header tags
        row_copy = BeautifulSoup(str(row), 'html.parser')
        for tag in row_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']):
            tag.decompose()
        
        remaining_text = row_copy.get_text().strip()
        if remaining_text:
            p = text_frame.add_paragraph()
            p.text = remaining_text
            p.font.size = Pt(12)
        
        # Handle images (if present)
        if has_images and current_y + box_height + Inches(2) <= max_y:
            # Process the first image only to keep it simple
            img = img_tags[0]
            img_url = img.get('src', '')
            
            if img_url:
                try:
                    response = requests.get(img_url, stream=True, timeout=5)
                    if response.status_code == 200:
                        img_bytes = BytesIO(response.content)
                        
                        # Default image dimensions
                        img_width = Inches(2.0)
                        img_height = Inches(1.5)
                        
                        try:
                            # Get actual dimensions
                            with PILImage.open(img_bytes) as pil_img:
                                aspect_ratio = pil_img.width / pil_img.height
                                
                                # Reset file pointer
                                img_bytes.seek(0)
                                
                                # Use specified dimensions if available
                                if img.get('width') and img.get('height'):
                                    try:
                                        width_px = int(img.get('width'))
                                        height_px = int(img.get('height'))
                                        img_width = Inches(width_px / 96)
                                        img_height = Inches(height_px / 96)
                                    except (ValueError, TypeError):
                                        img_width = min(Inches(2.0), width - Inches(0.4))
                                        img_height = img_width / aspect_ratio
                                else:
                                    img_width = min(Inches(2.0), width - Inches(0.4))
                                    img_height = img_width / aspect_ratio
                                
                                # Ensure image fits within column width
                                if img_width > width - Inches(0.4):
                                    img_width = width - Inches(0.4)
                                    img_height = img_width / aspect_ratio
                                
                                # Center the image horizontally
                                img_x = x_pos + (width - img_width) / 2
                                
                                # Position below text content
                                img_y = current_y + box_height + Inches(0.1)
                                
                                # Check if image fits vertically
                                if img_y + img_height <= max_y:
                                    picture = slide.shapes.add_picture(
                                        img_bytes, 
                                        img_x, 
                                        img_y, 
                                        width=img_width, 
                                        height=img_height
                                    )
                                    
                                    # Update box height to include image
                                    box_height += img_height + Inches(0.2)
                                    
                                    # Update background shape size
                                    bg_shape.width = width
                                    bg_shape.height = box_height
                        except Exception as img_error:
                            print(f"Error processing image: {img_error}")
                        
                        img_bytes.close()
                except Exception as img_error:
                    print(f"Error with image: {img_error}")
        
        # Update position for next content
        current_y += box_height + Inches(0.2)
    
    return current_y


def estimate_row_height(row):
    """More accurate estimation of row height based on content quantity"""
    # Base height for any row
    height = Inches(0.5)
    
    # Get text content
    text_content = row.get_text().strip()
    text_length = len(text_content)
    
    # Calculate height based on text length with more realistic estimates
    # Assuming approximately 40 characters per line and 0.2 inches per line
    if text_length > 0:
        lines = max(1, text_length // 40)
        text_height = Inches(0.2 * lines)
        height = max(height, text_height)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches
                height = max(height, Inches(img_height + 0.4))  # Add margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.0))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2.0))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows + 0.3))  # 0.3 inches per row plus header
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block') or row.find('pre')
    if code_block:
        code_text = code_block.get_text().strip()
        code_lines = len(code_text.split('\n'))
        height = max(height, Inches(0.2 * code_lines + 0.3))  # 0.2 inches per line
    
    # Handle special elements
    if row.find('ul') or row.find('ol'):
        list_items = len(row.find_all('li'))
        height = max(height, Inches(0.25 * list_items + 0.3))  # 0.25 inches per list item
    
    # Add extra padding to prevent content being cut off
    return height + Inches(0.2)

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position



def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    max_y = y_position if y_position is not None else Inches(1.5)
    
    process_headers_with_color(element, text_frame, css_rules)
    process_paragraphs_with_color(element, text_frame, css_rules)
    
    text_height = Inches(0.3) * len(text_frame.paragraphs)
    
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    
    img = element.find('img')
    if img:
        img_top = max_y + text_height + Inches(0.2)
        
        img_url = img.get('src', '')
        img_alt = img.get('alt', 'Image')
        
        try:
            response = requests.get(img_url, stream=True, timeout=10)
            
            if response.status_code == 200:
                img_bytes = BytesIO(response.content)
                
                try:
                    with PILImage.open(img_bytes) as pil_img:
                        img_width, img_height = pil_img.size
                        aspect_ratio = img_width / img_height
                    
                    img_bytes.seek(0)
                    
                    width_specified = img.get('width')
                    height_specified = img.get('height')
                    
                    img_width = Inches(2.0)
                    img_height = img_width / aspect_ratio
                    
                    if width_specified and height_specified:
                        try:
                            width_px = int(width_specified)
                            height_px = int(height_specified)
                            if width_px > 0 and height_px > 0:
                                img_width = Inches(width_px / 96)
                                img_height = Inches(height_px / 96)
                        except (ValueError, TypeError):
                            pass
                    
                    slide_width = Inches(SLIDE_WIDTH_INCHES)
                    left_position = (slide_width - img_width) / 2
                    
                    if img_width > Inches(6):
                        img_width = Inches(6)
                        img_height = img_width / aspect_ratio
                    
                    picture = slide.shapes.add_picture(
                        img_bytes, 
                        left_position, 
                        img_top, 
                        width=img_width, 
                        height=img_height
                    )
                    
                    max_y = max(max_y, img_top + img_height + Inches(0.2))
                    
                except Exception as img_error:
                    print(f"Error processing image: {img_error}")
                    p = text_frame.add_paragraph()
                    p.text = f"[Image Error: {img_alt}]"
            else:
                p = text_frame.add_paragraph()
                p.text = f"[Image not available: {img_alt}]"
                
        except Exception as request_error:
            print(f"Error downloading image: {request_error}")
            p = text_frame.add_paragraph()
            p.text = f"[Image download error: {img_alt}]"
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it





def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename
def apply_slide_background_color(slide_html, current_slide):
    """Apply background color to the entire slide based on color classes"""
    try:
        # Get the background color from the slide's class
        bg_color = get_color_from_class(slide_html)
        
        # Get the RGB values - RGBColor objects store RGB values directly in rgb attribute
        default_color = RGBColor(255, 255, 255)
        
        # Direct comparison of RGBColor objects
        if bg_color != default_color:
            # Add a background shape that covers the entire slide
            left = top = 0
            width = Inches(SLIDE_WIDTH_INCHES)
            height = Inches(SLIDE_HEIGHT_INCHES)
            
            # Create a rectangle that covers the entire slide
            bg_shape = current_slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE, 
                left, top, width, height
            )
            bg_shape.fill.solid()
            bg_shape.fill.fore_color.rgb = bg_color
            
            # No border
            bg_shape.line.width = 0
            
            # Send to back so it doesn't cover other content
            try:
                # Get all shapes and reorder
                shapes = list(current_slide.shapes)
                if len(shapes) > 1:
                    # Move background to first position (back)
                    last_idx = len(shapes) - 1
                    bg_idx = last_idx  # Assuming it's the last one added
                    
                    try:
                        current_slide.shapes._spTree.remove(bg_shape._element)
                        current_slide.shapes._spTree.insert(0, bg_shape._element)
                        print(f"Successfully applied {get_color_name(bg_color)} background to slide")
                    except:
                        print(f"Could not reorder slide background but color was applied")
            except:
                print(f"Applied {get_color_name(bg_color)} background, but couldn't reorder it")
    except Exception as e:
        # If background color application fails, log it but don't crash
        print(f"Warning: Could not apply slide background color: {e}")

def get_color_name(color):
    """Get a color name from an RGBColor object by comparing values"""
    # Map RGB tuples to color names
    color_map_reverse = {
        (255, 200, 200): "red",
        (200, 200, 255): "blue", 
        (200, 255, 200): "green",
        (255, 255, 200): "yellow",
        (255, 225, 180): "orange",
        (230, 200, 255): "purple",
        (220, 220, 220): "grey",
        (255, 200, 230): "pink",
        (180, 240, 240): "teal"
    }
    
    # Try to find the color by direct comparison
    for rgb_tuple, name in color_map_reverse.items():
        rgb_color = RGBColor(*rgb_tuple)
        if str(color.rgb) == str(rgb_color.rgb):
            return name
    
    # If no match, return generic description
    return "custom"

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL  you can push it to git hub

Error processing image: value must be an integral type, got <class 'float'>
Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx


In [36]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")


def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout and apply background color if specified"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Apply background color if the slide has a color class
    apply_slide_background_color(slide, current_slide)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and apply background color if specified"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Apply background color if the slide has a color class
    apply_slide_background_color(slide_html, slide)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function





def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color


# Also update the handle_text_overflow function to manage text better


# Modified text handling functions to properly wrap text and prevent slide overflow

def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with improved text wrapping"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Set appropriate text frame margins
    text_frame.margin_left = 0
    text_frame.margin_right = 0
    text_frame.margin_top = 0
    text_frame.margin_bottom = 0
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # If no text, return early
    if not all_text:
        return
    
    # Calculate how much text might fit based on the text frame dimensions
    # This is a simplified estimate - in practice, PowerPoint handles wrapping
    if slide and prs and len(all_text) > 800:  # Reduced from 1000 for better fit
        # Use the text overflow handler for long text
        handle_text_overflow(all_text, text_frame, slide, slide_index, prs)
    else:
        # Use smart paragraph splitting for better text flow
        paragraphs = all_text.split('\n')
        for para_text in paragraphs:
            if not para_text.strip():
                continue
                
            p = text_frame.add_paragraph()
            p.text = para_text.strip()
            
            # Apply basic formatting if needed
            if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                p.font.bold = True
                size_map = {'h1': 28, 'h2': 24, 'h3': 20, 'h4': 18, 'h5': 16, 'h6': 14}
                p.font.size = Pt(size_map.get(element.name, 14))
            elif element.name in ['strong', 'b']:
                p.font.bold = True
            elif element.name in ['em', 'i']:
                p.font.italic = True


def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved text wrapping"""
    # Use a more conservative character count to ensure text fits
    chars_per_slide = 600  # Even more conservative than before
    
    # Split text into paragraphs first for better formatting
    paragraphs = text.split('\n')
    
    current_chars = 0
    current_para_index = 0
    
    # Add paragraphs until we hit the character limit
    while current_para_index < len(paragraphs):
        para_text = paragraphs[current_para_index].strip()
        
        # Skip empty paragraphs
        if not para_text:
            current_para_index += 1
            continue
        
        # If adding this paragraph would exceed our limit, create a continuation slide
        if current_chars + len(para_text) > chars_per_slide and current_chars > 0:
            # We need to continue on a new slide
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
            
            # Add a title indicating continuation
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from Slide {current_slide_index+1}"
            p.font.italic = True
            p.font.bold = True
            p.font.size = Pt(18)
            
            # Add the content with better positioning
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            next_text_frame.margin_left = 0
            next_text_frame.margin_right = 0
            next_text_frame.margin_top = 0
            next_text_frame.margin_bottom = 0
            
            # Recursively handle remaining paragraphs
            remaining_paras = paragraphs[current_para_index:]
            remaining_text = '\n'.join(remaining_paras)
            handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                               current_slide_index+1, prs)
            return True
        
        # If we get here, we can add this paragraph to the current slide
        p = text_frame.add_paragraph()
        p.text = para_text
        
        current_chars += len(para_text)
        current_para_index += 1
    
    return False


def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better content fitting"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Calculate maximum content height
    max_y = Inches(SLIDE_HEIGHT_INCHES - 0.7)  # 0.7 inch bottom margin
    
    # Get overall text length to determine if we need overflow handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it specially
    if len(full_text) > 1000 and prs:  # Lower threshold for better content fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        content_frame.margin_left = 0
        content_frame.margin_right = 0
        content_frame.margin_top = 0
        content_frame.margin_bottom = 0
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for i, row in enumerate(rows):
            # Check remaining space
            remaining_height = max_y - current_y
            if remaining_height < Inches(1.0) and i < len(rows) - 1:
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_element = slide_html.find('h1') or slide_html.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index+1}"
                    
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[i:]:
                        # Calculate content height
                        row_height = estimate_row_height(next_row)
                        
                        # Check if it fits on the continuation slide
                        if next_y + row_height > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            # Still too much content, need another slide
                            continue_index = rows.index(next_row)
                            if continue_index < len(rows) - 1:
                                # Recursively handle remaining content
                                remaining_rows_html = BeautifulSoup('<div></div>', 'html.parser').div
                                for r in rows[continue_index:]:
                                    remaining_rows_html.append(r.copy())
                                
                                process_standard_slide_content(
                                    remaining_rows_html, next_slide, css_rules, prs, slide_index+1
                                )
                                break
                        
                        # Create a text frame for this row
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        text_frame.word_wrap = True
                        
                        # Process the content of the row
                        new_y = process_content(next_row, text_frame, next_slide, 
                                              css_rules, next_y, prs, slide_index+1)
                        
                        # Update position for next row
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Adjust height if remaining space is limited
            if current_y + row_height > max_y:
                row_height = max_y - current_y - Inches(0.1)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            text_frame.word_wrap = True
            text_frame.margin_left = 0
            text_frame.margin_right = 0
            text_frame.margin_top = 0
            text_frame.margin_bottom = 0
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row
            current_y = max(current_y + row_height, new_y) + Inches(0.2) if new_y else current_y + row_height + Inches(0.2)
            
            # Check if we're running out of space
            if current_y >= max_y and i < len(rows) - 1:
                # Create a new slide for remaining content
                if prs:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_element = slide_html.find('h1') or slide_html.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index+1}"
                    
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Recursively process remaining rows on new slide
                    remaining_rows_html = BeautifulSoup('<div></div>', 'html.parser').div
                    for r in rows[i+1:]:
                        remaining_rows_html.append(r.copy())
                    
                    process_standard_slide_content(
                        remaining_rows_html, next_slide, css_rules, prs, slide_index+1
                    )
                break


# Two specific fixes for the HTML to PowerPoint converter:
# 1. Better processing of colors in div class tags for headings and paragraphs
# 2. Fix for the image in slide 2's right column to keep it inside the row box

# FIX 1: Improved color handling from div tags
def process_headers_with_color(element, text_frame, css_rules):
    """Process headers with improved color styling"""
    for header in element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        p = text_frame.add_paragraph()
        p.text = header.get_text().strip()
        p.font.bold = True
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        p.font.size = Pt(size_map.get(header.name, 14))
        
        # Check color in this priority: header tag itself, parent div, grandparent div
        header_color = get_color_from_class(header)
        
        # If no color found in the header itself, look at parent div
        if header_color == RGBColor(255, 255, 255):
            parent_div = header.find_parent('div')
            if parent_div:
                header_color = get_color_from_class(parent_div)
                
                # If still no color, try grandparent
                if header_color == RGBColor(255, 255, 255):
                    grandparent_div = parent_div.find_parent('div')
                    if grandparent_div:
                        header_color = get_color_from_class(grandparent_div)
        
        # Apply the color if one was found
        if header_color != RGBColor(255, 255, 255):
            p.font.color.rgb = header_color


def process_paragraphs_with_color(element, text_frame, css_rules):
    """Process paragraphs with improved color styling"""
    for para in element.find_all('p'):
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        p.font.bold = False
        p.font.size = Pt(12)
        
        # Check color in this priority: paragraph tag itself, parent div, grandparent div
        para_color = get_color_from_class(para)
        
        # If no color found in the paragraph itself, look at parent div
        if para_color == RGBColor(255, 255, 255):
            parent_div = para.find_parent('div')
            if parent_div:
                para_color = get_color_from_class(parent_div)
                
                # If still no color, try grandparent
                if para_color == RGBColor(255, 255, 255):
                    grandparent_div = parent_div.find_parent('div')
                    if grandparent_div:
                        para_color = get_color_from_class(grandparent_div)
        
        # Apply the color if one was found
        if para_color != RGBColor(255, 255, 255):
            p.font.color.rgb = para_color


# FIX 2: Keep images inside row boxes in column layouts
def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    current_y = y_pos
    
    try:
        for row in column.find_all('div', class_='row'):
            try:
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                # Skip if not enough space left on slide and create continuation slide
                if remaining_height < Inches(0.5):
                    if prs:
                        # Create a new slide for overflow content
                        slide_layout = prs.slide_layouts[6]  # Blank slide
                        next_slide = prs.slides.add_slide(slide_layout)
                        
                        # Add continuation title
                        title_element = column.parent.find('h1') or column.parent.find('h2')
                        title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                        
                        title_box = next_slide.shapes.add_textbox(
                            Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                        )
                        title_frame = title_box.text_frame
                        p = title_frame.add_paragraph()
                        p.text = f"{title_text} (Continued)"
                        p.font.size = Pt(24)
                        p.font.bold = True
                        
                        # Process remaining rows on the new slide
                        remaining_rows_index = column.find_all('div', class_='row').index(row)
                        if remaining_rows_index >= 0:
                            # Create a new column with only remaining rows
                            new_column = BeautifulSoup("<div></div>", "html.parser").div
                            for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                new_column.append(r.copy())
                            
                            process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                    break
                
                # Extract content from the row
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # Extract headers
                header_text = ""
                for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                    header_text += header.get_text().strip() + " "
                
                # Extract paragraphs
                paragraph_text = ""
                for para in row.find_all('p'):
                    paragraph_text += para.get_text().strip() + " "
                
                # Extract any other text
                other_text = ""
                row_copy = BeautifulSoup(str(row), 'html.parser')
                for tag in row_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']):
                    tag.decompose()
                
                for element in row_copy.descendants:
                    if isinstance(element, str) and element.strip():
                        other_text += element.strip() + " "
                
                combined_text = (header_text + " " + paragraph_text + " " + other_text).strip()
                has_text = bool(combined_text)
                
                # Get row background color
                row_color = get_color_from_class(row)
                
                # Process text with images
                if has_text and has_images:
                    # Estimate appropriate height for the text
                    text_length = len(combined_text)
                    
                    # Estimate text height with more lines for longer text
                    if text_length < 100:
                        text_height = Inches(0.6)
                    elif text_length < 250:
                        text_height = Inches(1.0)
                    elif text_length < 500:
                        text_height = Inches(1.8)  # Increased from 1.5 for longer text
                    else:
                        # Use more space for very long text
                        text_height = Inches(2.8)  # Increased from 2.5
                    
                    # Calculate height needed for images
                    image_height = Inches(0)  # Start with zero, will add actual image height
                    if img_tags:
                        img = img_tags[0]
                        if img.get('height'):
                            try:
                                img_height = int(img.get('height'))
                                image_height = Inches(img_height / 96 + 0.2)  # Add margin
                            except (ValueError, TypeError):
                                image_height = Inches(1.5)  # Default if can't parse
                        else:
                            image_height = Inches(1.5)  # Default height for images
                    
                    # IMPORTANT FIX: Create a box tall enough for both text and image with EXTRA padding
                    box_height = text_height + image_height + Inches(0.6)  # Increased padding
                    
                    # Check if it fits in available space
                    if box_height > remaining_height:
                        if remaining_height > Inches(1.5):
                            # Adjust to fit
                            box_height = remaining_height - Inches(0.1)
                            text_height = box_height - image_height - Inches(0.6)
                        else:
                            # Move to next slide if no room
                            if prs:
                                # Create continuation slide
                                slide_layout = prs.slide_layouts[6]
                                next_slide = prs.slides.add_slide(slide_layout)
                                
                                title_element = column.parent.find('h1') or column.parent.find('h2')
                                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                                
                                title_box = next_slide.shapes.add_textbox(
                                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                                )
                                title_frame = title_box.text_frame
                                p = title_frame.add_paragraph()
                                p.text = f"{title_text} (Continued)"
                                p.font.size = Pt(24)
                                p.font.bold = True
                                
                                # Process current row on new slide
                                new_column = BeautifulSoup("<div></div>", "html.parser").div
                                new_column.append(row.copy())
                                
                                # Process remaining rows
                                remaining_rows_index = column.find_all('div', class_='row').index(row) + 1
                                if remaining_rows_index < len(column.find_all('div', class_='row')):
                                    for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                        new_column.append(r.copy())
                                
                                process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                                break
                            else:
                                # Skip if no room and cannot create new slide
                                current_y += Inches(0.2)
                                continue
                    
                    # Create background box
                    bg_shape = slide.shapes.add_shape(
                        MSO_SHAPE.ROUNDED_RECTANGLE, 
                        x_pos, current_y, 
                        width, box_height
                    )
                    bg_shape.fill.solid()
                    bg_shape.fill.fore_color.rgb = row_color
                    bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                    
                    # Create text box with header and paragraph text
                    text_box = slide.shapes.add_textbox(
                        x_pos + Inches(0.1),  # Reduced left padding
                        current_y + Inches(0.1),  # Reduced top padding
                        width - Inches(0.2),  # More width for text
                        text_height
                    )
                    text_frame = text_box.text_frame
                    text_frame.word_wrap = True  # Ensure word wrap is enabled
                    text_frame.margin_top = 0
                    text_frame.margin_bottom = 0
                    text_frame.margin_left = 0
                    text_frame.margin_right = 0
                    
                    # Add header text
                    if header_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = header_text.strip()
                        p.font.bold = True
                        p.font.size = Pt(14)
                        p.space_before = 0
                        p.space_after = Pt(2)
                        
                        # Apply text contrast for better visibility
                        apply_text_contrast(p, row_color)
                        
                        # Apply color from row to header if the header has a specific color
                        header_element = row.find(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
                        if header_element:
                            header_color = get_color_from_class(header_element)
                            parent_div = header_element.find_parent('div', class_=lambda c: c and any(color in c.lower() for color in ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'grey', 'gray', 'pink', 'teal']))
                            if parent_div:
                                header_color = get_color_from_class(parent_div)
                            
                            if header_color != RGBColor(255, 255, 255):
                                p.font.color.rgb = header_color
                    
                    # Add paragraph text
                    if paragraph_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = paragraph_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                        p.space_before = 0
                        p.space_after = 0
                        
                        # Apply text contrast for better visibility
                        apply_text_contrast(p, row_color)
                    
                    # Add other text if present
                    if other_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = other_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                        
                        # Apply text contrast for better visibility
                        apply_text_contrast(p, row_color)
                    
                    # Add image below text content with proper spacing
                    if img_tags:
                        try:
                            img = img_tags[0]
                            img_url = img.get('src', '')
                            
                            if img_url:
                                response = requests.get(img_url, stream=True, timeout=5)
                                if response.status_code == 200:
                                    img_bytes = BytesIO(response.content)
                                    
                                    # Determine image size
                                    img_width = Inches(1.0)
                                    img_height = Inches(1.0)
                                    
                                    try:
                                        # Get dimensions from image
                                        with PILImage.open(img_bytes) as pil_img:
                                            aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Calculate image size
                                            if img.get('width') and img.get('height'):
                                                try:
                                                    width_px = int(img.get('width'))
                                                    height_px = int(img.get('height'))
                                                    img_width = Inches(width_px / 96)
                                                    img_height = Inches(height_px / 96)
                                                except (ValueError, TypeError):
                                                    img_width = min(Inches(2.0), width - Inches(0.4))
                                                    img_height = img_width / aspect_ratio
                                            else:
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            
                                            # Ensure image fits within the box width
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                            
                                            # Center the image horizontally
                                            img_x = x_pos + (width - img_width) / 2
                                            
                                            # IMPORTANT FIX: Position image below text with more room
                                            # Add more space between text and image
                                            img_y = current_y + text_height + Inches(0.3)
                                            
                                            # Make sure image stays inside the box
                                            max_allowed_y = current_y + box_height - img_height - Inches(0.1)
                                            if img_y > max_allowed_y:
                                                img_y = max_allowed_y
                                            
                                            # FIX: Ensure image doesn't exceed box bottom by checking if it fits
                                            if (img_y + img_height) <= (current_y + box_height - Inches(0.1)):
                                                picture = slide.shapes.add_picture(
                                                    img_bytes, 
                                                    img_x, 
                                                    img_y, 
                                                    width=img_width, 
                                                    height=img_height
                                                )
                                            else:
                                                # Image doesn't fit, resize it
                                                available_height = current_y + box_height - img_y - Inches(0.1)
                                                if available_height > Inches(0.2):  # Only add if there's space
                                                    img_height = available_height
                                                    img_width = img_height * aspect_ratio
                                                    
                                                    # Recenter horizontally
                                                    img_x = x_pos + (width - img_width) / 2
                                                    
                                                    picture = slide.shapes.add_picture(
                                                        img_bytes, 
                                                        img_x, 
                                                        img_y, 
                                                        width=img_width, 
                                                        height=img_height
                                                    )
                                    except Exception as img_error:
                                        print(f"Error processing image: {img_error}")
                                    
                                    img_bytes.close()
                        except Exception as img_error:
                            print(f"Error with image: {img_error}")
                    
                    # Update position for next row
                    current_y += box_height + Inches(0.2)
                
                # Process text-only content
                elif has_text:
                    # Estimate text height
                    text_length = len(combined_text)
                    
                    # Calculate height based on text length
                    if text_length < 100:
                        text_height = Inches(0.6)
                    elif text_length < 250:
                        text_height = Inches(1.0)
                    elif text_length < 500:
                        text_height = Inches(1.5)
                    else:
                        # More height for long text
                        text_height = Inches(2.5)
                    
                    # Add extra height for headers and paragraphs
                    header_lines = 1 if header_text.strip() else 0
                    para_lines = 1 if paragraph_text.strip() else 0
                    text_height += Inches(0.2 * (header_lines + para_lines))
                    
                    # Check if it fits available space
                    if text_height > remaining_height - Inches(0.2):
                        if remaining_height > Inches(1.0):
                            text_height = remaining_height - Inches(0.2)
                        else:
                            # Move to next slide if no room
                            if prs:
                                # Create continuation slide (similar to above)
                                slide_layout = prs.slide_layouts[6]
                                next_slide = prs.slides.add_slide(slide_layout)
                                
                                title_element = column.parent.find('h1') or column.parent.find('h2')
                                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                                
                                title_box = next_slide.shapes.add_textbox(
                                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                                )
                                title_frame = title_box.text_frame
                                p = title_frame.add_paragraph()
                                p.text = f"{title_text} (Continued)"
                                p.font.size = Pt(24)
                                p.font.bold = True
                                
                                # Process remaining content on new slide
                                remaining_rows_index = column.find_all('div', class_='row').index(row)
                                new_column = BeautifulSoup("<div></div>", "html.parser").div
                                for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                    new_column.append(r.copy())
                                
                                process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                                break
                            else:
                                current_y += Inches(0.2)
                                continue
                    
                    # Create background shape
                    shape = slide.shapes.add_shape(
                        MSO_SHAPE.ROUNDED_RECTANGLE, 
                        x_pos, current_y, 
                        width, text_height
                    )
                    shape.fill.solid()
                    shape.fill.fore_color.rgb = row_color
                    shape.line.color.rgb = RGBColor(200, 200, 200)
                    
                    # Create text box
                    textbox = slide.shapes.add_textbox(
                        x_pos + Inches(0.1),  # Reduced left padding
                        current_y + Inches(0.1),  # Reduced top padding
                        width - Inches(0.2),  # More width for text
                        text_height - Inches(0.2)
                    )
                    text_frame = textbox.text_frame
                    text_frame.word_wrap = True  # Ensure word wrap is enabled
                    text_frame.margin_top = 0
                    text_frame.margin_bottom = 0
                    text_frame.margin_left = 0
                    text_frame.margin_right = 0
                    
                    # Add header text with color
                    if header_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = header_text.strip()
                        p.font.bold = True
                        p.font.size = Pt(14)
                        p.space_before = 0
                        p.space_after = Pt(2)
                        
                        # Apply text contrast for better visibility
                        #apply_text_contrast(p, row_color)
                        
                        # Apply header color if available
                        header_element = row.find(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
                        if header_element:
                            header_color = get_color_from_class(header_element)
                            parent_div = header_element.find_parent('div', class_=lambda c: c and any(color in c.lower() for color in ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'grey', 'gray', 'pink', 'teal']))
                            if parent_div:
                                header_color = get_color_from_class(parent_div)
                                
                            if header_color != RGBColor(255, 255, 255):
                                p.font.color.rgb = header_color
                    
                    # Add paragraph text with color
                    if paragraph_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = paragraph_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                        p.space_before = 0
                        p.space_after = 0
                        
                        # Apply text contrast for better visibility
                        #apply_text_contrast(p, row_color)
                    
                    # Add other text if present
                    if other_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = other_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                        
                        # Apply text contrast for better visibility
                        #apply_text_contrast(p, row_color)
                    
                    # Update position for next row
                    current_y += text_height + Inches(0.2)
                
                # Process image-only content
                elif has_images:
                    # Default height for image box
                    box_height = Inches(2.0)
                    
                    # Try to get image dimensions
                    if img_tags:
                        img = img_tags[0]
                        height_specified = img.get('height')
                        if height_specified:
                            try:
                                height_px = int(height_specified)
                                box_height = Inches(height_px / 96 + 0.4)  # Add margin
                            except (ValueError, TypeError):
                                pass
                    
                    # Check if image fits
                    if box_height > remaining_height:
                        if remaining_height > Inches(1.0):
                            box_height = remaining_height - Inches(0.1)
                        else:
                            # Move to next slide if needed
                            if prs:
                                # Similar continuation slide creation as above
                                # (Code omitted for brevity but would go here)
                                slide_layout = prs.slide_layouts[6]
                                next_slide = prs.slides.add_slide(slide_layout)
                                
                                title_element = column.parent.find('h1') or column.parent.find('h2')
                                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                                
                                title_box = next_slide.shapes.add_textbox(
                                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                                )
                                title_frame = title_box.text_frame
                                p = title_frame.add_paragraph()
                                p.text = f"{title_text} (Continued)"
                                p.font.size = Pt(24)
                                p.font.bold = True
                                
                                new_column = BeautifulSoup("<div></div>", "html.parser").div
                                new_column.append(row.copy())
                                
                                remaining_rows_index = column.find_all('div', class_='row').index(row) + 1
                                if remaining_rows_index < len(column.find_all('div', class_='row')):
                                    for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                        new_column.append(r.copy())
                                
                                process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                                break
                            else:
                                current_y += Inches(0.2)
                                continue
                    
                    # Create background shape for image
                    bg_shape = slide.shapes.add_shape(
                        MSO_SHAPE.ROUNDED_RECTANGLE, 
                        x_pos, current_y, 
                        width, box_height
                    )
                    bg_shape.fill.solid()
                    bg_shape.fill.fore_color.rgb = row_color
                    bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                    
                    # Add image inside the background shape
                    if img_tags:
                        try:
                            img = img_tags[0]
                            img_url = img.get('src', '')
                            
                            if img_url:
                                response = requests.get(img_url, stream=True, timeout=5)
                                if response.status_code == 200:
                                    img_bytes = BytesIO(response.content)
                                    
                                    try:
                                        # Get dimensions from image
                                        with PILImage.open(img_bytes) as pil_img:
                                            aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Calculate image size
                                            if img.get('width') and img.get('height'):
                                                try:
                                                    width_px = int(img.get('width'))
                                                    height_px = int(img.get('height'))
                                                    img_width = Inches(width_px / 96)
                                                    img_height = Inches(height_px / 96)
                                                except (ValueError, TypeError):
                                                    img_width = min(Inches(2.0), width - Inches(0.4))
                                                    img_height = img_width / aspect_ratio
                                            else:
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            
                                            # Make sure image fits within box
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                                
                                            if img_height > box_height - Inches(0.2):
                                                img_height = box_height - Inches(0.2)
                                                img_width = img_height * aspect_ratio
                                            
                                            # Center the image in the box
                                            img_x = x_pos + (width - img_width) / 2
                                            img_y = current_y + (box_height - img_height) / 2
                                            
                                            picture = slide.shapes.add_picture(
                                                img_bytes, 
                                                img_x, 
                                                img_y, 
                                                width=img_width, 
                                                height=img_height
                                            )
                                    except Exception as img_error:
                                        print(f"Error processing image: {img_error}")
                                    
                                    img_bytes.close()
                        except Exception as img_error:
                            print(f"Error with image: {img_error}")
                    
                    # Update position for next row
                    current_y += box_height + Inches(0.2)
                
                # Add spacing between rows
                current_y += Inches(0.1)
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y


def estimate_row_height(row):
    """More accurate estimation of row height based on content quantity"""
    # Base height for any row
    height = Inches(0.5)
    
    # Get text content
    text_content = row.get_text().strip()
    text_length = len(text_content)
    
    # Calculate height based on text length with more realistic estimates
    # Assuming approximately 40 characters per line and 0.2 inches per line
    if text_length > 0:
        lines = max(1, text_length // 40)
        text_height = Inches(0.2 * lines)
        height = max(height, text_height)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches
                height = max(height, Inches(img_height + 0.4))  # Add margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.0))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2.0))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows + 0.3))  # 0.3 inches per row plus header
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block') or row.find('pre')
    if code_block:
        code_text = code_block.get_text().strip()
        code_lines = len(code_text.split('\n'))
        height = max(height, Inches(0.2 * code_lines + 0.3))  # 0.2 inches per line
    
    # Handle special elements
    if row.find('ul') or row.find('ol'):
        list_items = len(row.find_all('li'))
        height = max(height, Inches(0.25 * list_items + 0.3))  # 0.25 inches per list item
    
    # Add extra padding to prevent content being cut off
    return height + Inches(0.2)

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position



def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    max_y = y_position if y_position is not None else Inches(1.5)
    
    process_headers_with_color(element, text_frame, css_rules)
    process_paragraphs_with_color(element, text_frame, css_rules)
    
    text_height = Inches(0.3) * len(text_frame.paragraphs)
    
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    
    img = element.find('img')
    if img:
        img_top = max_y + text_height + Inches(0.2)
        
        img_url = img.get('src', '')
        img_alt = img.get('alt', 'Image')
        
        try:
            response = requests.get(img_url, stream=True, timeout=10)
            
            if response.status_code == 200:
                img_bytes = BytesIO(response.content)
                
                try:
                    with PILImage.open(img_bytes) as pil_img:
                        img_width, img_height = pil_img.size
                        aspect_ratio = img_width / img_height
                    
                    img_bytes.seek(0)
                    
                    width_specified = img.get('width')
                    height_specified = img.get('height')
                    
                    img_width = Inches(2.0)
                    img_height = img_width / aspect_ratio
                    
                    if width_specified and height_specified:
                        try:
                            width_px = int(width_specified)
                            height_px = int(height_specified)
                            if width_px > 0 and height_px > 0:
                                img_width = Inches(width_px / 96)
                                img_height = Inches(height_px / 96)
                        except (ValueError, TypeError):
                            pass
                    
                    slide_width = Inches(SLIDE_WIDTH_INCHES)
                    left_position = (slide_width - img_width) / 2
                    
                    if img_width > Inches(6):
                        img_width = Inches(6)
                        img_height = img_width / aspect_ratio
                    
                    picture = slide.shapes.add_picture(
                        img_bytes, 
                        left_position, 
                        img_top, 
                        width=img_width, 
                        height=img_height
                    )
                    
                    max_y = max(max_y, img_top + img_height + Inches(0.2))
                    
                except Exception as img_error:
                    print(f"Error processing image: {img_error}")
                    p = text_frame.add_paragraph()
                    p.text = f"[Image Error: {img_alt}]"
            else:
                p = text_frame.add_paragraph()
                p.text = f"[Image not available: {img_alt}]"
                
        except Exception as request_error:
            print(f"Error downloading image: {request_error}")
            p = text_frame.add_paragraph()
            p.text = f"[Image download error: {img_alt}]"
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it





def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename
def apply_slide_background_color(slide_html, current_slide):
    """Apply background color to the entire slide based on color classes"""
    try:
        # Get the background color from the slide's class
        bg_color = get_color_from_class(slide_html)
        
        # Get the RGB values - RGBColor objects store RGB values directly in rgb attribute
        default_color = RGBColor(255, 255, 255)
        
        # Direct comparison of RGBColor objects
        if bg_color != default_color:
            # Add a background shape that covers the entire slide
            left = top = 0
            width = Inches(SLIDE_WIDTH_INCHES)
            height = Inches(SLIDE_HEIGHT_INCHES)
            
            # Create a rectangle that covers the entire slide
            bg_shape = current_slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE, 
                left, top, width, height
            )
            bg_shape.fill.solid()
            bg_shape.fill.fore_color.rgb = bg_color
            
            # No border
            bg_shape.line.width = 0
            
            # Send to back so it doesn't cover other content
            try:
                # Get all shapes and reorder
                shapes = list(current_slide.shapes)
                if len(shapes) > 1:
                    # Move background to first position (back)
                    last_idx = len(shapes) - 1
                    bg_idx = last_idx  # Assuming it's the last one added
                    
                    try:
                        current_slide.shapes._spTree.remove(bg_shape._element)
                        current_slide.shapes._spTree.insert(0, bg_shape._element)
                        print(f"Successfully applied {get_color_name(bg_color)} background to slide")
                    except:
                        print(f"Could not reorder slide background but color was applied")
            except:
                print(f"Applied {get_color_name(bg_color)} background, but couldn't reorder it")
    except Exception as e:
        # If background color application fails, log it but don't crash
        print(f"Warning: Could not apply slide background color: {e}")

def get_color_name(color):
    """Get a color name from an RGBColor object by comparing values"""
    # Map RGB tuples to color names
    color_map_reverse = {
        (255, 200, 200): "red",
        (200, 200, 255): "blue", 
        (200, 255, 200): "green",
        (255, 255, 200): "yellow",
        (255, 225, 180): "orange",
        (230, 200, 255): "purple",
        (220, 220, 220): "grey",
        (255, 200, 230): "pink",
        (180, 240, 240): "teal"
    }
    
    # Try to find the color by direct comparison
    for rgb_tuple, name in color_map_reverse.items():
        rgb_color = RGBColor(*rgb_tuple)
        if str(color.rgb) == str(rgb_color.rgb):
            return name
    
    # If no match, return generic description
    return "custom"

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL  you can push it to git hub final

Error processing row: name 'apply_text_contrast' is not defined
Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx


In [38]:
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor
from PIL import Image as PILImage
from io import BytesIO
import requests
import re
import html
import sys
import os

# Standard slide dimensions in inches
SLIDE_WIDTH_INCHES = 10
SLIDE_HEIGHT_INCHES = 7.5

def html_to_pptx(html_content, output_filename="presentation.pptx"):
    """
    Convert HTML to PowerPoint presentation with support for mixed layouts
    
    Args:
        html_content (str): HTML content with slides
        output_filename (str): Output PowerPoint file name
    """
    # Create a new presentation
    prs = Presentation()
    
    # Parse HTML content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Extract styles from the HTML
    css_rules = extract_css_rules(soup)
    
    # Find all slide divs
    slides = soup.find_all('div', class_='slide')
    
    # Process each slide based on its content
    for slide_index, slide_html in enumerate(slides):
        # Check if this slide has column layout
        left_column = slide_html.find('div', class_='left-column')
        right_column = slide_html.find('div', class_='right-column')
        use_columns_for_slide = bool(left_column or right_column)
        
        if use_columns_for_slide:
            # Process as column layout
            process_column_slide(slide_html, prs, slide_index, css_rules)
        else:
            # Process as standard layout
            process_standard_slide(slide_html, prs, slide_index, css_rules)
    
    # Save the presentation
    prs.save(output_filename)
    print(f"Presentation saved as {output_filename}")


def process_standard_slide(slide, prs, slide_index, css_rules):
    """Process a slide with standard layout and apply background color if specified"""
    # Use a blank slide to avoid placeholders
    slide_layout = prs.slide_layouts[6]  # Blank slide
    current_slide = prs.slides.add_slide(slide_layout)
    
    # Apply background color if the slide has a color class
    apply_slide_background_color(slide, current_slide)
    
    # Add title manually instead of using placeholder
    title_element = slide.find('h1') or slide.find('h2')
    if title_element:
        title_shape = current_slide.shapes.add_textbox(
            Inches(0.5), Inches(0.5), Inches(9), Inches(1)
        )
        title_frame = title_shape.text_frame
        p = title_frame.add_paragraph()
        p.text = title_element.text.strip()
        p.font.size = Pt(32)
        p.font.bold = True
        p.alignment = PP_ALIGN.CENTER
    
    # Process the slide content - now passing prs and slide_index
    process_standard_slide_content(slide, current_slide, css_rules, prs, slide_index)
    
    # Clean up any lingering placeholders
    clean_slide_placeholders(current_slide)

def process_column_slide(slide_html, prs, slide_idx, css_rules):
    """Process a slide with column layout and apply background color if specified"""
    slide_layout = prs.slide_layouts[6]  # Blank slide
    slide = prs.slides.add_slide(slide_layout)

    # Apply background color if the slide has a color class
    apply_slide_background_color(slide_html, slide)

    # Title
    title_element = slide_html.find('h1') or slide_html.find('h2')
    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_idx + 1}"

    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), Inches(slide_width_inches - 1), Inches(1))
    title_frame = title_box.text_frame
    title_frame.text = title_text
    title_frame.paragraphs[0].font.size = Pt(28)
    title_frame.paragraphs[0].font.bold = True

    # Left and Right columns
    left_column = slide_html.find('div', class_='left-column')
    right_column = slide_html.find('div', class_='right-column')

    # Column layout setup
    # Calculate dynamic column widths
    margin = Inches(0.5)
    col_spacing = Inches(0.5)
    
    usable_width = Inches(slide_width_inches - 1 - 0.5)  # Total width minus margins
    col_width = (usable_width - col_spacing) / 2  # Equal width for both columns
    
    left_x = margin
    right_x = margin + col_width + col_spacing
    start_y = Inches(1.5)  # Start below title

    y_left = start_y
    if left_column:
        y_left = process_column_content(left_column, slide, left_x, y_left, col_width, css_rules, slide_idx, prs)

    # Process right column
    y_right = start_y
    if right_column:
        y_right = process_column_content(right_column, slide, right_x, y_right, col_width, css_rules, slide_idx, prs)



# This provides a comprehensive fix for the HTML to PowerPoint converter
# Replace these two key functions with the versions below

# Fix for a common error in the image centering code in process_column_content function





def get_color_from_class(element, default_color=RGBColor(255, 255, 255)):
    """Extract background color based on color classes (red, blue, green, etc.)"""
    # Standard color mapping
    color_map = {
        'red': RGBColor(255, 200, 200),     # Light red
        'blue': RGBColor(200, 200, 255),    # Light blue
        'green': RGBColor(200, 255, 200),   # Light green
        'yellow': RGBColor(255, 255, 200),  # Light yellow
        'orange': RGBColor(255, 225, 180),  # Light orange
        'purple': RGBColor(230, 200, 255),  # Light purple
        'grey': RGBColor(220, 220, 220),    # Light grey
        'gray': RGBColor(220, 220, 220),    # Light gray
        'pink': RGBColor(255, 200, 230),    # Light pink
        'teal': RGBColor(180, 240, 240),    # Light teal
    }
    
    # Check if element has any of the color classes
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    for cls in classes:
        if cls.lower() in color_map:
            return color_map[cls.lower()]
            
    # Return default if no color class found
    return default_color


# Also update the handle_text_overflow function to manage text better


# Modified text handling functions to properly wrap text and prevent slide overflow

def process_text_content(element, text_frame, css_rules, slide=None, prs=None, slide_index=0):
    """Process text content and add it to the text frame with improved text wrapping"""
    # Enable word wrap for the text frame
    text_frame.word_wrap = True
    
    # Set appropriate text frame margins
    text_frame.margin_left = 0
    text_frame.margin_right = 0
    text_frame.margin_top = 0
    text_frame.margin_bottom = 0
    
    # Extract all text with a simpler approach
    all_text = element.get_text().strip()
    
    # If no text, return early
    if not all_text:
        return
    
    # Calculate how much text might fit based on the text frame dimensions
    # This is a simplified estimate - in practice, PowerPoint handles wrapping
    if slide and prs and len(all_text) > 800:  # Reduced from 1000 for better fit
        # Use the text overflow handler for long text
        handle_text_overflow(all_text, text_frame, slide, slide_index, prs)
    else:
        # Use smart paragraph splitting for better text flow
        paragraphs = all_text.split('\n')
        for para_text in paragraphs:
            if not para_text.strip():
                continue
                
            p = text_frame.add_paragraph()
            p.text = para_text.strip()
            
            # Apply basic formatting if needed
            if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                p.font.bold = True
                size_map = {'h1': 28, 'h2': 24, 'h3': 20, 'h4': 18, 'h5': 16, 'h6': 14}
                p.font.size = Pt(size_map.get(element.name, 14))
            elif element.name in ['strong', 'b']:
                p.font.bold = True
            elif element.name in ['em', 'i']:
                p.font.italic = True


def handle_text_overflow(text, text_frame, slide, current_slide_index, prs):
    """Break long text content across multiple slides with improved text wrapping"""
    # Use a more conservative character count to ensure text fits
    chars_per_slide = 600  # Even more conservative than before
    
    # Split text into paragraphs first for better formatting
    paragraphs = text.split('\n')
    
    current_chars = 0
    current_para_index = 0
    
    # Add paragraphs until we hit the character limit
    while current_para_index < len(paragraphs):
        para_text = paragraphs[current_para_index].strip()
        
        # Skip empty paragraphs
        if not para_text:
            current_para_index += 1
            continue
        
        # If adding this paragraph would exceed our limit, create a continuation slide
        if current_chars + len(para_text) > chars_per_slide and current_chars > 0:
            # We need to continue on a new slide
            next_slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank slide
            
            # Add a title indicating continuation
            title_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
            )
            title_frame = title_shape.text_frame
            p = title_frame.add_paragraph()
            p.text = f"Continued from Slide {current_slide_index+1}"
            p.font.italic = True
            p.font.bold = True
            p.font.size = Pt(18)
            
            # Add the content with better positioning
            next_text_shape = next_slide.shapes.add_textbox(
                Inches(0.5), Inches(1.5), Inches(9), Inches(5.5)
            )
            next_text_frame = next_text_shape.text_frame
            next_text_frame.word_wrap = True
            next_text_frame.margin_left = 0
            next_text_frame.margin_right = 0
            next_text_frame.margin_top = 0
            next_text_frame.margin_bottom = 0
            
            # Recursively handle remaining paragraphs
            remaining_paras = paragraphs[current_para_index:]
            remaining_text = '\n'.join(remaining_paras)
            handle_text_overflow(remaining_text, next_text_frame, next_slide, 
                               current_slide_index+1, prs)
            return True
        
        # If we get here, we can add this paragraph to the current slide
        p = text_frame.add_paragraph()
        p.text = para_text
        
        current_chars += len(para_text)
        current_para_index += 1
    
    return False


def process_standard_slide_content(slide_html, current_slide, css_rules, prs=None, slide_index=0):
    """Process content for a standard slide layout with better content fitting"""
    # Track vertical position for adding content
    current_y = Inches(1.5)  # Start after title
    
    # Calculate maximum content height
    max_y = Inches(SLIDE_HEIGHT_INCHES - 0.7)  # 0.7 inch bottom margin
    
    # Get overall text length to determine if we need overflow handling
    full_text = slide_html.get_text().strip()
    
    # If the entire content is very long, handle it specially
    if len(full_text) > 1000 and prs:  # Lower threshold for better content fit
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        content_frame.word_wrap = True
        content_frame.margin_left = 0
        content_frame.margin_right = 0
        content_frame.margin_top = 0
        content_frame.margin_bottom = 0
        
        # Handle as overflow text
        handle_text_overflow(full_text, content_frame, current_slide, slide_index, prs)
        return
    
    # Find and process all row divs
    rows = slide_html.find_all('div', class_='row')
    
    # If no rows are found, process the slide content directly
    if not rows:
        content_shape = current_slide.shapes.add_textbox(
            Inches(0.5), current_y, Inches(9), Inches(5)
        )
        content_frame = content_shape.text_frame
        process_content(slide_html, content_frame, current_slide, css_rules, current_y, prs, slide_index)
    else:
        # Process each row with better spacing management
        for i, row in enumerate(rows):
            # Check remaining space
            remaining_height = max_y - current_y
            if remaining_height < Inches(1.0) and i < len(rows) - 1:
                # Not enough space for meaningful content
                # Create a new slide for remaining content
                if prs:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_element = slide_html.find('h1') or slide_html.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index+1}"
                    
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Process remaining rows on new slide
                    next_y = Inches(1.5)
                    for next_row in rows[i:]:
                        # Calculate content height
                        row_height = estimate_row_height(next_row)
                        
                        # Check if it fits on the continuation slide
                        if next_y + row_height > Inches(SLIDE_HEIGHT_INCHES - 0.7):
                            # Still too much content, need another slide
                            continue_index = rows.index(next_row)
                            if continue_index < len(rows) - 1:
                                # Recursively handle remaining content
                                remaining_rows_html = BeautifulSoup('<div></div>', 'html.parser').div
                                for r in rows[continue_index:]:
                                    remaining_rows_html.append(r.copy())
                                
                                process_standard_slide_content(
                                    remaining_rows_html, next_slide, css_rules, prs, slide_index+1
                                )
                                break
                        
                        # Create a text frame for this row
                        text_shape = next_slide.shapes.add_textbox(
                            Inches(0.5), next_y, Inches(9), row_height
                        )
                        text_frame = text_shape.text_frame
                        text_frame.word_wrap = True
                        
                        # Process the content of the row
                        new_y = process_content(next_row, text_frame, next_slide, 
                                              css_rules, next_y, prs, slide_index+1)
                        
                        # Update position for next row
                        next_y = max(next_y + row_height, new_y) + Inches(0.3) if new_y else next_y + row_height + Inches(0.3)
                    
                    # No need to process more rows on the original slide
                    break
            
            # Estimate row height with more conservative calculation
            row_height = estimate_row_height(row)
            
            # Adjust height if remaining space is limited
            if current_y + row_height > max_y:
                row_height = max_y - current_y - Inches(0.1)
            
            # Create a text frame for this row
            text_shape = current_slide.shapes.add_textbox(
                Inches(0.5), current_y, Inches(9), row_height
            )
            text_frame = text_shape.text_frame
            text_frame.word_wrap = True
            text_frame.margin_left = 0
            text_frame.margin_right = 0
            text_frame.margin_top = 0
            text_frame.margin_bottom = 0
            
            # Process the content of the row
            new_y = process_content(row, text_frame, current_slide, css_rules, current_y, prs, slide_index)
            
            # Update the vertical position for the next row
            current_y = max(current_y + row_height, new_y) + Inches(0.2) if new_y else current_y + row_height + Inches(0.2)
            
            # Check if we're running out of space
            if current_y >= max_y and i < len(rows) - 1:
                # Create a new slide for remaining content
                if prs:
                    next_slide = prs.slides.add_slide(prs.slide_layouts[6])
                    
                    # Add a title indicating continuation
                    title_element = slide_html.find('h1') or slide_html.find('h2')
                    title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index+1}"
                    
                    title_shape = next_slide.shapes.add_textbox(
                        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
                    )
                    title_frame = title_shape.text_frame
                    p = title_frame.add_paragraph()
                    p.text = f"{title_text} (Continued)"
                    p.font.italic = True
                    p.font.bold = True
                    p.font.size = Pt(18)
                    
                    # Recursively process remaining rows on new slide
                    remaining_rows_html = BeautifulSoup('<div></div>', 'html.parser').div
                    for r in rows[i+1:]:
                        remaining_rows_html.append(r.copy())
                    
                    process_standard_slide_content(
                        remaining_rows_html, next_slide, css_rules, prs, slide_index+1
                    )
                break


# Two specific fixes for the HTML to PowerPoint converter:
# 1. Better processing of colors in div class tags for headings and paragraphs
# 2. Fix for the image in slide 2's right column to keep it inside the row box

# FIX 1: Improved color handling from div tags
def process_headers_with_color(element, text_frame, css_rules):
    """Process headers with improved color styling"""
    for header in element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        p = text_frame.add_paragraph()
        p.text = header.get_text().strip()
        p.font.bold = True
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        p.font.size = Pt(size_map.get(header.name, 14))
        
        # Check color in this priority: header tag itself, parent div, grandparent div
        header_color = get_color_from_class(header)
        
        # If no color found in the header itself, look at parent div
        if header_color == RGBColor(255, 255, 255):
            parent_div = header.find_parent('div')
            if parent_div:
                header_color = get_color_from_class(parent_div)
                
                # If still no color, try grandparent
                if header_color == RGBColor(255, 255, 255):
                    grandparent_div = parent_div.find_parent('div')
                    if grandparent_div:
                        header_color = get_color_from_class(grandparent_div)
        
        # Apply the color if one was found
        if header_color != RGBColor(255, 255, 255):
            p.font.color.rgb = header_color


def process_paragraphs_with_color(element, text_frame, css_rules):
    """Process paragraphs with improved color styling"""
    for para in element.find_all('p'):
        p = text_frame.add_paragraph()
        p.text = para.get_text().strip()
        p.font.bold = False
        p.font.size = Pt(12)
        
        # Check color in this priority: paragraph tag itself, parent div, grandparent div
        para_color = get_color_from_class(para)
        
        # If no color found in the paragraph itself, look at parent div
        if para_color == RGBColor(255, 255, 255):
            parent_div = para.find_parent('div')
            if parent_div:
                para_color = get_color_from_class(parent_div)
                
                # If still no color, try grandparent
                if para_color == RGBColor(255, 255, 255):
                    grandparent_div = parent_div.find_parent('div')
                    if grandparent_div:
                        para_color = get_color_from_class(grandparent_div)
        
        # Apply the color if one was found
        if para_color != RGBColor(255, 255, 255):
            p.font.color.rgb = para_color


# FIX 2: Keep images inside row boxes in column layouts
# Targeted fix for image overlap in column content while keeping everything in the same box

def process_column_content(column, slide, x_pos, y_pos, width, css_rules=None, slide_index=0, prs=None):
    current_y = y_pos
    
    try:
        for row in column.find_all('div', class_='row'):
            try:
                remaining_height = Inches(SLIDE_HEIGHT_INCHES - 1.0) - current_y
                
                # Skip if not enough space left on slide and create continuation slide
                if remaining_height < Inches(0.5):
                    if prs:
                        # Create a new slide for overflow content (unchanged)
                        slide_layout = prs.slide_layouts[6]
                        next_slide = prs.slides.add_slide(slide_layout)
                        
                        # Add continuation title
                        title_element = column.parent.find('h1') or column.parent.find('h2')
                        title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                        
                        title_box = next_slide.shapes.add_textbox(
                            Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                        )
                        title_frame = title_box.text_frame
                        p = title_frame.add_paragraph()
                        p.text = f"{title_text} (Continued)"
                        p.font.size = Pt(24)
                        p.font.bold = True
                        
                        # Process remaining rows on new slide
                        remaining_rows_index = column.find_all('div', class_='row').index(row)
                        if remaining_rows_index >= 0:
                            new_column = BeautifulSoup("<div></div>", "html.parser").div
                            for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                new_column.append(r.copy())
                            
                            process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                    break
                
                # Extract content from the row
                img_tags = row.find_all('img')
                has_images = len(img_tags) > 0
                
                # Extract headers and paragraphs
                header_text = ""
                for header in row.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                    header_text += header.get_text().strip() + " "
                
                paragraph_text = ""
                for para in row.find_all('p'):
                    paragraph_text += para.get_text().strip() + " "
                
                # Extract any other text
                other_text = ""
                row_copy = BeautifulSoup(str(row), 'html.parser')
                for tag in row_copy.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']):
                    tag.decompose()
                
                for element in row_copy.descendants:
                    if isinstance(element, str) and element.strip():
                        other_text += element.strip() + " "
                
                combined_text = (header_text + " " + paragraph_text + " " + other_text).strip()
                has_text = bool(combined_text)
                
                # Get row background color
                row_color = get_color_from_class(row)
                
                # Process text with images
                if has_text and has_images:
                    # FIXED: Calculate text height properly based on character count and length
                    text_length = len(combined_text)
                    chars_per_inch = 100  # Approximate number of characters per inch of height
                    
                    # Calculate minimum text height based on text length
                    calculated_text_height = Inches(max(0.5, text_length / chars_per_inch))
                    
                    # Use calculated height or a minimum based on text length
                    if text_length < 100:
                        text_height = max(Inches(0.6), calculated_text_height)
                    elif text_length < 250:
                        text_height = max(Inches(1.0), calculated_text_height)
                    elif text_length < 500:
                        text_height = max(Inches(2.0), calculated_text_height)
                    else:
                        text_height = max(Inches(3.0), calculated_text_height)
                    
                    # Calculate heights for images - with realistic dimensions
                    image_height = Inches(0)
                    if img_tags:
                        img = img_tags[0]
                        if img.get('height'):
                            try:
                                img_height = int(img.get('height'))
                                image_height = Inches(img_height / 96 + 0.4)
                            except (ValueError, TypeError):
                                image_height = Inches(1.0)  # Default if parsing fails
                        else:
                            image_height = Inches(1.0)  # Default image height
                    
                    # FIXED: Add extra buffer space between text and image
                    buffer_space = Inches(0.5)  # Increased from previous versions
                    
                    # Calculate total box height with text, buffer, and image
                    box_height = text_height + buffer_space + image_height + Inches(0.2)  # Added padding
                    
                    # Check if it fits available space
                    if box_height > remaining_height:
                        if remaining_height > Inches(2.0):  # Minimum viable space
                            # Adjust components to fit
                            box_height = remaining_height - Inches(0.1)
                            
                            # Prioritize text space over image size if needed
                            if text_height > (box_height * 0.6):
                                text_height = box_height * 0.6
                                buffer_space = Inches(0.3)
                                image_height = box_height - text_height - buffer_space - Inches(0.2)
                            else:
                                image_height = box_height - text_height - buffer_space - Inches(0.2)
                        else:
                            # Move to next slide if no room (code unchanged)
                            if prs:
                                # Create continuation slide
                                slide_layout = prs.slide_layouts[6]
                                next_slide = prs.slides.add_slide(slide_layout)
                                
                                title_element = column.parent.find('h1') or column.parent.find('h2')
                                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                                
                                title_box = next_slide.shapes.add_textbox(
                                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                                )
                                title_frame = title_box.text_frame
                                p = title_frame.add_paragraph()
                                p.text = f"{title_text} (Continued)"
                                p.font.size = Pt(24)
                                p.font.bold = True
                                
                                # Process remaining content on next slide
                                new_column = BeautifulSoup("<div></div>", "html.parser").div
                                new_column.append(row.copy())
                                
                                remaining_rows_index = column.find_all('div', class_='row').index(row) + 1
                                if remaining_rows_index < len(column.find_all('div', class_='row')):
                                    for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                        new_column.append(r.copy())
                                
                                process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                                break
                            else:
                                current_y += Inches(0.2)
                                continue
                    
                    # Create background box
                    bg_shape = slide.shapes.add_shape(
                        MSO_SHAPE.ROUNDED_RECTANGLE, 
                        x_pos, current_y, 
                        width, box_height
                    )
                    bg_shape.fill.solid()
                    bg_shape.fill.fore_color.rgb = row_color
                    bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                    
                    # FIXED: Create text box that only covers the text portion of the content
                    text_box = slide.shapes.add_textbox(
                        x_pos + Inches(0.1),
                        current_y + Inches(0.1),
                        width - Inches(0.2),
                        text_height
                    )
                    text_frame = text_box.text_frame
                    text_frame.word_wrap = True
                    text_frame.margin_top = 0
                    text_frame.margin_bottom = 0
                    text_frame.margin_left = 0
                    text_frame.margin_right = 0
                    
                    # Add header text (unchanged)
                    if header_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = header_text.strip()
                        p.font.bold = True
                        p.font.size = Pt(14)
                        p.space_before = 0
                        p.space_after = Pt(2)
                    
                    # Add paragraph text (unchanged)
                    if paragraph_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = paragraph_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                        p.space_before = 0
                        p.space_after = 0
                    
                    # Add other text if present (unchanged)
                    if other_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = other_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                    
                    # FIXED: Add image with appropriate positioning - clearly below the text
                    if img_tags and image_height > Inches(0.2):
                        try:
                            img = img_tags[0]
                            img_url = img.get('src', '')
                            
                            if img_url:
                                response = requests.get(img_url, stream=True, timeout=5)
                                if response.status_code == 200:
                                    img_bytes = BytesIO(response.content)
                                    
                                    try:
                                        # Get dimensions from image
                                        with PILImage.open(img_bytes) as pil_img:
                                            aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Calculate image size (standard code)
                                            if img.get('width') and img.get('height'):
                                                try:
                                                    width_px = int(img.get('width'))
                                                    height_px = int(img.get('height'))
                                                    img_width = Inches(width_px / 96)
                                                    img_height = Inches(height_px / 96)
                                                except (ValueError, TypeError):
                                                    img_width = min(Inches(2.0), width - Inches(0.4))
                                                    img_height = img_width / aspect_ratio
                                            else:
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            
                                            # Ensure image fits within column width
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                            
                                            # CRUCIAL FIX: Place image properly below text with enough space
                                            img_x = x_pos + (width - img_width) / 2  # Center horizontally
                                            
                                            # Calculate position based on text height, using the buffer space
                                            img_y = current_y + text_height + buffer_space
                                            
                                            # Double check that image fits in the box
                                            if (img_y + img_height) <= (current_y + box_height - Inches(0.1)):
                                                picture = slide.shapes.add_picture(
                                                    img_bytes, 
                                                    img_x, 
                                                    img_y, 
                                                    width=img_width, 
                                                    height=img_height
                                                )
                                            else:
                                                # Must resize image to fit
                                                available_height = current_y + box_height - img_y - Inches(0.1)
                                                if available_height > Inches(0.2):  # Only add if space available
                                                    img_height = available_height
                                                    img_width = img_height * aspect_ratio
                                                    
                                                    # Recenter with new dimensions
                                                    img_x = x_pos + (width - img_width) / 2
                                                    
                                                    picture = slide.shapes.add_picture(
                                                        img_bytes, 
                                                        img_x, 
                                                        img_y, 
                                                        width=img_width, 
                                                        height=img_height
                                                    )
                                    except Exception as img_error:
                                        print(f"Error processing image: {img_error}")
                                    
                                    img_bytes.close()
                        except Exception as img_error:
                            print(f"Error with image: {img_error}")
                    
                    # Update position for next row
                    current_y += box_height + Inches(0.2)
                
                # Process text-only content (unchanged)
                elif has_text:
                    # Standard text handling code...
                    # (code unchanged from your original implementation)
                    # Estimate text height
                    text_length = len(combined_text)
                    
                    # Calculate height based on text length
                    if text_length < 100:
                        text_height = Inches(0.6)
                    elif text_length < 250:
                        text_height = Inches(1.0)
                    elif text_length < 500:
                        text_height = Inches(1.5)
                    else:
                        # More height for long text
                        text_height = Inches(2.5)
                    
                    # Add extra height for headers and paragraphs
                    header_lines = 1 if header_text.strip() else 0
                    para_lines = 1 if paragraph_text.strip() else 0
                    text_height += Inches(0.2 * (header_lines + para_lines))
                    
                    # Rest of text-only handling (standard code)
                    # Check if it fits available space
                    if text_height > remaining_height - Inches(0.2):
                        if remaining_height > Inches(1.0):
                            text_height = remaining_height - Inches(0.2)
                        else:
                            # Move to next slide if no room
                            if prs:
                                # Standard continuation slide code
                                slide_layout = prs.slide_layouts[6]
                                next_slide = prs.slides.add_slide(slide_layout)
                                
                                title_element = column.parent.find('h1') or column.parent.find('h2')
                                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                                
                                title_box = next_slide.shapes.add_textbox(
                                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                                )
                                title_frame = title_box.text_frame
                                p = title_frame.add_paragraph()
                                p.text = f"{title_text} (Continued)"
                                p.font.size = Pt(24)
                                p.font.bold = True
                                
                                # Process remaining content on new slide
                                remaining_rows_index = column.find_all('div', class_='row').index(row)
                                new_column = BeautifulSoup("<div></div>", "html.parser").div
                                for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                    new_column.append(r.copy())
                                
                                process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                                break
                            else:
                                current_y += Inches(0.2)
                                continue
                    
                    # Create background shape
                    shape = slide.shapes.add_shape(
                        MSO_SHAPE.ROUNDED_RECTANGLE, 
                        x_pos, current_y, 
                        width, text_height
                    )
                    shape.fill.solid()
                    shape.fill.fore_color.rgb = row_color
                    shape.line.color.rgb = RGBColor(200, 200, 200)
                    
                    # Create text box
                    textbox = slide.shapes.add_textbox(
                        x_pos + Inches(0.1),  # Reduced left padding
                        current_y + Inches(0.1),  # Reduced top padding
                        width - Inches(0.2),  # More width for text
                        text_height - Inches(0.2)
                    )
                    text_frame = textbox.text_frame
                    text_frame.word_wrap = True  # Ensure word wrap is enabled
                    text_frame.margin_top = 0
                    text_frame.margin_bottom = 0
                    text_frame.margin_left = 0
                    text_frame.margin_right = 0
                    
                    # Add header text with color
                    if header_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = header_text.strip()
                        p.font.bold = True
                        p.font.size = Pt(14)
                        p.space_before = 0
                        p.space_after = Pt(2)
                    
                    # Add paragraph text with color
                    if paragraph_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = paragraph_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                        p.space_before = 0
                        p.space_after = 0
                    
                    # Add other text if present
                    if other_text.strip():
                        p = text_frame.add_paragraph()
                        p.text = other_text.strip()
                        p.font.bold = False
                        p.font.size = Pt(12)
                    
                    # Update position for next row
                    current_y += text_height + Inches(0.2)
                
                # Process image-only content (unchanged)
                elif has_images:
                    # Default height for image box
                    box_height = Inches(2.0)
                    
                    # Try to get image dimensions
                    if img_tags:
                        img = img_tags[0]
                        height_specified = img.get('height')
                        if height_specified:
                            try:
                                height_px = int(height_specified)
                                box_height = Inches(height_px / 96 + 0.4)  # Add margin
                            except (ValueError, TypeError):
                                pass
                    
                    # Rest of image-only handling (unchanged)
                    # Check if image fits
                    if box_height > remaining_height:
                        if remaining_height > Inches(1.0):
                            box_height = remaining_height - Inches(0.1)
                        else:
                            # Move to next slide if needed
                            if prs:
                                slide_layout = prs.slide_layouts[6]
                                next_slide = prs.slides.add_slide(slide_layout)
                                
                                title_element = column.parent.find('h1') or column.parent.find('h2')
                                title_text = title_element.get_text().strip() if title_element else f"Slide {slide_index + 1}"
                                
                                title_box = next_slide.shapes.add_textbox(
                                    Inches(0.5), Inches(0.3), Inches(SLIDE_WIDTH_INCHES - 1), Inches(0.8)
                                )
                                title_frame = title_box.text_frame
                                p = title_frame.add_paragraph()
                                p.text = f"{title_text} (Continued)"
                                p.font.size = Pt(24)
                                p.font.bold = True
                                
                                new_column = BeautifulSoup("<div></div>", "html.parser").div
                                new_column.append(row.copy())
                                
                                remaining_rows_index = column.find_all('div', class_='row').index(row) + 1
                                if remaining_rows_index < len(column.find_all('div', class_='row')):
                                    for r in column.find_all('div', class_='row')[remaining_rows_index:]:
                                        new_column.append(r.copy())
                                
                                process_column_content(new_column, next_slide, x_pos, Inches(1.5), width, css_rules, slide_index + 1, prs)
                                break
                            else:
                                current_y += Inches(0.2)
                                continue
                    
                    # Create background shape for image
                    bg_shape = slide.shapes.add_shape(
                        MSO_SHAPE.ROUNDED_RECTANGLE, 
                        x_pos, current_y, 
                        width, box_height
                    )
                    bg_shape.fill.solid()
                    bg_shape.fill.fore_color.rgb = row_color
                    bg_shape.line.color.rgb = RGBColor(200, 200, 200)
                    
                    # Add image inside the background shape
                    if img_tags:
                        try:
                            img = img_tags[0]
                            img_url = img.get('src', '')
                            
                            if img_url:
                                response = requests.get(img_url, stream=True, timeout=5)
                                if response.status_code == 200:
                                    img_bytes = BytesIO(response.content)
                                    
                                    try:
                                        # Get dimensions from image
                                        with PILImage.open(img_bytes) as pil_img:
                                            aspect_ratio = pil_img.width / pil_img.height
                                            
                                            img_bytes.seek(0)  # Reset file pointer
                                            
                                            # Calculate image size
                                            if img.get('width') and img.get('height'):
                                                try:
                                                    width_px = int(img.get('width'))
                                                    height_px = int(img.get('height'))
                                                    img_width = Inches(width_px / 96)
                                                    img_height = Inches(height_px / 96)
                                                except (ValueError, TypeError):
                                                    img_width = min(Inches(2.0), width - Inches(0.4))
                                                    img_height = img_width / aspect_ratio
                                            else:
                                                img_width = min(Inches(2.0), width - Inches(0.4))
                                                img_height = img_width / aspect_ratio
                                            
                                            # Make sure image fits within box
                                            if img_width > width - Inches(0.4):
                                                img_width = width - Inches(0.4)
                                                img_height = img_width / aspect_ratio
                                                
                                            if img_height > box_height - Inches(0.2):
                                                img_height = box_height - Inches(0.2)
                                                img_width = img_height * aspect_ratio
                                            
                                            # Center the image in the box
                                            img_x = x_pos + (width - img_width) / 2
                                            img_y = current_y + (box_height - img_height) / 2
                                            
                                            picture = slide.shapes.add_picture(
                                                img_bytes, 
                                                img_x, 
                                                img_y, 
                                                width=img_width, 
                                                height=img_height
                                            )
                                    except Exception as img_error:
                                        print(f"Error processing image: {img_error}")
                                    
                                    img_bytes.close()
                        except Exception as img_error:
                            print(f"Error with image: {img_error}")
                    
                    # Update position for next row
                    current_y += box_height + Inches(0.2)
                
                # Add spacing between rows
                current_y += Inches(0.1)
                
            except Exception as row_error:
                print(f"Error processing row: {row_error}")
                current_y += Inches(0.5)
    
    except Exception as column_error:
        print(f"Error processing column: {column_error}")
    
    return current_y


def estimate_row_height(row):
    """More accurate estimation of row height based on content quantity"""
    # Base height for any row
    height = Inches(0.5)
    
    # Get text content
    text_content = row.get_text().strip()
    text_length = len(text_content)
    
    # Calculate height based on text length with more realistic estimates
    # Assuming approximately 40 characters per line and 0.2 inches per line
    if text_length > 0:
        lines = max(1, text_length // 40)
        text_height = Inches(0.2 * lines)
        height = max(height, text_height)
    
    # Add height for images
    img = row.find('img')
    if img:
        # If height attribute exists, use it
        if img.get('height'):
            try:
                img_height = int(img.get('height')) / 96  # Convert px to inches
                height = max(height, Inches(img_height + 0.4))  # Add margin
            except (ValueError, TypeError):
                height = max(height, Inches(2.0))  # Default if can't parse
        else:
            # Default height for images
            height = max(height, Inches(2.0))
    
    # Add height for tables
    if row.find('table'):
        rows = len(row.find_all('tr'))
        height = max(height, Inches(0.3 * rows + 0.3))  # 0.3 inches per row plus header
    
    # Add height for code blocks
    code_block = row.find('div', class_='code-block') or row.find('pre')
    if code_block:
        code_text = code_block.get_text().strip()
        code_lines = len(code_text.split('\n'))
        height = max(height, Inches(0.2 * code_lines + 0.3))  # 0.2 inches per line
    
    # Handle special elements
    if row.find('ul') or row.find('ol'):
        list_items = len(row.find_all('li'))
        height = max(height, Inches(0.25 * list_items + 0.3))  # 0.25 inches per list item
    
    # Add extra padding to prevent content being cut off
    return height + Inches(0.2)

def add_textbox_relative(slide, top, left, width, height, text, font_size=14, bg_color=None):
    # Optional: add a background shape
    if bg_color:
        shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = bg_color
        shape.line.color.rgb = RGBColor(200, 200, 200)

    # Add the actual textbox
    textbox = slide.shapes.add_textbox(left, top, width, height)
    text_frame = textbox.text_frame
    text_frame.word_wrap = True
    text_frame.text = text

    # Format text
    paragraph = text_frame.paragraphs[0]
    paragraph.font.size = Pt(font_size)
    paragraph.font.bold = False

    return top + height + Inches(0.1)  # Return next top position



def process_content(element, text_frame, slide, css_rules, y_position=None, prs=None, slide_index=0):
    max_y = y_position if y_position is not None else Inches(1.5)
    
    process_headers_with_color(element, text_frame, css_rules)
    process_paragraphs_with_color(element, text_frame, css_rules)
    
    text_height = Inches(0.3) * len(text_frame.paragraphs)
    
    if element.find('table'):
        process_table(element.find('table'), text_frame, css_rules)
    elif element.find('ul') or element.find('ol'):
        process_list(element, text_frame, css_rules)
    elif element.find(['pre', 'code']) or element.find('div', class_='code-block'):
        process_code_block(element, text_frame, css_rules)
    
    img = element.find('img')
    if img:
        img_top = max_y + text_height + Inches(0.2)
        
        img_url = img.get('src', '')
        img_alt = img.get('alt', 'Image')
        
        try:
            response = requests.get(img_url, stream=True, timeout=10)
            
            if response.status_code == 200:
                img_bytes = BytesIO(response.content)
                
                try:
                    with PILImage.open(img_bytes) as pil_img:
                        img_width, img_height = pil_img.size
                        aspect_ratio = img_width / img_height
                    
                    img_bytes.seek(0)
                    
                    width_specified = img.get('width')
                    height_specified = img.get('height')
                    
                    img_width = Inches(2.0)
                    img_height = img_width / aspect_ratio
                    
                    if width_specified and height_specified:
                        try:
                            width_px = int(width_specified)
                            height_px = int(height_specified)
                            if width_px > 0 and height_px > 0:
                                img_width = Inches(width_px / 96)
                                img_height = Inches(height_px / 96)
                        except (ValueError, TypeError):
                            pass
                    
                    slide_width = Inches(SLIDE_WIDTH_INCHES)
                    left_position = (slide_width - img_width) / 2
                    
                    if img_width > Inches(6):
                        img_width = Inches(6)
                        img_height = img_width / aspect_ratio
                    
                    picture = slide.shapes.add_picture(
                        img_bytes, 
                        left_position, 
                        img_top, 
                        width=img_width, 
                        height=img_height
                    )
                    
                    max_y = max(max_y, img_top + img_height + Inches(0.2))
                    
                except Exception as img_error:
                    print(f"Error processing image: {img_error}")
                    p = text_frame.add_paragraph()
                    p.text = f"[Image Error: {img_alt}]"
            else:
                p = text_frame.add_paragraph()
                p.text = f"[Image not available: {img_alt}]"
                
        except Exception as request_error:
            print(f"Error downloading image: {request_error}")
            p = text_frame.add_paragraph()
            p.text = f"[Image download error: {img_alt}]"
    
    return max_y




def process_list(element, text_frame, css_rules):
    """Process HTML lists and add them to the text frame"""
    # First add any text before the list
    text_before = ''
    list_elem = element.find(['ul', 'ol'])
    
    for sibling in list_elem.previous_siblings:
        if isinstance(sibling, str) and sibling.strip():
            text_before += sibling.strip() + ' '
        elif hasattr(sibling, 'get_text'):
            text_before += sibling.get_text().strip() + ' '
            
    if text_before.strip():
        p = text_frame.add_paragraph()
        p.text = text_before.strip()
    
    # Process list items
    is_ordered = list_elem.name == 'ol'
    list_items = list_elem.find_all('li')
    
    for i, item in enumerate(list_items):
        p = text_frame.add_paragraph()
        prefix = f"{i+1}. " if is_ordered else "• "
        p.text = prefix + item.get_text().strip()
        p.level = 1  # Set indentation level
        
        apply_css_to_paragraph(p, item, css_rules)

def process_table(table, text_frame, css_rules):
    """Process HTML table and add it to the text frame as formatted text"""
    # Add table caption or heading
    p = text_frame.add_paragraph()
    p.text = "[Table]"
    p.font.bold = True
    
    # Process headers
    headers = [th.get_text().strip() for th in table.find_all('th')]
    if headers:
        p = text_frame.add_paragraph()
        p.text = " | ".join(headers)
        p.font.bold = True
        
        # Add separator line
        p = text_frame.add_paragraph()
        p.text = "-" * (sum(len(h) for h in headers) + 3 * (len(headers) - 1))
    
    # Process rows
    for row in table.find_all('tr'):
        cells = [td.get_text().strip() for td in row.find_all('td')]
        if cells:
            p = text_frame.add_paragraph()
            p.text = " | ".join(cells)

def process_code_block(element, text_frame, css_rules):
    """Process code blocks and add them to the text frame"""
    # Find the code block element
    code_elem = element.find(['pre', 'code']) or element.find('div', class_='code-block')
    
    if not code_elem:
        return
        
    # Add a label
    p = text_frame.add_paragraph()
    p.text = "[Code]"
    p.font.bold = True
    
    # Process code lines
    code_text = code_elem.get_text().strip()
    lines = code_text.split('\n')
    
    for line in lines:
        p = text_frame.add_paragraph()
        p.text = line
        p.font.name = "Courier New"
        p.font.size = Pt(9)
def process_image_with_download(element, text_frame, slide, css_rules, y_position=None):
    """Process images with improved error handling to prevent file corruption"""
    img = element.find('img')
    if not img:
        return y_position
    
    # Get image attributes
    img_url = img.get('src', '')
    img_alt = img.get('alt', 'Image')
    
    # Use standard slide dimensions
    slide_width_inches = SLIDE_WIDTH_INCHES
    slide_height_inches = SLIDE_HEIGHT_INCHES
    
    # Calculate content area
    left = Inches(0.5)
    top = y_position if y_position is not None else Inches(1.5)
    
    # Calculate available height on current slide
    available_height = Inches(slide_height_inches - 1.0) - top  # 1.0 inch margin at bottom for safety
    
    # Skip if not enough space
    if available_height < Inches(0.5):
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - not enough space]"
        return y_position
    
    try:
        # Download the image with timeout
        response = requests.get(img_url, stream=True, timeout=10)
        
        if response.status_code != 200:
            # Failed to download image
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - download failed]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
        
        # Create image from content
        img_bytes = BytesIO(response.content)
        
        try:
            # Try to open the image to validate it
            with PILImage.open(img_bytes) as pil_img:
                img_width, img_height = pil_img.size
                
                # Skip extremely small or zero-dimension images
                if img_width < 10 or img_height < 10:
                    p = text_frame.add_paragraph()
                    p.text = f"[Image: {img_alt} - invalid dimensions]"
                    p.alignment = PP_ALIGN.CENTER
                    return y_position + Inches(0.5)
                
                aspect_ratio = img_width / img_height
            
            # Reset file pointer
            img_bytes.seek(0)
            
            # Get dimensions from HTML
            width_specified = img.get('width')
            height_specified = img.get('height')
            
            # Default dimensions
            width = Inches(6)  # 6 inches wide by default
            height = Inches(6 / aspect_ratio)
            
            # Try to use HTML dimensions if available
            if width_specified:
                try:
                    width_px = int(width_specified)
                    if 10 <= width_px <= 2000:  # Reasonable range check
                        width = Inches(width_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep default width
            
            if height_specified:
                try:
                    height_px = int(height_specified)
                    if 10 <= height_px <= 2000:  # Reasonable range check
                        height = Inches(height_px / 96)
                except (ValueError, TypeError):
                    pass  # Keep calculated height
            
            # Fit to slide width and available height
            max_width = Inches(slide_width_inches - 1.0)  # 0.5 inch margins on each side
            if width > max_width:
                width = max_width
                height = width / aspect_ratio
            
            if height > available_height:
                height = available_height
                width = height * aspect_ratio
            
            # Set minimum dimensions to avoid errors
            width = max(width, Inches(0.1))
            height = max(height, Inches(0.1))
            
            # Create a new BytesIO object to ensure memory is properly managed
            img_data = BytesIO(img_bytes.getvalue())
            img_bytes.close()  # Close the original BytesIO object
            
            # Add image to slide with explicit error handling
            try:
                picture = slide.shapes.add_picture(img_data, left, top, width=width, height=height)
                
                # Close the BytesIO object after adding the picture
                img_data.close()
                
                # Update position for next element
                new_top = top + height + Inches(0.1)
                
                # Add caption if available
                caption = element.find('p', class_='caption')
                if caption and caption.get_text().strip():
                    caption_text = caption.get_text().strip()
                    
                    # Only add caption if there's space
                    if new_top + Inches(0.3) < Inches(slide_height_inches - 0.5):
                        try:
                            caption_box = slide.shapes.add_textbox(
                                left, new_top, width, Inches(0.3)
                            )
                            caption_frame = caption_box.text_frame
                            p = caption_frame.add_paragraph()
                            p.text = caption_text
                            p.font.italic = True
                            p.alignment = PP_ALIGN.CENTER
                            
                            new_top += Inches(0.4)
                        except Exception as caption_error:
                            print(f"Error adding caption: {caption_error}")
                            # Add caption in text frame instead
                            p = text_frame.add_paragraph()
                            p.text = f"Caption: {caption_text}"
                            p.font.italic = True
                
                return new_top
                
            except Exception as picture_error:
                print(f"Error adding picture to slide: {picture_error}")
                # Fallback to text
                p = text_frame.add_paragraph()
                p.text = f"[Image: {img_alt} - failed to add to slide]"
                p.alignment = PP_ALIGN.CENTER
                return y_position + Inches(0.5)
                
        except Exception as pil_error:
            print(f"Error processing image data: {pil_error}")
            # Invalid image data
            p = text_frame.add_paragraph()
            p.text = f"[Image: {img_alt} - invalid image]"
            p.alignment = PP_ALIGN.CENTER
            return y_position + Inches(0.5)
            
    except Exception as request_error:
        print(f"Error downloading image {img_url}: {request_error}")
        # Failed request
        p = text_frame.add_paragraph()
        p.text = f"[Image: {img_alt} - download error]"
        p.alignment = PP_ALIGN.CENTER
        return y_position + Inches(0.5)

def apply_css_to_paragraph(paragraph, element, css_rules):
    """Apply CSS styling to a PowerPoint paragraph based on element classes"""
    # Get classes from the element
    classes = element.get('class', [])
    if isinstance(classes, str):
        classes = classes.split()
        
    # Apply styling from each class
    for class_name in classes:
        if class_name in css_rules:
            props = css_rules[class_name]
            
            # Text alignment
            if 'text-align' in props:
                align_value = props['text-align'].lower()
                if align_value == 'center':
                    paragraph.alignment = PP_ALIGN.CENTER
                elif align_value == 'right':
                    paragraph.alignment = PP_ALIGN.RIGHT
                elif align_value == 'justify':
                    paragraph.alignment = PP_ALIGN.JUSTIFY
                    
            # Font size (approximate conversion from px/em to points)
            if 'font-size' in props:
                size_str = props['font-size']
                size_value = extract_numeric_value(size_str)
                
                if size_value:
                    # Convert common units to points (approximate)
                    if 'px' in size_str:
                        paragraph.font.size = Pt(size_value * 0.75)  # px to pt conversion
                    elif 'em' in size_str:
                        paragraph.font.size = Pt(size_value * 12)  # em to pt conversion
                    elif 'pt' in size_str:
                        paragraph.font.size = Pt(size_value)
                    else:
                        # Default unit or percentage
                        paragraph.font.size = Pt(size_value)
                        
            # Font weight
            if 'font-weight' in props:
                weight = props['font-weight'].lower()
                if weight in ['bold', 'bolder', '700', '800', '900']:
                    paragraph.font.bold = True
                    
            # Font style
            if 'font-style' in props:
                style = props['font-style'].lower()
                if style == 'italic':
                    paragraph.font.italic = True
                    
            # Text color (simplified conversion)
            if 'color' in props:
                color = props['color']
                rgb = extract_rgb_color(color)
                if rgb:
                    paragraph.font.color.rgb = RGBColor(*rgb)

def extract_css_rules(soup):
    """Extract CSS rules from style tags in the HTML"""
    css_rules = {}
    
    # Find all style tags
    style_tags = soup.find_all('style')
    for style_tag in style_tags:
        style_content = style_tag.string
        if not style_content:
            continue
            
        # Extract class-based rules
        for rule in re.findall(r'\.([^\s{]+)\s*{([^}]+)}', style_content):
            class_name = rule[0]
            properties = {}
            
            # Extract properties
            for prop in re.findall(r'([^:;]+):\s*([^;]+);?', rule[1]):
                prop_name = prop[0].strip()
                prop_value = prop[1].strip()
                properties[prop_name] = prop_value
                
            css_rules[class_name] = properties
    
    return css_rules

def extract_numeric_value(value_str):
    """Extract numeric value from a CSS value string"""
    match = re.search(r'([0-9.]+)', value_str)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            pass
    return None

def extract_rgb_color(color_str):
    """Extract RGB values from a CSS color string"""
    # Handle hex colors
    hex_match = re.search(r'#([0-9a-fA-F]{6})', color_str)
    if hex_match:
        hex_value = hex_match.group(1)
        return (
            int(hex_value[0:2], 16),
            int(hex_value[2:4], 16),
            int(hex_value[4:6], 16)
        )
        
    # Handle rgb() format
    rgb_match = re.search(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
    if rgb_match:
        return (
            int(rgb_match.group(1)),
            int(rgb_match.group(2)),
            int(rgb_match.group(3))
        )
        
    return None
# Modify the text extraction in process_column_content function
# Look for the following function in your code and replace it





def clean_slide_placeholders(slide):
    """Remove or hide any empty placeholders on the slide"""
    for shape in slide.shapes:
        # Check if it's a placeholder
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            try:
                # Try setting it to empty to remove the "Click to add..." text
                if hasattr(shape, 'text'):
                    shape.text = ""
                # Or try to hide it
                if hasattr(shape, 'element') and hasattr(shape.element, 'getparent'):
                    parent = shape.element.getparent()
                    if parent is not None:
                        parent.remove(shape.element)
            except:
                # If we can't modify it, just continue
                pass

def create_html_file_from_string(html_content, filename="temp_html.html"):
    """
    Create a temporary HTML file from a string
    
    Args:
        html_content (str): HTML content as a string
        filename (str): Filename to save the HTML content
        
    Returns:
        str: Path to the created HTML file
    """
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return filename
def apply_slide_background_color(slide_html, current_slide):
    """Apply background color to the entire slide based on color classes"""
    try:
        # Get the background color from the slide's class
        bg_color = get_color_from_class(slide_html)
        
        # Get the RGB values - RGBColor objects store RGB values directly in rgb attribute
        default_color = RGBColor(255, 255, 255)
        
        # Direct comparison of RGBColor objects
        if bg_color != default_color:
            # Add a background shape that covers the entire slide
            left = top = 0
            width = Inches(SLIDE_WIDTH_INCHES)
            height = Inches(SLIDE_HEIGHT_INCHES)
            
            # Create a rectangle that covers the entire slide
            bg_shape = current_slide.shapes.add_shape(
                MSO_SHAPE.RECTANGLE, 
                left, top, width, height
            )
            bg_shape.fill.solid()
            bg_shape.fill.fore_color.rgb = bg_color
            
            # No border
            bg_shape.line.width = 0
            
            # Send to back so it doesn't cover other content
            try:
                # Get all shapes and reorder
                shapes = list(current_slide.shapes)
                if len(shapes) > 1:
                    # Move background to first position (back)
                    last_idx = len(shapes) - 1
                    bg_idx = last_idx  # Assuming it's the last one added
                    
                    try:
                        current_slide.shapes._spTree.remove(bg_shape._element)
                        current_slide.shapes._spTree.insert(0, bg_shape._element)
                        print(f"Successfully applied {get_color_name(bg_color)} background to slide")
                    except:
                        print(f"Could not reorder slide background but color was applied")
            except:
                print(f"Applied {get_color_name(bg_color)} background, but couldn't reorder it")
    except Exception as e:
        # If background color application fails, log it but don't crash
        print(f"Warning: Could not apply slide background color: {e}")

def get_color_name(color):
    """Get a color name from an RGBColor object by comparing values"""
    # Map RGB tuples to color names
    color_map_reverse = {
        (255, 200, 200): "red",
        (200, 200, 255): "blue", 
        (200, 255, 200): "green",
        (255, 255, 200): "yellow",
        (255, 225, 180): "orange",
        (230, 200, 255): "purple",
        (220, 220, 220): "grey",
        (255, 200, 230): "pink",
        (180, 240, 240): "teal"
    }
    
    # Try to find the color by direct comparison
    for rgb_tuple, name in color_map_reverse.items():
        rgb_color = RGBColor(*rgb_tuple)
        if str(color.rgb) == str(rgb_color.rgb):
            return name
    
    # If no match, return generic description
    return "custom"

def html_from_file_to_pptx(html_file, output_file="presentation.pptx"):
    """
    Process HTML file and convert to PowerPoint
    
    Args:
        html_file (str): Path to HTML file
        output_file (str): Path to save PowerPoint file
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
    except Exception as e:
        print(f"Error: {e}")

# Example usage
if __name__ == "__main__":
    # Default file names
    html_file = "sample1.html"          # Default input HTML file
    output_file = "presentation.pptx"   # Default output PowerPoint file
    
    # Check for command line arguments, ignoring Jupyter kernel arguments
    args = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    
    if len(args) > 0:
        html_file = args[0]
    if len(args) > 1:
        output_file = args[1]
    
    # Try to read the HTML file
    try:
        with open(html_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
            
        # Convert HTML to PowerPoint
        html_to_pptx(html_content, output_file)
        print(f"Successfully converted {html_file} to {output_file}")
        
    except FileNotFoundError:
        print(f"File not found: {html_file}")
        print("Please ensure the HTML file exists or specify the correct path.")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
    except Exception as e:
        print(f"Error: {e}")
        print("Usage: python html_to_pptx.py <html_file> [output_pptx]")
        #FINAL  you can push it to git hub final

Presentation saved as presentation.pptx
Successfully converted sample1.html to presentation.pptx
