In [56]:
from pptx import Presentation
from pptx.util import Inches
from pptx.enum.text import PP_ALIGN
from pptx.enum.shapes import MSO_SHAPE
from pptx.dml.color import RGBColor

In [113]:
path = 'Sample_Slides.pptx'

### Extracting information from sample slides

#### Working

In [107]:
## Ideal, but not for downloading all images properly 
def extract_images_info_from_pptx(pptx_file):
    images_info = []
    presentation = Presentation(pptx_file)
    
    # Iterate over each slide and extract the images and their properties
    for slide in presentation.slides:
        for shape in slide.shapes:
            if shape.shape_type == 13:  # Shape type for images
                picture = shape.image
                image_size = picture.size
                image_left = shape.left
                image_top = shape.top
                print(image_size,image_left, image_top )

                # Add image information to the list
                images_info.append({
                    'size': image_size,
                    'image_left': image_left,
                    'image_top': image_top
                })
    return images_info

In [108]:
# Extract images and their information from the .pptx file
images_information = extract_images_info_from_pptx(path)
print(images_information)

(2048, 1024) 4248177 56600
(2048, 1024) 4248177 56600
(825, 465) 2432 -951
(2048, 1024) 4248177 56600
[{'size': (2048, 1024), 'image_left': 4248177, 'image_top': 56600}, {'size': (2048, 1024), 'image_left': 4248177, 'image_top': 56600}, {'size': (825, 465), 'image_left': 2432, 'image_top': -951}, {'size': (2048, 1024), 'image_left': 4248177, 'image_top': 56600}]


In [109]:
import os
import zipfile

def extract_images_from_pptx(pptx_file):
    # Create a folder to store the extracted images
    images_folder = os.path.splitext(pptx_file)[0] + "_Images"
    os.makedirs(images_folder, exist_ok=True)

    with zipfile.ZipFile(pptx_file, 'r') as zip_ref:
        # Extract all files from the .pptx archive
        zip_ref.extractall(images_folder)

    images_path = images_folder + '/ppt/media'
    print(images_path)
    return images_path
    
# Extract images from the .pptx file
images_path = extract_images_from_pptx(path)

Sample_Slides_Images/ppt/media


In [110]:
def extract_text_information(shape):
    text_info = []

    if shape.has_text_frame:
        text_frame = shape.text_frame
        for paragraph in text_frame.paragraphs:
            for run in paragraph.runs:
                text_data = {
                    'typeface': run.font.name,
                    'color': run.font.color.theme_color,
                    'size': run.font.size.pt,
                    'alignment': paragraph.alignment,
                    'line_spacing': paragraph.line_spacing,
                    'space_after': paragraph.space_after,
                    'space_before': paragraph.space_before,
                    'text': paragraph.text,
                    'left_margin': text_frame.margin_left.inches,
                    'top_margin': text_frame.margin_top.inches,
                }
                text_info.append(text_data)

    return text_info

In [121]:
def extract_slide_information(presentation):
    slide_info = []

    for slide in presentation.slides:
        slide_data = {}

        # Extract font information from shapes (text boxes) on the slide
        for shape in slide.shapes:
            font_info = extract_text_information(shape)
            slide_data.setdefault('font_info', []).extend(font_info)

        # Extract image information from shapes on the slide
        image_info = extract_images_info_from_pptx(presentation)
        slide_data.setdefault('image_info', []).extend(image_info)
        slide_info.append(slide_data)

    return slide_info

In [122]:
# Load the PowerPoint presentation
presentation = Presentation(path)

# Extract properties from each slide
slide_properties = []
for slide in presentation.slides:
    slide_properties.append(extract_slide_information(presentation))

print(slide_properties)

AttributeError: 'Presentation' object has no attribute 'seek'

#### Not working

In [104]:
# Not sure if this works
from pptx import Presentation

def extract_slide_properties(slide):
    properties = {}

    for element in slide._element.iter():
        if element.tag.endswith("}cSld"):
            for child in element:
                properties[child.tag] = child.attrib

    return properties

# Load the PowerPoint presentation
presentation = Presentation(path)

# Extract properties of each slide
slide_properties = []
for slide in presentation.slides:
    slide_properties.append(extract_slide_properties(slide))

# Display the extracted properties
for i, properties in enumerate(slide_properties):
    print(f"Slide {i+1} properties:")
    for tag, attrib in properties.items():
        print(f"{tag}: {attrib}")
    print("\n")

Slide 1 properties:
{http://schemas.openxmlformats.org/presentationml/2006/main}bg: {}
{http://schemas.openxmlformats.org/presentationml/2006/main}spTree: {}


Slide 2 properties:
{http://schemas.openxmlformats.org/presentationml/2006/main}bg: {}
{http://schemas.openxmlformats.org/presentationml/2006/main}spTree: {}


Slide 3 properties:
{http://schemas.openxmlformats.org/presentationml/2006/main}bg: {}
{http://schemas.openxmlformats.org/presentationml/2006/main}spTree: {}




In [105]:
# Not sure if this works
import os
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt

def recreate_slides(xml_files):
    presentation = Presentation()

    for xml_file in xml_files:
        slide = presentation.add_slide()  # Add a new slide

        # Set properties based on the XML file
        # Example: Set title based on the XML file name
        title = xml_file.replace(".xml", "")
        slide.shapes.title.text = title

        # Add additional property settings based on the XML file content
        # Example: Set text based on the XML file content
        with open(xml_file, "r") as f:
            text = f.read()

        # Create a text box shape and set its properties
        left = top = width = height = Inches(1)  # Adjust the positioning and size as needed
        textbox = slide.shapes.add_textbox(left, top, width, height)
        textbox.text_frame.word_wrap = True
        textbox.text_frame.text = text

        # Set font properties of the text
        paragraph = textbox.text_frame.paragraphs[0]
        run = paragraph.add_run()
        run.font.name = "Arial"  # Set the desired font
        run.font.size = Pt(12)  # Set the desired font size

        # Example: Set alignment of the paragraph
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER  # Adjust the alignment as needed

    return presentation

# Specify the path to the extracted XML files
xml_files = ["path/to/xml/file1.xml", "path/to/xml/file2.xml", "path/to/xml/file3.xml"]

# Recreate the presentation slides
recreated_presentation = recreate_slides(xml_files)

# Save the recreated presentation
recreated_presentation.save("recreated_presentation.pptx")

AttributeError: 'Presentation' object has no attribute 'add_slide'

#### Create slides based on attributes

In [None]:
# Function to create a song slide with lyrics
def create_song_slide(prs, lyrics, font_size, font_color, image_path):
    slide_layout = prs.slide_layouts[1]  # Assuming a content slide layout
    slide = prs.slides.add_slide(slide_layout)

    # Set slide background to black
    background = slide.background
    fill = background.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(0, 0, 0)

    # Add lyrics text box
    left = Inches(1)
    top = Inches(1)
    width = Inches(8)
    height = Inches(6)
    txBox = slide.shapes.add_textbox(left, top, width, height)
    tf = txBox.text_frame
    tf.word_wrap = True
    tf.auto_size = True
    p = tf.add_paragraph()
    p.text = lyrics

    # Apply font properties
    p.font.name = "Arial"  # Replace with the desired font name
    #p.font.size = Pt(font_size)
    p.font.color.rgb = RGBColor(*font_color)  # Assuming font_color is a tuple (R, G, B)

    # Add image at the center
    img = slide.shapes.add_picture(image_path, Inches(3), Inches(3))
    img.left = int((prs.slide_width - img.width) / 2)
    img.top = int((prs.slide_height - img.height) / 2)

    # Set image alignment
    img.vertical_anchor = MSO_VERTICAL_ANCHOR.BOTTOM  # Adjust as needed
    img.horizontal_anchor = MSO_HORIZONTAL_ANCHOR.CENTER  # Adjust as needed

    # Add transition effect
    slide.slide_transition.transition = PP_TRANSITION.FADE  # Adjust as needed
    slide.slide_transition.transition_speed = PP_TRANSITION_SPEED.SLOW  # Adjust as needed

In [None]:
# Function to process line and separate English and Korean lyrics
def process_lyrics_line(line):
    # Split the line into English and Korean lyrics (assuming they are separated by a delimiter like '|')
    parts = line.split('|')
    english_lyrics = parts[0].strip() if len(parts) > 0 else ''
    korean_lyrics = parts[1].strip() if len(parts) > 1 else ''
    return english_lyrics, korean_lyrics

In [None]:
# Create a new PowerPoint presentation
prs = Presentation()

# Set default slide properties
prs.slide_width = Inches(10)
prs.slide_height = Inches(7.5)

# Slide 1: Oscar band logo
slide_layout = prs.slide_layouts[0]  # Assuming a title slide layout
slide = prs.slides.add_slide(slide_layout)
img_path = 'path_to_oscar_band_logo_image'
slide.shapes.add_picture(img_path, Inches(1), Inches(1))

# Slide 2: Empty slide before concert starts
slide_layout = prs.slide_layouts[6]  # Assuming a blank slide layout
prs.slides.add_slide(slide_layout)

# Slide 3: Empty slide before concert starts
slide_layout = prs.slide_layouts[6]  # Assuming a blank slide layout
prs.slides.add_slide(slide_layout)

# Read lyrics from the text file
with open('path_to_lyrics_text_file', 'r', encoding='utf-8') as file:
    lyrics_lines = file.readlines()

# Iterate through the lyrics lines and create slides
for line in lyrics_lines:
    line = line.strip()
    if line:
        # Process line to separate English and Korean lyrics
        # Assign font size, color, and image path accordingly

        # Create song slide
        create_song_slide(presentation, lyrics, font_size, font_color, image_path)

# Slide n: Survey slide
# Create a function to create a survey slide with the desired properties
# Call the function to create the survey slide

# Slide n+1: QR code slide
# Create a function to create a QR code slide with the desired properties
# Call the function to create the QR code slide

# Save the presentation
presentation.save('output_presentation.pptx')