In [None]:
%pip install python-pptx


In [1]:
from pptx import Presentation

prs = Presentation()
title_slide_layout = prs.slide_layouts[0]
slide = prs.slides.add_slide(title_slide_layout)
title = slide.shapes.title
subtitle = slide.placeholders[1]

title.text = "Hello, World!"
subtitle.text = "python-pptx was here!"

prs.save('test.pptx')

In [2]:
from pptx import Presentation

prs = Presentation()
bullet_slide_layout = prs.slide_layouts[1]

slide = prs.slides.add_slide(bullet_slide_layout)
shapes = slide.shapes

title_shape = shapes.title
body_shape = shapes.placeholders[1]

title_shape.text = 'Adding a Bullet Slide'

tf = body_shape.text_frame
tf.text = 'Find the bullet slide layout'

p = tf.add_paragraph()
p.text = 'Use _TextFrame.text for first bullet'
p.level = 1

p = tf.add_paragraph()
p.text = 'Use _TextFrame.add_paragraph() for subsequent bullets'
p.level = 2

prs.save('test.pptx')

In [3]:
prs = Presentation('test.pptx')
prs.save('new-file-name.pptx')

In [4]:
f = open('test.pptx')
prs = Presentation(f)
f.close()

# or

with open('test.pptx') as f:
    source_stream = StringIO(f.read())
prs = Presentation(source_stream)
source_stream.close()
...
target_stream = StringIO()
prs.save(target_stream)

BadZipFile: File is not a zip file

In [5]:
from pptx import Presentation
from io import BytesIO

# Option 1: direct open
with open("test.pptx", "rb") as f:
    prs = Presentation(f)

# ... work with prs ...

with open("newfile.pptx", "wb") as f:
    prs.save(f)


# Option 2: copy to BytesIO
with open("test.pptx", "rb") as f:
    source_stream = BytesIO(f.read())
prs = Presentation(source_stream)

target_stream = BytesIO()
prs.save(target_stream)


In [6]:
for shape in slide.shapes:
    if not shape.has_text_frame:
        continue
    text_frame = shape.text_frame

In [7]:
text_frame

<pptx.text.text.TextFrame at 0x2584a014790>

In [8]:
print(text_frame)

<pptx.text.text.TextFrame object at 0x000002584A014790>


In [29]:
from pptx import Presentation

# Load an existing PowerPoint
prs = Presentation("test.pptx")
newtext=""
# Loop over all slides
for i, slide in enumerate(prs.slides, start=1):
    print(f"\n--- Slide {i} ---")
    newtext += f"\n--- Slide {i} ---"

    # Loop over shapes (text boxes, titles, placeholders, etc.)
    for shape in slide.shapes:
        if shape.has_text_frame:  # Only shapes that can contain text
            for paragraph in shape.text_frame.paragraphs:
                # Collect text from each run in the paragraph
                text = "".join(run.text for run in paragraph.runs)
                print(text)
                newtext += f"\n{text}"
print(newtext)


--- Slide 1 ---
Adding a Bullet Slide
Find the bullet slide layout
Use _TextFrame.text for first bullet
Use _TextFrame.add_paragraph() for subsequent bullets

--- Slide 1 ---
Adding a Bullet Slide
Find the bullet slide layout
Use _TextFrame.text for first bullet
Use _TextFrame.add_paragraph() for subsequent bullets


In [10]:
from pptx import Presentation

def extract_text_as_string(path):
    prs = Presentation(path)
    all_text = []

    for i, slide in enumerate(prs.slides, start=1):
        slide_text = [f"--- Slide {i} ---"]
        for shape in slide.shapes:
            if shape.has_text_frame:
                for paragraph in shape.text_frame.paragraphs:
                    text = "".join(run.text for run in paragraph.runs)
                    if text.strip():  # skip empty lines
                        slide_text.append(text)
        all_text.append("\n".join(slide_text))
    
    return "\n\n".join(all_text)

text_output = extract_text_as_string("test.pptx")
print(text_output)


--- Slide 1 ---
Adding a Bullet Slide
Find the bullet slide layout
Use _TextFrame.text for first bullet
Use _TextFrame.add_paragraph() for subsequent bullets


In [11]:
from pptx import Presentation
import json

def extract_text_as_dict(path):
    prs = Presentation(path)
    slides_data = {}

    for i, slide in enumerate(prs.slides, start=1):
        slide_key = f"Slide {i}"
        slide_texts = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                for paragraph in shape.text_frame.paragraphs:
                    text = "".join(run.text for run in paragraph.runs)
                    if text.strip():
                        slide_texts.append(text)
        slides_data[slide_key] = slide_texts
    
    return slides_data

slides_dict = extract_text_as_dict("test.pptx")

# Pretty print JSON
print(json.dumps(slides_dict, indent=2))


{
  "Slide 1": [
    "Adding a Bullet Slide",
    "Find the bullet slide layout",
    "Use _TextFrame.text for first bullet",
    "Use _TextFrame.add_paragraph() for subsequent bullets"
  ]
}


In [12]:
# %pip install python-pptx

import os, json, glob
from pptx import Presentation

def extract_slide_texts(pptx_path):
    prs = Presentation(pptx_path)
    slides = []
    for i, slide in enumerate(prs.slides, start=1):
        lines = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                for para in shape.text_frame.paragraphs:
                    text = "".join(run.text for run in para.runs).strip()
                    if text:
                        lines.append(text)
        slides.append({"slide_index": i, "lines": lines})
    return slides

def slides_to_string(slides):
    """Convert list of slide dicts into one string for LLM."""
    chunks = []
    for s in slides:
        if s["lines"]:
            chunks.append(f"Slide {s['slide_index']}:\n" + "\n".join(s["lines"]))
    return "\n\n".join(chunks)

In [32]:
extract_slide_texts("test.pptx")

[{'slide_index': 1,
  'lines': ['Adding a Bullet Slide',
   'Find the bullet slide layout',
   'Use _TextFrame.text for first bullet',
   'Use _TextFrame.add_paragraph() for subsequent bullets']}]

In [13]:
def extract_many(pptx_paths, out_jsonl="slides.jsonl"):
    """
    Writes one JSON object per line with:
    {
      "deck_path": "...",
      "deck_name": "...",
      "slide_index": 1,
      "lines": ["...", "..."],
      "deck_text": "whole deck concatenated (for deck-level summary)"
    }
    """
    with open(out_jsonl, "w", encoding="utf-8") as f:
        for path in pptx_paths:
            slides = extract_slide_texts(path)
            deck_text = "\n".join(
                "\n".join(s["lines"]) for s in slides if s["lines"]
            ).strip()
            deck_name = os.path.basename(path)
            for s in slides:
                rec = {
                    "deck_path": path,
                    "deck_name": deck_name,
                    "slide_index": s["slide_index"],
                    "lines": s["lines"],
                    "deck_text": deck_text
                }
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
                print(rec)



In [14]:
# Example: all PPTX files in a folder
pptx_files = glob.glob("presentations/**/*.pptx", recursive=True) or glob.glob("*.pptx")
extract_many(pptx_files, out_jsonl="slides.jsonl")
print(f"Wrote slide records to slides.jsonl (one line per slide).")


{'deck_path': 'new-file-name.pptx', 'deck_name': 'new-file-name.pptx', 'slide_index': 1, 'lines': ['Adding a Bullet Slide', 'Find the bullet slide layout', 'Use _TextFrame.text for first bullet', 'Use _TextFrame.add_paragraph() for subsequent bullets'], 'deck_text': 'Adding a Bullet Slide\nFind the bullet slide layout\nUse _TextFrame.text for first bullet\nUse _TextFrame.add_paragraph() for subsequent bullets'}
{'deck_path': 'newfile.pptx', 'deck_name': 'newfile.pptx', 'slide_index': 1, 'lines': ['Adding a Bullet Slide', 'Find the bullet slide layout', 'Use _TextFrame.text for first bullet', 'Use _TextFrame.add_paragraph() for subsequent bullets'], 'deck_text': 'Adding a Bullet Slide\nFind the bullet slide layout\nUse _TextFrame.text for first bullet\nUse _TextFrame.add_paragraph() for subsequent bullets'}
{'deck_path': 'test.pptx', 'deck_name': 'test.pptx', 'slide_index': 1, 'lines': ['Adding a Bullet Slide', 'Find the bullet slide layout', 'Use _TextFrame.text for first bullet', 'Use

In [15]:
system_prompt = "You are an assistant that analyzes the contents of several pptx files \
and creates a short humorous, entertaining summary. Respond in markdown.\
Include details of the number of pages and text format and shapes"

In [16]:
import json

with open("slides.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        record = json.loads(line)  # parse JSON from one line
        print(record)


{'deck_path': 'new-file-name.pptx', 'deck_name': 'new-file-name.pptx', 'slide_index': 1, 'lines': ['Adding a Bullet Slide', 'Find the bullet slide layout', 'Use _TextFrame.text for first bullet', 'Use _TextFrame.add_paragraph() for subsequent bullets'], 'deck_text': 'Adding a Bullet Slide\nFind the bullet slide layout\nUse _TextFrame.text for first bullet\nUse _TextFrame.add_paragraph() for subsequent bullets'}
{'deck_path': 'newfile.pptx', 'deck_name': 'newfile.pptx', 'slide_index': 1, 'lines': ['Adding a Bullet Slide', 'Find the bullet slide layout', 'Use _TextFrame.text for first bullet', 'Use _TextFrame.add_paragraph() for subsequent bullets'], 'deck_text': 'Adding a Bullet Slide\nFind the bullet slide layout\nUse _TextFrame.text for first bullet\nUse _TextFrame.add_paragraph() for subsequent bullets'}
{'deck_path': 'test.pptx', 'deck_name': 'test.pptx', 'slide_index': 1, 'lines': ['Adding a Bullet Slide', 'Find the bullet slide layout', 'Use _TextFrame.text for first bullet', 'Use

In [17]:
def get_brochure_user_prompt(path):
    user_prompt = f"You are looking at a folder with pptx presentations.\n"
    user_prompt += f"Here are the contents of its slides; use this information to build a short summary in markdown.\n"
    slides = extract_slide_texts(path)
    user_prompt += slides_to_string(slides)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [18]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [19]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [20]:
def pptx_summary(path):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(path)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [22]:
pptx_summary("test.pptx")

# Presentation Summary: "Bulletproof Your Slides!"

**Total Pages:** 1  
**Text Format:** Standard text for instructions  
**Shapes:** A lone slide - resembling a single bullet waiting for action!

---

In this compact but mighty presentation, we dive into the art of bullet-point ninja tactics! 🌟 The singular slide teaches the essential tricks of the trade for presenting information clearly and concisely. 

- **Step 1:** Hunt down the elusive bullet slide layout. (Seriously, it's not hiding under your couch!)
- **Step 2:** Use `_TextFrame.text` for your first bullet — as if you’re picking the first strawberry off the bush!
- **Step 3:** For all the subsequent bullets, unleash your creativity with `_TextFrame.add_paragraph()`. Who knew adding content could be so exciting!?

One slide is all you need to transform your presentations from 'blah' to 'ta-da!' ✨ Just remember, every bullet point should pack a punch — after all, it’s a bullet slide, not a marshmallow slide!

In [None]:
pptx_summary("Malala_Yousafzai.pptx")

In [14]:
pptx_summary("Shakespeare_Highlights.pptx")

# Shakespeare: The Bard with a Quill

## Overview
This delightful slide show contains **4 pages** packed with insights about the life and times of William Shakespeare, a guy who was basically the **Taylor Swift of the 16th century** (minus the hair flips and Instagram). 

### Slide Breakdown

- **Slide 1: Early Life**
  - Text: Bullet points summarizing his birth, family, education, marriage, and his efforts to keep up with child support (3 children, folks!).
  - Total Shapes: 1 main textbox with cleverly formatted bullets.
  - Fun Fact: Born in Stratford-upon-Avon, where "Shakespeare" was probably the most-used surname after "Glove-maker."

- **Slide 2: Career Beginnings**
  - Text: Highlights of his move to London, gaining fame, and co-founding an acting company (no pressure, right?). 
  - Total Shapes: 1 main textbox with bullets and a side note about his early works looking a little too much like sad poetry.
  - Notable Moment: Launched his career with *Venus and Adonis*—the title sounds romantic, but don't be fooled!

- **Slide 3: Major Works**
  - Text: A rundown of plays, sonnets, and poems, categorized like Tinder interests.
  - Total Shapes: 1 core textbox with classified genres. 
  - Comedic Gold: Listed tragedies help you figure out who should definitely *not* get invited to your party!

- **Slide 4: Later Life & Legacy**
  - Text: Summarizes his retirement, death, and how he managed to become an immortal star in the literary cosmos.
  - Total Shapes: 1 textbox that manages to pack in his burial place and a global legacy.
  - Mic Drop: The line about his works being translated into every major language is just an indication that he was the original influencer. 

### Conclusion
In summary, this presentation was a splendid tour through Shakespeare's life, highlighting the ups and downs while reminding us that even in the 16th century, writing about tragic love was all the rage. So, raise a glass to the man who gave us “To be or not to be!”—a deep question that currently has us all just contemplating dinner choices. 🍽️🎭

In [15]:
pptx_summary("Nelson_Mandela.pptx")

# A Journey through the Life of Nelson Mandela: A PPTX Extravaganza!

**Total Pages:** 4  
**Text Format:** Classic Black & White (because why let colors get in the way of a historical narrative?!)  
**Shapes Used:** Probably rectangles for text boxes, because let’s keep it simple, folks!

---

**Slide 1: Introduction**
- **Title:** "Nelson Mandela Highlights of a Notable Life"  
  (You just know this guy is going places!)

**Slide 2: Early Life** 
- **Fun Facts:**  
  - Born in 1918, which means he was around for the invention of sliced bread! (Not that it mattered, given the apartheid crisis.)
  - Studied law—clearly preparing for a future with more than a few courtroom dramas!

**Slide 3: Major Achievements**
- **Anticipation Building:** 
  - Activist extraordinaire—imprisoned for *27 years*! (Talk about a long timeout!)
  - Became South Africa’s first Black president in 1994. If that’s not the ultimate comeback story, what is?

**Slide 4: Legacy & Impact**
- **Something to Think About:** 
  - The ultimate symbol of peace, reconciliation, and justice—like a superhero, but without the cape! 
  - Nobel Peace Prize? Check! He inspired human rights movements worldwide! (Pretty sure he has a "World’s Best Human" mug somewhere.)

---

And there you have it—a heartwarming and slightly amusing recap of Nelson Mandela’s life, reminding us that one man can indeed make a world of difference, all while keeping it stylishly succinct! 🌍💫

In [None]:

def pptx_summary(path):
    MODEL = "llama3.2"
    ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
    
    response = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(path)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [None]:
pptx_summary("Shakespeare_Highlights.pptx")