# Explore `epub` file format

An EPUB file is a ZIP archive containing individual `xhtml` content files plus metadata that describes how to order and render them.

In [14]:
from pathlib import Path
import os

# The sample books are looked up in a "books" folder under the current working directory.
books_dir = Path.cwd().joinpath("books")
test_book = books_dir.joinpath("Feeding the Machine.epub")

In [17]:
import zipfile

def explore_epub_structure(epub_path):
    """Print the members of an EPUB archive grouped by directory.

    The archive is opened read-only as a ZIP file; nothing is extracted.

    Args:
        epub_path: Path (``str`` or path-like) to the ``.epub`` file.

    Returns:
        list[str]: Every member name in the archive, exactly as returned by
        ``ZipFile.namelist()`` (explicit directory entries included).

    Raises:
        FileNotFoundError: If ``epub_path`` does not exist.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
    """
    # Only the member names are needed, so release the file handle right away.
    with zipfile.ZipFile(epub_path, 'r') as epub:
        file_list = epub.namelist()

    print("EPUB Structure:")
    print("-" * 40)

    # Group files by directory for cleaner display; top-level members
    # (empty dirname) are shown under the pseudo-directory "root".
    dirs = {}
    for file_path in file_list:
        dir_name = os.path.dirname(file_path) or "root"
        files = dirs.setdefault(dir_name, [])
        base_name = os.path.basename(file_path)
        # Explicit directory entries (names ending in "/") have an empty
        # basename: keep the directory heading, but don't list a blank file.
        if base_name:
            files.append(base_name)

    for dir_name, files in sorted(dirs.items()):
        print(f"\n{dir_name}/")
        for file in sorted(files):
            print(f"  {file}")

    return file_list

In [18]:
# Print the grouped listing; the returned member list is shown as the cell's output.
explore_epub_structure(epub_path=test_book)

EPUB Structure:
----------------------------------------

META-INF/
  container.xml

OEBPS/
  9781639734979.opf
  toc.ncx

OEBPS/images/
  9781639734979.jpg
  pg238.jpg
  pgi.jpg
  pgiii.jpg

OEBPS/styles/
  stylesheet.css

OEBPS/xhtml/
  acknowledgments.xhtml
  authorsnote.xhtml
  chapter1.xhtml
  chapter2.xhtml
  chapter3.xhtml
  chapter4.xhtml
  chapter5.xhtml
  chapter6.xhtml
  chapter7.xhtml
  chapter8.xhtml
  conclusion.xhtml
  contents.xhtml
  copyright.xhtml
  cover.xhtml
  extration.xhtml
  halftitle.xhtml
  nav.xhtml
  notes.xhtml
  title.xhtml

root/
  mimetype


['mimetype',
 'OEBPS/images/pgi.jpg',
 'OEBPS/xhtml/contents.xhtml',
 'OEBPS/xhtml/chapter8.xhtml',
 'OEBPS/xhtml/notes.xhtml',
 'OEBPS/xhtml/chapter5.xhtml',
 'OEBPS/xhtml/chapter4.xhtml',
 'OEBPS/xhtml/chapter1.xhtml',
 'OEBPS/xhtml/authorsnote.xhtml',
 'OEBPS/xhtml/halftitle.xhtml',
 'OEBPS/styles/stylesheet.css',
 'OEBPS/images/9781639734979.jpg',
 'OEBPS/xhtml/extration.xhtml',
 'OEBPS/xhtml/chapter7.xhtml',
 'OEBPS/images/pgiii.jpg',
 'META-INF/container.xml',
 'OEBPS/toc.ncx',
 'OEBPS/xhtml/nav.xhtml',
 'OEBPS/9781639734979.opf',
 'OEBPS/xhtml/copyright.xhtml',
 'OEBPS/xhtml/acknowledgments.xhtml',
 'OEBPS/xhtml/cover.xhtml',
 'OEBPS/xhtml/chapter2.xhtml',
 'OEBPS/xhtml/title.xhtml',
 'OEBPS/images/pg238.jpg',
 'OEBPS/xhtml/chapter3.xhtml',
 'OEBPS/xhtml/conclusion.xhtml',
 'OEBPS/xhtml/chapter6.xhtml']