# Chapter 33: Shutil and Tempfile

This notebook covers Python's `shutil` module for high-level file and directory operations and the `tempfile` module for creating temporary files and directories. Together they provide the tools needed for safe, portable file management.

## Key Concepts
- **`shutil.copy()`** / **`shutil.copy2()`**: Copy files (with or without metadata)
- **`shutil.copytree()`**: Recursively copy entire directory trees
- **`shutil.move()`**: Move or rename files and directories
- **`shutil.rmtree()`**: Recursively delete a directory tree
- **`shutil.disk_usage()`**: Query disk space (total, used, free)
- **`tempfile.NamedTemporaryFile`**: Create a temporary file with a name on disk
- **`tempfile.TemporaryDirectory`**: Create a temporary directory that auto-cleans

## Section 1: Copying Files with `shutil.copy()`

`shutil.copy(src, dst)` copies a file's content and permissions. `shutil.copy2()` additionally preserves metadata (timestamps). Both return the path to the destination.

In [None]:
import os
import shutil
import tempfile

# Everything happens inside a scratch directory that is removed on exit.
with tempfile.TemporaryDirectory() as tmpdir:
    # Decide on both paths up front, then populate the source file.
    src: str = os.path.join(tmpdir, "source.txt")
    dst: str = os.path.join(tmpdir, "dest.txt")
    with open(src, "w") as source_file:
        source_file.write("hello")

    # shutil.copy() hands back the path it wrote to.
    result: str = shutil.copy(src, dst)

    print(f"Source:      {os.path.basename(src)}")
    print(f"Destination: {os.path.basename(result)}")

    # Read the copy back and compare it with the text written to the source.
    with open(dst) as copied_file:
        content: str = copied_file.read()
    print(f"Content:     {content!r}")
    print(f"Files match: {content == 'hello'}")

In [None]:
import os
import shutil
import tempfile
import time

with tempfile.TemporaryDirectory() as tmpdir:
    # Lay out the three paths first: one source and one target per copy function.
    src: str = os.path.join(tmpdir, "original.txt")
    dst_copy: str = os.path.join(tmpdir, "via_copy.txt")
    dst_copy2: str = os.path.join(tmpdir, "via_copy2.txt")

    with open(src, "w") as source_file:
        source_file.write("metadata test")

    # Wait a moment so a copy that receives a fresh timestamp can be told
    # apart from one that inherits the source's timestamp.
    time.sleep(0.1)

    shutil.copy(src, dst_copy)    # plain copy: modification time is not carried over
    shutil.copy2(src, dst_copy2)  # copy2: modification time is carried over

    # Collect the modification times in source, copy(), copy2() order.
    src_mtime, copy_mtime, copy2_mtime = map(
        os.path.getmtime, (src, dst_copy, dst_copy2)
    )

    print(f"Source mtime:      {src_mtime:.4f}")
    print(f"copy() mtime:      {copy_mtime:.4f}")
    print(f"copy2() mtime:     {copy2_mtime:.4f}")
    print(f"\ncopy2 preserved:   {abs(src_mtime - copy2_mtime) < 0.01}")

## Section 2: Copying Directory Trees with `shutil.copytree()`

`shutil.copytree(src, dst)` recursively copies an entire directory tree. The destination must not already exist (unless `dirs_exist_ok=True` is used).

In [None]:
import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    src_dir: str = os.path.join(tmpdir, "project")
    dst_dir: str = os.path.join(tmpdir, "project_backup")

    # Populate the source tree: project/main.py and project/pkg/utils.py
    os.makedirs(os.path.join(src_dir, "pkg"))
    source_files = {
        ("main.py",): "print('hello')",
        ("pkg", "utils.py"): "def helper(): pass",
    }
    for parts, text in source_files.items():
        with open(os.path.join(src_dir, *parts), "w") as out:
            out.write(text)

    # Duplicate the whole tree; dst_dir does not exist yet.
    shutil.copytree(src_dir, dst_dir)

    # Walk the duplicate and print it as an indented listing.
    print("Copied tree contents:")
    for dirpath, _subdirs, filenames in os.walk(dst_dir):
        # Depth = number of separators left once the root path is stripped.
        remainder: str = dirpath.replace(dst_dir, "")
        level: int = remainder.count(os.sep)
        indent: str = "  " * level
        print(f"{indent}{os.path.basename(dirpath)}/")
        for fname in filenames:
            print(f"{indent}  {fname}")

    # Confirm that a nested file made it across.
    copied_file: str = os.path.join(dst_dir, "pkg", "utils.py")
    print(f"\nCopied file exists: {os.path.exists(copied_file)}")

## Section 3: Moving and Renaming with `shutil.move()`

`shutil.move(src, dst)` moves a file or directory. It works across filesystems (unlike `os.rename()`).

In [None]:
import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    # Prepare the destination directory and a file to move into it.
    archive_dir: str = os.path.join(tmpdir, "archive")
    os.makedirs(archive_dir)

    src: str = os.path.join(tmpdir, "report.txt")
    with open(src, "w") as report:
        report.write("quarterly results")

    # First move: relocate the file, keeping its name.
    dst: str = os.path.join(archive_dir, "report.txt")
    shutil.move(src, dst)

    print(f"Source exists:      {os.path.exists(src)}")
    print(f"Destination exists: {os.path.exists(dst)}")

    # Second move: same directory, new name, which amounts to a rename.
    renamed: str = os.path.join(archive_dir, "q1_report.txt")
    shutil.move(dst, renamed)
    print(f"\nRenamed file exists: {os.path.exists(renamed)}")
    print(f"Archive contents:    {os.listdir(archive_dir)}")

## Section 4: Removing Directory Trees with `shutil.rmtree()`

`shutil.rmtree(path)` recursively deletes a directory and all its contents. Unlike `os.rmdir()`, it works on non-empty directories.

In [None]:
import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmpdir:
    # Build a small nested tree with one empty file at each level.
    target: str = os.path.join(tmpdir, "build_output")
    os.makedirs(os.path.join(target, "assets", "images"))
    for name in ("index.html", "assets/style.css", "assets/images/logo.png"):
        filepath: str = os.path.join(target, name)
        # Opening for writing creates the empty file; the with-block closes it.
        with open(filepath, "w"):
            pass

    print(f"Before rmtree: {os.path.isdir(target)}")

    # Tally the files across every level of the tree before deleting it.
    file_count: int = 0
    for _root, _dirs, files in os.walk(target):
        file_count += len(files)
    print(f"Files in tree: {file_count}")

    # One call removes the directory together with everything beneath it.
    shutil.rmtree(target)
    print(f"After rmtree:  {os.path.exists(target)}")

## Section 5: Disk Usage with `shutil.disk_usage()`

`shutil.disk_usage(path)` returns a named tuple with `total`, `used`, and `free` disk space in bytes.

In [None]:
import shutil

# Query disk usage for the root filesystem.
# The result is a named tuple with total, used and free, all in bytes.
usage = shutil.disk_usage("/")

print(f"Total: {usage.total:>15,} bytes  ({usage.total / (1024**3):.1f} GB)")
print(f"Used:  {usage.used:>15,} bytes  ({usage.used / (1024**3):.1f} GB)")
print(f"Free:  {usage.free:>15,} bytes  ({usage.free / (1024**3):.1f} GB)")

# Calculate percentage used. A filesystem that reports a total of 0 would
# otherwise make this division raise ZeroDivisionError, so fall back to 0.0.
pct_used: float = (usage.used / usage.total) * 100 if usage.total else 0.0
print(f"\nDisk usage: {pct_used:.1f}%")

# Sanity-check the fields. These are normally all True on a real disk, but
# they are observations rather than guarantees (a full disk has free == 0).
print(f"\ntotal > 0: {usage.total > 0}")
print(f"used > 0:  {usage.used > 0}")
print(f"free > 0:  {usage.free > 0}")

## Section 6: Temporary Files with `tempfile.NamedTemporaryFile`

`NamedTemporaryFile` creates a temporary file that has a visible name on the filesystem. By default it is deleted when closed; use `delete=False` to keep it.

In [None]:
import os
import tempfile

# Default behaviour: the file is removed as soon as it is closed.
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as tmp_file:
    # Capture the path first; it is still needed after the context exits.
    temp_name: str = tmp_file.name
    tmp_file.write("temporary data")
    tmp_file.flush()  # Push the buffered text out to the underlying file
    print(f"Temp file name: {temp_name}")
    print(f"Exists inside context: {os.path.exists(temp_name)}")

# Leaving the with-block closed the file, which also deleted it.
print(f"Exists after context:  {os.path.exists(temp_name)}")

In [None]:
import os
import tempfile

# NamedTemporaryFile with delete=False for manual control.
# With delete=False nothing removes the file automatically, so the cleanup
# lives in a finally block: the file is unlinked even if a step below raises.
tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False)
name: str = tmp.name
try:
    with tmp as f:
        f.write("persistent temp data")

    # File persists after closing
    print(f"File: {name}")
    print(f"Exists after close: {os.path.exists(name)}")

    # Read it back
    with open(name) as f:
        content: str = f.read()
    print(f"Content: {content!r}")
finally:
    # Clean up manually
    os.unlink(name)
print(f"After unlink: {os.path.exists(name)}")

## Section 7: Temporary Directories with `tempfile.TemporaryDirectory`

`TemporaryDirectory` creates a temporary directory that is automatically deleted (including all its contents) when the context manager exits.

In [None]:
import os
import tempfile

# The directory and everything created in it disappear when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    # Remember the location so it can be checked after the context has exited.
    saved_path: str = tmpdir
    print(f"Temp dir: {tmpdir}")
    print(f"Is a directory: {os.path.isdir(tmpdir)}")

    # Populate it with a subdirectory, a top-level file and a nested file.
    subdir: str = os.path.join(tmpdir, "subdir")
    os.makedirs(subdir)
    files_to_create = (
        (os.path.join(tmpdir, "data.txt"), "will be cleaned up"),
        (os.path.join(subdir, "nested.txt"), "also cleaned up"),
    )
    for path, text in files_to_create:
        with open(path, "w") as out:
            out.write(text)

    contents: list[str] = os.listdir(tmpdir)
    print(f"Contents: {contents}")

# The saved path no longer exists once the context manager has finished.
print(f"\nAfter exit, exists: {os.path.exists(saved_path)}")

In [None]:
import tempfile

# prefix and suffix are placed around the generated part of the directory name
scratch = tempfile.TemporaryDirectory(prefix="myapp_", suffix="_work")
with scratch as tmpdir:
    print(f"Custom temp dir: {tmpdir}")

# gettempdir() reports the directory that temp files are created in by default
print(f"System temp dir: {tempfile.gettempdir()}")

## Section 8: Practical Pattern -- Safe File Processing

A common pattern is to write to a temporary file in the same directory as the target first, then move it into place. Because the target is only ever replaced by a completely written file, a failure partway through the write cannot leave it corrupted.

In [None]:
import os
import shutil
import tempfile


def safe_write(filepath: str, content: str) -> None:
    """Write content to filepath atomically using a temp file.

    The text is first written to a temporary ``.tmp`` file created in the
    same directory as ``filepath``; that file is then renamed over the
    target with ``os.replace``. Readers therefore see either the old file
    or the complete new one, never a partially written file.

    Args:
        filepath: Destination path. Its directory must already exist.
        content: Text to write.

    Raises:
        OSError: If the temp file cannot be created or moved into place.
        TypeError: If ``content`` is not a string.

    If anything fails, the temporary file is removed and an existing
    ``filepath`` is left untouched.
    """
    # A bare filename has an empty dirname; use the current directory then.
    directory: str = os.path.dirname(filepath) or "."
    tmp = tempfile.NamedTemporaryFile(
        mode="w", dir=directory, suffix=".tmp", delete=False
    )
    tmp_name: str = tmp.name
    try:
        # Close the file before renaming it (required on Windows).
        with tmp:
            tmp.write(content)
        # os.replace overwrites an existing target in a single atomic step;
        # both paths are in the same directory, hence on the same filesystem.
        os.replace(tmp_name, filepath)
    except BaseException:
        # Cleanup only: drop the leftover temp file, then re-raise unchanged.
        try:
            os.unlink(tmp_name)
        except FileNotFoundError:
            pass
        raise


# Try the helper out inside a throwaway directory
with tempfile.TemporaryDirectory() as tmpdir:
    payload: str = '{"key": "value"}'
    target: str = os.path.join(tmpdir, "config.json")
    safe_write(target, payload)

    # Read the file back to show what ended up on disk.
    with open(target) as f:
        print(f"Written content: {f.read()}")
    print(f"File exists: {os.path.exists(target)}")

In [None]:
import os
import shutil
import tempfile


def backup_and_update(filepath: str, new_content: str) -> str:
    """Create a backup of a file, then update it. Returns backup path.

    The current file is copied to ``filepath + ".bak"`` with
    ``shutil.copy2`` (contents plus metadata), then ``filepath`` is
    overwritten with ``new_content``.

    Args:
        filepath: File to update. It is created if it does not exist.
        new_content: Text that replaces the file's contents.

    Returns:
        The backup path, ``filepath + ".bak"``. When ``filepath`` did not
        exist beforehand no backup is written, so callers should not assume
        the returned path exists in that case.
    """
    backup_path: str = filepath + ".bak"
    # Attempt the copy directly instead of checking os.path.exists() first:
    # the file could vanish between a check and the copy.
    try:
        shutil.copy2(filepath, backup_path)
    except FileNotFoundError:
        # Nothing to back up yet; the write below creates the file.
        pass
    with open(filepath, "w") as f:
        f.write(new_content)
    return backup_path


with tempfile.TemporaryDirectory() as tmpdir:
    config: str = os.path.join(tmpdir, "settings.ini")

    # Seed the file with a first version so there is something to back up.
    original_text: str = "version=1"
    with open(config, "w") as f:
        f.write(original_text)

    # Replace the contents; the helper returns where the backup copy lives.
    updated_text: str = "version=2"
    backup: str = backup_and_update(config, updated_text)

    # Show the live file, the backup, and the resulting directory listing.
    with open(config) as f:
        print(f"Current: {f.read()}")
    with open(backup) as f:
        print(f"Backup:  {f.read()}")
    print(f"\nFiles: {sorted(os.listdir(tmpdir))}")

## Summary

### `shutil` Module
- **`shutil.copy(src, dst)`**: Copy file content and permissions
- **`shutil.copy2(src, dst)`**: Copy file content, permissions, and metadata (timestamps)
- **`shutil.copytree(src, dst)`**: Recursively copy an entire directory tree
- **`shutil.move(src, dst)`**: Move or rename a file or directory (works across filesystems)
- **`shutil.rmtree(path)`**: Recursively delete a directory and all its contents
- **`shutil.disk_usage(path)`**: Returns named tuple with `total`, `used`, `free` bytes

### `tempfile` Module
- **`NamedTemporaryFile(mode, suffix, delete)`**: Temporary file with a visible name on disk
  - `delete=True` (default): removed on close
  - `delete=False`: persists until manually removed with `os.unlink()`
- **`TemporaryDirectory(prefix, suffix)`**: Temporary directory that auto-cleans on context exit
- **`tempfile.gettempdir()`**: Returns the system's default temporary directory

### Key Patterns
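- Write to a temp file in the target's directory, then move it into place (`shutil.move()`, or `os.replace()` when the swap must be atomic) so readers never see a partial write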
- Use `TemporaryDirectory` as a context manager for scratch space in tests
- Use `copy2()` when you need to preserve file timestamps