In [None]:
from email import policy
from email.generator import Generator
from email.message import Message
from email.parser import Parser
from pathlib import Path
from tempfile import gettempdir
from typing import Iterable

import libratom
import pypff
from libratom.lib import MboxArchive
from libratom.lib.base import Archive
from libratom.lib.core import open_mail_archive

In [None]:
# Show the installed libratom version (the bare expression is rendered as the cell output)
libratom.__version__

### Input file and output folder
(Edit as necessary)

In [None]:
# Relative paths for mybinder
# Source PST mailbox that messages are read from
pst_file = Path('RevisedEDRMv1_Complete/andrea_ring/andrea_ring_000_1_1.pst')

# Destination folder, named after the PST file (extension dropped) with an "_eml_files" suffix.
# It is created up front, together with any missing parents; an existing folder is left as is.
output_folder = Path(f'{pst_file.stem}_eml_files')
output_folder.mkdir(parents=True, exist_ok=True)

### List of message IDs
(Edit as necessary)

In [None]:
# Option 1: export only a hand-picked list of message IDs
# (uncomment the next line and comment out the block below)
# message_ids = [2127428, 2127460, 2127492, 2127524]

# Option 2 (default): gather the identifier of every message in the archive
with open_mail_archive(pst_file) as archive:
    message_ids = [msg.identifier for msg in archive.messages()]

### Utility functions

In [None]:
def pff_msg_to_string(message: "pypff.message") -> str:
    """
    Serializes a pff.message object to a string

    The result is the message's transport headers followed by its plain text
    body, each stripped of surrounding whitespace and separated by an empty
    line (CRLF CRLF).

    Args:
        message: object exposing `transport_headers` and `plain_text_body`
            attributes; each may be None/empty, a str, or bytes.

    Returns:
        The serialized message. A missing part is rendered as an empty string.
    """

    def _as_text(value) -> str:
        # Missing parts (None, "" or b"") are rendered as an empty string
        if not value:
            return ""

        # Decode leniently: undecodable bytes become U+FFFD instead of raising
        if isinstance(value, bytes):
            return str(value, encoding="utf-8", errors="replace")

        return value

    # Both parts go through the same normalization so that a bytes value is
    # never interpolated as its b'...' repr in the f-string below
    headers = _as_text(message.transport_headers)
    body = _as_text(message.plain_text_body)

    return f"{headers.strip()}\r\n\r\n{body.strip()}"

In [None]:
def extract_message_from_archive(archive: Archive, msg_id: int) -> Message:
    """
    Looks up a message by ID in an already opened Archive and returns it as an email Message

    Messages from an mbox archive are returned exactly as the archive provides them.
    Messages from any other archive (PST) are serialized to text first and then
    parsed with the default email policy.
    """

    raw_message = archive.get_message_by_id(msg_id)

    if not isinstance(archive, MboxArchive):
        # pst archive: go through the string form to build a Message object
        serialized = pff_msg_to_string(raw_message)
        pst_parser = Parser(policy=policy.default)
        return pst_parser.parsestr(serialized)

    # mbox archive: nothing to convert
    return raw_message
    

In [None]:
def export_messages_from_file(src_file: Path, msg_ids: Iterable[int], dest_folder: Path = Path.cwd()) -> None:
    """
    Writes .eml files in a destination directory given a mailbox file (PST or mbox) and a list of message IDs

    Each message is written to `<dest_folder>/<message ID>.eml`, replacing any
    existing file of that name.

    Args:
        src_file: path of the mailbox file to read messages from
        msg_ids: IDs of the messages to export
        dest_folder: directory receiving the .eml files; it is not created here.
            The default is the working directory captured when this function
            was defined.
    """

    with open_mail_archive(src_file) as archive:
        # `msg_id` rather than `id`, which would shadow the builtin
        for msg_id in msg_ids:
            msg = extract_message_from_archive(archive, msg_id)

            # Explicit UTF-8: message text may hold characters that the platform's
            # default text encoding cannot represent, which would make the write fail
            with (dest_folder / f'{msg_id}.eml').open(mode='w', encoding='utf-8') as eml_file:
                Generator(eml_file).flatten(msg)

### Test run

In [None]:
# Export every message listed in `message_ids` from the PST file as .eml files into the output folder
export_messages_from_file(pst_file, message_ids, output_folder)