#! /usr/bin/env python
"""Conversion of mbox archives.

1. Removes all but the specified headers from mailbox archives, so that no
   extra information is provided to outsiders.
2. Removes certain messages specified by their Message-IDs.
"""
from __future__ import with_statement
import re
import sys
import mailbox
HEADERS = ('From',
# It might make sense to keep these headers as well.
possible_HEADERS = ('CC',
def _strip_headers(src, dst):
    """Copy mbox text from *src* to *dst*, keeping only whitelisted headers.

    *src* is an iterable of text lines and *dst* a writable text file.
    Inside a header block only lines starting with one of the module-level
    ``HEADERS`` prefixes are copied, together with their folded continuation
    lines (lines starting with a space or a tab). Message bodies are copied
    unchanged.

    Caveat: a line that directly follows a blank line and starts with one of
    the ``HEADERS`` prefixes is always taken as the start of a new header
    block, even if it is really part of a message body.
    """
    stop = False       # the blank line that ends a header block has been seen
    start_h = False    # the most recent header line was kept
    first_add = True   # the header/body separator line is still to be written
    header = True      # the current line may belong to a header block

    for line in src:
        if line.startswith((' ', '\t')):
            # Folded continuation: keep it only when it continues a kept
            # header. Indented lines in a body are handled by `stop` below.
            if start_h and header:
                dst.write(line)
        else:
            # NOTE(review): reconstructed as the `else` branch. Any
            # non-continuation line ends the current header, so the
            # continuation lines of a dropped header are dropped too.
            start_h = False

        # A whitelisted header line (only honoured outside a body).
        if line.startswith(HEADERS) and header:
            start_h = True
            stop = False
            first_add = True
            dst.write(line)

        # Headers have ended: copy the body verbatim, preceded once by the
        # blank separator line that was held back when it was read.
        if stop:
            header = False
            if first_add:
                dst.write('\n')
                first_add = False
            dst.write(line)

        # A blank line ends the headers; it also re-arms header detection so
        # that the next message's first header line is recognised.
        if line == '\n':
            stop = True
            header = True
            start_h = False


def _read_spam_ids(path):
    """Return the set of Message-IDs listed in the file at *path*.

    The ID is the second whitespace-separated field of each line, with any
    surrounding ``<`` / ``>`` characters stripped. Lines with fewer than two
    fields (for example blank lines) are skipped.

    Raises IOError if the file cannot be opened or read.
    """
    ids = set()
    with open(path) as spam_file:
        for line in spam_file:
            fields = line.split()
            if len(fields) > 1:
                ids.add(fields[1].strip('<>'))
    return ids


def _delete_messages(path, msg_ids):
    """Remove messages whose Message-ID is in *msg_ids* from the mbox at *path*.

    Returns the number of messages removed.
    """
    mbox = mailbox.mbox(path)
    doomed = []
    try:
        # Collect the keys first: removing entries while iterating over the
        # mailbox would modify its table of contents mid-iteration.
        for key, msg in mbox.iteritems():
            msg_id_raw = msg['Message-ID']
            if msg_id_raw is not None and msg_id_raw.strip('<>') in msg_ids:
                doomed.append(key)
        # NOTE(review): the removal itself was not present in the code as
        # received (matches were only counted); confirm this is the intent.
        for key in doomed:
            mbox.remove(key)
        mbox.flush()
    finally:
        mbox.close()
    return len(doomed)


def main(mbox_file):
    """Strip headers from *mbox_file* and delete the messages listed as spam.

    The stripped copy is written to ``/tmp/testdir/<mbox_file>.converted``.
    The Message-IDs to delete are read from ``<mbox_file>.spam``; if that file
    cannot be read the script exits with an error message.
    """
    out_mbox = '/tmp/testdir/' + mbox_file + '.converted'

    # Both files are closed (and the output flushed) before the converted
    # archive is reopened through the mailbox module below.
    with open(out_mbox, 'w') as dst:
        with open(mbox_file) as src:
            _strip_headers(src, dst)
    print('Headers stripped from mbox')

    # Now load the Message-IDs of the messages to delete from the converted
    # archive. If the file is not found at this point, simply exit.
    try:
        msg_ids = _read_spam_ids(mbox_file + '.spam')
    except IOError:
        sys.exit('Error: messageid file not found in current working directory')

    # Write the changes after matching the respective Message-IDs.
    counter = _delete_messages(out_mbox, msg_ids)
    print('Deleted %d messages' % counter)
# Script entry point: the path of the mbox archive to convert must be given
# as the first command-line argument; otherwise exit with a usage message.
if __name__ == '__main__':
if len(sys.argv) < 2:
sys.exit('Usage: %s <mbox>' % (sys.argv[0]))
mbox_file = sys.argv[1]
# NOTE(review): the statement that consumes mbox_file (presumably a call to
# main) is not visible in this view -- confirm it follows.