Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 510 lines (438 sloc) 17.4 KB
#!/usr/bin/env python
cleanup-maildir [OPTION].. COMMAND FOLDERNAME..
Cleans up old messages in FOLDERNAME; the exact action taken
depends on COMMAND. (See next section.)
Note that FOLDERNAME is a name such as 'Drafts', and the
corresponding maildir path is determined using the values of
maildir-root, folder-prefix, and folder-seperator.
archive - move old messages to subfolders based on message date
trash - move old messages to trash folder
delete - permanently delete old messages
-h, --help
Show this help.
-q, --quiet
Suppress normal output.
-v, --verbose
Output extra information for testing.
-n, --trial-run
Do not actually touch any files; just say what would be done.
-a, --age=N
Only touch messages older than N days. Default is 14 days.
-k, --keep-flagged-threads
If any messages in a thread are flagged, do not touch them or
any other messages in that thread.
Note: the thread-detection mechanism is currently based purely on
a message's subject. The In-Reply-To header is not currently used.
-r, --keep-read
If any messages are flagged as READ, do not touch them.
-t, --trash-folder=F
Use F as trash folder when COMMAND is 'trash'.
Default is 'Trash'.
--archive-folder=F
Use F as the base for constructing archive folders. For example, if F is
'Archive', messages from 2004 might be put in the folder 'Archive.2004'.
-d, --archive-hierarchy-depth=N
Specify number of subfolders in archive hierarchy; 1 is just
the year, 2 is year/month (default), 3 is year/month/day.
--maildir-root=F
Specifies the folder that contains the mail folders.
Default is '$HOME/Maildir'.
--folder-seperator=str
Folder hierarchy separator. Default is '.'
--folder-prefix=str
Folder prefix. Default is '.'
The following form is accepted for backwards compatibility, but is deprecated:
cleanup-maildir --mode=COMMAND [OPTION].. FOLDERNAME..
# Archive messages in 'Sent Items' folder over 30 days old
cleanup-maildir --age=30 archive 'Sent Items'
# Trash messages over 2 weeks old in the 'Lists.debian-devel' folder,
# except messages that are part of a thread containing a flagged message.
cleanup-maildir --keep-flagged-threads trash 'Lists.debian-devel'
__version__ = "0.2.3"
# $Id$
# $URL$
import mailbox
import os.path
import os
import rfc822
import string
import socket
import time
import logging
import sys
import getopt
def mkMaildir(path):
    """Make a Maildir structure rooted at 'path'

    Creates 'path' itself and its 'tmp', 'new' and 'cur' subdirectories,
    each requested with owner-only permissions (octal 700).

    Arguments:
    path -- directory to create; must not exist yet

    Raises OSError (from os.mkdir) if a directory already exists or cannot
    be created.
    """
    import stat

    # stat.S_IRWXU is octal 700 (rwx for the owner only).  Using the named
    # constant avoids the "0700" literal, which is a syntax error on
    # Python 3, and the "0o700" literal, which old Python 2 rejects.
    mode = stat.S_IRWXU
    os.mkdir(path, mode)
    for sub in ('tmp', 'new', 'cur'):
        os.mkdir(os.path.join(path, sub), mode)
# Helper used throughout this file on a message's `fp` attribute; callers
# treat the result as a filesystem path (they copy from it, stat it, and
# split it on ':' to read Maildir flags).
# NOTE(review): the body of this function is missing from this copy of the
# file -- only the interpreter-version test below survives.  Presumably each
# branch returns the underlying file name in the way appropriate for that
# Python version; restore it from the upstream script before use.
def get_flname(obj):
# True only for interpreters with major version <= 2 and minor version < 5.
if sys.version_info[0] <= 2 and sys.version_info[1] < 5:
class MaildirWriter(object):

    """Deliver messages into a Maildir"""

    # Default Maildir path for deliver(); None until one is supplied.
    path = None
    # Number of delivery attempts so far; makes generated file names unique.
    counter = 0

    def __init__(self, path=None):
        """Create a MaildirWriter that manages the Maildir at 'path'

        Arguments:
        path -- if specified, used as the default Maildir for this object

        Raises ValueError if 'path' is given but is not a directory.
        """
        if path is not None:
            if not os.path.isdir(path):
                raise ValueError('Path does not exist: %s' % path)
            self.path = path
        self.logger = logging.getLogger('MaildirWriter')

    def deliver(self, msg, path=None):
        """Deliver a message to a Maildir

        Arguments:
        msg -- a message object (must provide 'fp' and getFlags())
        path -- the path of the Maildir; if None, uses default from __init__.
                A path given here also becomes the new default.

        Raises ValueError if no usable Maildir path is known, and OSError
        after more than 10 failed delivery attempts.
        """
        if path is not None:
            self.path = path
        if self.path is None or not os.path.isdir(self.path):
            raise ValueError('Path does not exist')
        tryCount = 1
        srcFile = get_flname(msg.fp)
        (dstName, tmpFile, newFile, dstFile) = (None, None, None, None)
        while 1:
            try:
                dstName = "%d.%d_%d.%s" % (int(time.time()), os.getpid(),
                                           self.counter, socket.gethostname())
                tmpFile = os.path.join(os.path.join(self.path, "tmp"), dstName)
                newFile = os.path.join(os.path.join(self.path, "new"), dstName)

                # NOTE(review): the three filesystem calls in this method
                # were garbled in the copy under review; os.link is
                # reconstructed from the surviving ", tmpFile) # Copy into
                # tmp" / ", newFile) # Link into new" fragments.
                self.logger.debug("deliver: attempt copy %s to %s" %
                                  (srcFile, tmpFile))
                os.link(srcFile, tmpFile)  # Copy into tmp

                self.logger.debug("deliver: attempt link to %s" % newFile)
                os.link(tmpFile, newFile)  # Link into new
            except OSError:
                # sys.exc_info() keeps this valid on every Python version
                # (neither "except E, v" nor "except E as v" is universal).
                s = sys.exc_info()[1].strerror
                # NOTE(review): log level and sleep length are reconstructed;
                # only the message text and "sleeping" survive in the source.
                self.logger.error(
                    "deliver failed: %s (src=%s tmp=%s new=%s i=%d)" %
                    (s, srcFile, tmpFile, newFile, tryCount))
                self.logger.info("sleeping")
                time.sleep(2)
                tryCount += 1
                # Bump the counter so the retry uses a fresh file name.
                self.counter += 1
                if tryCount > 10:
                    raise OSError("too many failed delivery attempts")
            else:
                break

        # Successful delivery; increment deliver counter
        self.counter += 1

        # For the rest of this method we are acting as an MUA, not an MDA.
        # Move message to cur and restore any flags
        dstFile = os.path.join(os.path.join(self.path, "cur"), dstName)
        flags = msg.getFlags()
        if flags is not None:
            dstFile += ':' + flags
        self.logger.debug("deliver: attempt link to %s" % dstFile)
        os.link(newFile, dstFile)
        os.unlink(newFile)

        # Cleanup tmp file
        os.unlink(tmpFile)
class MessageDateError(TypeError):
    """Indicate that the message date was invalid"""
class MaildirMessage(rfc822.Message):

    """An email message

    Has extra Maildir-specific attributes
    """

    def isFlagged(self):
        """return true if the message is flagged as important"""
        import re
        fname = get_flname(self.fp)
        # An 'F' anywhere after a ':' in the file name marks it as flagged.
        return re.search(r':.*F', fname) is not None

    def getFlags(self):
        """return the flag part of the message's filename

        Returns None unless the file name contains exactly one ':'.
        """
        parts = get_flname(self.fp).split(':')
        if len(parts) == 2:
            return parts[1]
        return None

    def isNew(self):
        """return true if the message is marked as unread"""
        # XXX should really be called isUnread
        import re
        fname = get_flname(self.fp)
        # An 'S' after the ':' means "seen"; its absence means unread.
        return re.search(r':.*S', fname) is None

    def getSubject(self):
        """get the message's subject as a unicode string

        Returns None when the message has no Subject header, and the raw
        header value when one of its parts names an unknown charset.
        """
        import email.Header
        s = self.getheader("Subject")
        if s is None:
            # Return None explicitly so callers' "(no subject)" fallbacks
            # are reachable instead of decoding a stringified None.
            return None
        try:
            return u"".join([text.decode(charset or 'ASCII', 'replace')
                             for text, charset
                             in email.Header.decode_header(s)])
        except LookupError:
            # NOTE(review): the except clause was lost from the copy under
            # review; LookupError (unknown charset name) is assumed.
            return s

    def getSubjectHash(self):
        """get the message's subject in a "normalized" form

        This currently means lowercasing and removing any reply or forward
        indicators.
        """
        import re
        s = self.getSubject()
        if s is None:
            return '(no subject)'
        return re.sub(r'^(re|fwd?):\s*', '', s.lower().strip())

    def getDateSent(self):
        """Get the time of sending from the Date header

        Returns a time object using time.mktime.  Not very reliable, because
        the Date header can be missing or spoofed (and often is, by spammers).
        Returns None if the Date header is missing; throws a
        MessageDateError if it is present but invalid.
        """
        dh = self.getheader('Date')
        if dh is None:
            return None
        try:
            return time.mktime(rfc822.parsedate(dh))
        except (ValueError, TypeError, OverflowError):
            # TypeError gets thrown by mktime if parsedate returns None
            raise MessageDateError("message has missing or bad Date")

    def getDateRecd(self):
        """Get the time the message was received"""
        # Index 8 of the stat result is the modification time, in seconds
        # since the epoch.
        return os.stat(get_flname(self.fp))[8]

    def getDateSentOrRecd(self):
        """Get the time the message was sent, fall back on time received"""
        try:
            d = self.getDateSent()
            if d is not None:
                return d
        except MessageDateError:
            pass
        return self.getDateRecd()

    def getAge(self):
        """Get the number of days since the message was received"""
        # time.time() and the stat timestamp are both seconds since the
        # epoch.  The previous time.mktime(time.gmtime()) interpreted a UTC
        # tuple as local time and so was off by the local UTC offset.
        msgAge = time.time() - self.getDateRecd()
        return msgAge / (60 * 60 * 24.0)
class MaildirCleaner(object):

    """Clean a maildir by deleting or moving old messages"""

    __trashWriter = None
    __mdWriter = None
    # Class-level default only; __init__ gives each instance its own dict.
    stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
    keepSubjects = {}
    archiveFolder = None
    archiveHierDepth = 2
    folderBase = None
    folderPrefix = "."
    folderSeperator = "."
    keepFlaggedThreads = False
    trashFolder = "Trash"
    isTrialRun = False
    keepRead = False

    def __init__(self, folderBase=None):
        """Initialize the MaildirCleaner

        Arguments:
        folderBase -- the directory in which the folders are found
        """
        self.folderBase = folderBase
        self.__mdWriter = MaildirWriter()
        self.logger = logging.getLogger('MaildirCleaner')
        # Per-instance counters, so that two cleaners do not share (and
        # mutate) the class-level dictionaries.
        self.stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
        self.keepSubjects = {}

    def __getTrashWriter(self):
        """Return the MaildirWriter for the trash folder, creating it lazily"""
        if not self.__trashWriter:
            path = os.path.join(self.folderBase,
                                self.folderPrefix + self.trashFolder)
            self.__trashWriter = MaildirWriter(path)
        return self.__trashWriter

    trashWriter = property(__getTrashWriter)

    def _folderPath(self, folderName):
        """Map a folder name to its maildir path ('INBOX' is the base dir)"""
        if folderName == 'INBOX':
            return self.folderBase
        return os.path.join(self.folderBase, self.folderPrefix + folderName)

    def scanSubjects(self, folderName):
        """Scans for flagged subjects

        Rebuilds self.keepSubjects with the normalized subject of every
        flagged message in the folder.
        """
        self.logger.info("Scanning for flagged subjects...")
        maildir = mailbox.Maildir(self._folderPath(folderName), MaildirMessage)
        self.keepSubjects = {}
        for i, msg in enumerate(maildir):
            if msg.isFlagged():
                subjHash = msg.getSubjectHash()
                self.keepSubjects[subjHash] = 1
                self.logger.debug("Flagged (%d): %s", i, subjHash)
        self.logger.info("Done scanning.")

    def _archiveSubFolder(self, archiveFolder, msg):
        """Build the dated archive folder name for a message"""
        mdate = time.gmtime(msg.getDateSentOrRecd())
        datePart = str(mdate[0])
        if self.archiveHierDepth > 1:
            datePart += self.folderSeperator + time.strftime("%m-%b", mdate)
        if self.archiveHierDepth > 2:
            datePart += self.folderSeperator + time.strftime("%d-%a", mdate)
        return archiveFolder + self.folderSeperator + datePart

    def clean(self, mode, folderName, minAge):
        """Trashes or archives messages older than minAge days

        Arguments:
        mode -- the cleaning mode.  Valid modes are:
            trash -- moves the messages to a trash folder
            archive -- moves the messages to folders based on their date
            delete -- deletes the messages
        folderName -- the name of the folder on which to operate
            This is a name like "Stuff", not a filename
        minAge -- messages younger than minAge days are left alone

        Raises ValueError if 'mode' is not one of the valid modes.
        Updates self.stats['total'] and self.stats[mode].
        """
        if mode not in ('trash', 'archive', 'delete'):
            raise ValueError

        if self.keepFlaggedThreads:
            self.scanSubjects(folderName)

        archiveFolder = self.archiveFolder
        if archiveFolder is None:
            if folderName == 'INBOX':
                archiveFolder = ""
            else:
                archiveFolder = folderName

        maildir = mailbox.Maildir(self._folderPath(folderName), MaildirMessage)

        fakeMsg = ""
        if self.isTrialRun:
            fakeMsg = "(Not really) "

        # NOTE(review): the statements that actually move or remove files
        # (deliver / os.unlink / mkMaildir) were missing from the copy under
        # review and are reconstructed from the documented command semantics.

        # Move old messages
        for i, msg in enumerate(maildir):
            if self.keepFlaggedThreads \
                    and msg.getSubjectHash() in self.keepSubjects:
                self.log(logging.DEBUG, "Keeping #%d (topic flagged)" % i, msg)
            elif msg.getAge() >= minAge \
                    and (not self.keepRead or msg.isNew()):
                if mode == 'trash':
                    self.log(logging.INFO, "%sTrashing #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        self.trashWriter.deliver(msg)
                        os.unlink(get_flname(msg.fp))
                elif mode == 'delete':
                    self.log(logging.INFO, "%sDeleting #%d (old)" %
                             (fakeMsg, i), msg)
                    if not self.isTrialRun:
                        os.unlink(get_flname(msg.fp))
                else:  # mode == 'archive'
                    # Determine subfolder path
                    subFolder = self._archiveSubFolder(archiveFolder, msg)
                    sfPath = os.path.join(self.folderBase,
                                          self.folderPrefix + subFolder)
                    self.log(logging.INFO, "%sArchiving #%d to %s" %
                             (fakeMsg, i, subFolder), msg)
                    if not self.isTrialRun:
                        # Create the subfolder if needed
                        if not os.path.exists(sfPath):
                            mkMaildir(sfPath)
                        # Deliver
                        self.__mdWriter.deliver(msg, sfPath)
                        os.unlink(get_flname(msg.fp))
                self.stats[mode] += 1
            else:
                self.log(logging.DEBUG, "Keeping #%d (fresh)" % i, msg)
            self.stats['total'] += 1

    def log(self, lvl, text, msgObj):
        """Log some text with the subject of a message"""
        subj = msgObj.getSubject()
        if subj is None:
            subj = "(no subject)"
        self.logger.log(lvl, text + ": " + subj)
# Command-line driver: parse the options, then clean each named folder.
# NOTE(review): several statements of this section (try:, the getopt short
# option string, the sys.exit calls) were missing from the copy under
# review and have been reconstructed from the option handlers below.

# Defaults
minAge = 14
mode = None

# NOTE(review): logger setup assumed.  Verbosity is steered through
# logging.disable() below, which only works as intended if a handler exists
# and the root logger lets DEBUG/INFO records through.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
logging.disable(logging.INFO - 1)
logger = logging.getLogger('cleanup-maildir')
cleaner = MaildirCleaner()

# Read command-line arguments
try:
    opts, args = getopt.getopt(sys.argv[1:],
            "hqvnm:t:a:krd:",
            ["help", "quiet", "verbose", "version", "mode=", "trash-folder=",
             "age=", "keep-flagged-threads", "keep-read", "folder-seperator=",
             "folder-prefix=", "maildir-root=", "archive-folder=",
             "archive-hierarchy-depth=", "trial-run"])
except getopt.GetoptError:
    # sys.exc_info() keeps this valid on every Python version.
    logger.error("%s\n\n%s" % (sys.exc_info()[1].msg, __doc__))
    sys.exit(2)

for o, a in opts:
    if o in ("-h", "--help"):
        print(__doc__)
        sys.exit(0)
    if o in ("-q", "--quiet"):
        logging.disable(logging.WARNING - 1)
    if o in ("-v", "--verbose"):
        logging.disable(logging.DEBUG - 1)
    if o == "--version":
        print(__version__)
        sys.exit(0)
    if o in ("-n", "--trial-run"):
        cleaner.isTrialRun = True
    if o in ("-m", "--mode"):
        logger.warning("the --mode flag is deprecated (see --help)")
        if a in ('trash', 'archive', 'delete'):
            mode = a
        else:
            logger.error("%s is not a valid command" % a)
            sys.exit(2)
    if o in ("-t", "--trash-folder"):
        cleaner.trashFolder = a
    if o == "--archive-folder":
        cleaner.archiveFolder = a
    if o in ("-a", "--age"):
        try:
            minAge = int(a)
        except ValueError:
            sys.stderr.write("Error: age must be a whole number of days.\n")
            sys.exit(2)
    if o in ("-k", "--keep-flagged-threads"):
        cleaner.keepFlaggedThreads = True
    if o in ("-r", "--keep-read"):
        cleaner.keepRead = True
    if o == "--folder-seperator":
        cleaner.folderSeperator = a
    if o == "--folder-prefix":
        cleaner.folderPrefix = a
    if o == "--maildir-root":
        cleaner.folderBase = a
    if o in ("-d", "--archive-hierarchy-depth"):
        try:
            archiveHierDepth = int(a)
        except ValueError:
            archiveHierDepth = 0  # rejected by the range check below
        if archiveHierDepth < 1 or archiveHierDepth > 3:
            sys.stderr.write("Error: archive hierarchy depth must be 1, " +
                             "2, or 3.\n")
            sys.exit(2)
        cleaner.archiveHierDepth = archiveHierDepth

if not cleaner.folderBase:
    cleaner.folderBase = os.path.join(os.environ["HOME"], "Maildir")

# Unless the deprecated --mode flag was used, the first positional
# argument is the command.
if mode is None:
    if len(args) < 1:
        logger.error("No command specified")
        sys.exit(2)
    mode = args.pop(0)
    if mode not in ('trash', 'archive', 'delete'):
        logger.error("%s is not a valid command" % mode)
        sys.exit(2)

if len(args) == 0:
    logger.error("No folder(s) specified")
    sys.exit(2)

logger.debug("Mode is " + mode)

# Clean each folder
for folder in args:
    logger.debug("Cleaning up %s..." % folder)
    cleaner.clean(mode, folder, minAge)

logger.info('Total messages: %5d' % cleaner.stats['total'])
logger.info('Affected messages: %5d' % cleaner.stats[mode])
logger.info('Untouched messages: %5d' %
            (cleaner.stats['total'] - cleaner.stats[mode]))