Skip to content

Commit

Permalink
tools: Add tools to process mailboxes
Browse files Browse the repository at this point in the history
process_mailbox uses procmail's formail to separate and sort mails.
process_mailbox creates subdirectories for year, month and date and
places mails into those subdirectories, named by their Message-ID.

Messages with suspicious Message-IDs or Dates are automatically
filtered.

Signed-off-by: Ralf Ramsauer <ralf.ramsauer@oth-regensburg.de>
  • Loading branch information
rralf committed Aug 16, 2017
1 parent ee9982d commit c45ecb4
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
69 changes: 69 additions & 0 deletions tools/process_mail.sh
@@ -0,0 +1,69 @@
#!/bin/bash

# Copyright (c) OTH Regensburg, 2017
#
# Author:
# Ralf Ramsauer <ralf.ramsauer@othr.de>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.

# Sorts one single mail. Invoked by process_mailbox.sh

# Destination root directory. Refuse to run without it: with an empty
# BASEDIR the mail would be sorted into "/<year>/<month>/<day>".
BASEDIR=${1:?Usage: $0 destination_directory < mail}

# Buffer the mail from stdin in a temporary file, because it is scanned
# more than once (Message-ID, date headers) before being moved into place.
TMP=$(mktemp) || { echo "Unable to create temporary file" 1>&2; exit 1; }

# Remove the buffer on every exit path. This is a no-op once the file has
# been moved to its final destination or already removed.
trap 'rm -f -- "$TMP"' EXIT

cat > "$TMP" || { echo "Unable to read mail from stdin" 1>&2; exit 1; }

# Abort processing of the current mail.
# Globals:   TMP (read) - temporary file holding the buffered mail
# Arguments: the message to print
# Outputs:   the message on stderr
# Exits with status 1 after removing the temporary file.
die() {
  echo "$@" >&2
  rm -- "$TMP"
  exit 1
}

# Extract a date header from the mail buffered in $TMP and print it as
# "YYYY/MM/DD".
#
# Globals:   TMP (read) - file holding the raw mail
# Arguments: $1 - header name without the trailing colon, e.g. "Date"
# Outputs:   the formatted date on stdout (nothing on failure)
# Returns:   0 on success,
#            1 if the header is missing or date(1) cannot parse it,
#            2 if the date lies before 1970
get_date() {
  local header=$1
  local raw stamp year

  # Only search the header section (everything before the first empty
  # line), so a body line starting with "<header>:" is never mistaken for
  # the real header. Only the first occurrence is used.
  raw=$(sed -n -e '/^$/q' -e "s/^${header}:[[:space:]]*//p" -- "$TMP" \
    | head -n 1)

  # GNU date parses an empty string as "today 00:00". A missing header must
  # be reported as an error instead, so the caller can fall back to another
  # header rather than filing the mail under the current date.
  if [ -z "$raw" ]; then
    return 1
  fi

  # A single date(1) call yields year, month and day consistently.
  stamp=$(date -d "$raw" "+%Y/%m/%d")
  year=${stamp%%/*}

  if [ -z "$year" ]; then
    return 1
  fi

  # [ ] compares as decimal; (( )) or [[ ]] would treat a zero-padded year
  # as octal.
  if [ "$year" -lt 1970 ]; then
    return 2
  fi

  echo "$stamp"
  return 0
}

# Extract the Message-ID from the header section only (everything before the
# first empty line). Header field names are case-insensitive and
# "Message-Id:" is a common spelling, so match regardless of case.
ID=$(sed -e '/^$/q' -- "$TMP" | grep -i -m 1 '^Message-ID:')
ID=${ID#*:}
# Strip leading whitespace from the header value.
ID=${ID#"${ID%%[![:space:]]*}"}

# Without a Message-ID every mail would hash to the same file name, so all
# but the first would be rejected as bogus duplicates.
if [ -z "$ID" ]; then
  die "No Message-ID found. Unable to sort this mail."
fi

# Hash the ID verbatim: printf '%s' neither word-splits, globs nor
# interprets backslash escapes inside the ID.
MD5=$(printf '%s' "$ID" | md5sum | awk '{ print $1 }')

# Try to get a valid mail date
DATE=$(get_date Date)
R=$?
if ((R > 0)); then
  echo "Invalid Date (Error: $R, ID: $ID)"
  echo "Fall back to NNTP date..."
  DATE=$(get_date NNTP-Posting-Date) \
    || die "Nope, I'm sorry. No way to parse this mail."
  echo "Success."
fi

DSTDIR="${BASEDIR}/${DATE}"
DSTFILE="${DSTDIR}/${MD5}"
mkdir -p -- "$DSTDIR" || die "Unable to create directory $DSTDIR"

if [ -f "$DSTFILE" ]; then
  die "File for $ID already exists. Duplicate entry?"
fi

# Only record the mail in the index if it really reached its destination.
mv -- "$TMP" "$DSTFILE" || die "Unable to move mail to $DSTFILE"

# No lock is taken: this relies on a short line appended with a single
# write being written atomically.
printf '%s %s %s\n' "$DATE" "$ID" "$MD5" >> "${BASEDIR}/index"
27 changes: 27 additions & 0 deletions tools/process_mailbox.sh
@@ -0,0 +1,27 @@
#!/bin/bash

# Copyright (c) OTH Regensburg, 2017
#
# Author:
# Ralf Ramsauer <ralf.ramsauer@othr.de>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.

# Report an error and terminate the script.
# Arguments: the message to print
# Outputs:   the message on stderr
# Exits with status 255 (equivalent to `exit -1` in bash).
die() {
  echo "$@" >&2
  exit 255
}

if [ "$#" -ne 2 ]; then
  echo "Usage: $0 mailbox_file destination_directory"
  echo
  echo "This script splits up a mailbox file into seperate mail"
  echo "files, placed into date-separated subdirectories."
  exit 1
fi

MAILBOX=$1
BASEDIR=$2

# process_mail.sh is shipped in the same directory as this script. Resolve
# it relative to the script's own location so the tool also works when it is
# invoked from another working directory.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) \
  || die "Unable to determine script directory"

[ -r "$MAILBOX" ] || die "Unable to read mailbox file $MAILBOX"
mkdir -p -- "$BASEDIR" || die "Unable to create basedir"

# Split the mailbox into single mails and feed each one to process_mail.sh,
# running up to $(nproc) instances in parallel.
formail -n "$(nproc)" -s "${SCRIPT_DIR}/process_mail.sh" "$BASEDIR" < "$MAILBOX"

0 comments on commit c45ecb4

Please sign in to comment.