Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tools: Add tools to process mailboxes
process_mailbox uses procmail's formail to separate and sort mails. It creates subdirectories for year, month and day and places the mails into those subdirectories, each named by the MD5 hash of its Message-ID. Messages with suspicious Message-IDs or Dates are automatically filtered. Signed-off-by: Ralf Ramsauer <ralf.ramsauer@oth-regensburg.de>
- Loading branch information
Showing
2 changed files
with
96 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/bin/bash | ||
|
||
# Copyright (c) OTH Regensburg, 2017 | ||
# | ||
# Author: | ||
# Ralf Ramsauer <ralf.ramsauer@othr.de> | ||
# | ||
# This work is licensed under the terms of the GNU GPL, version 2. See | ||
# the COPYING file in the top-level directory. | ||
|
||
# Sorts one single mail. Invoked by process_mailbox.sh | ||
|
||
# Destination root directory. The argument is mandatory because every output
# path of this script is built from it; the shell aborts with the message
# below if it is missing or empty.
BASEDIR=${1:?"destination directory argument is missing"}

# Buffer the mail read from stdin in a temporary file so that it can be
# inspected several times and finally moved to its destination.
TMP=$(mktemp) || { echo "Unable to create temporary file" 1>&2; exit 1; }
cat > "$TMP" || { echo "Unable to read mail from stdin" 1>&2; rm -f -- "$TMP"; exit 1; }
|
||
# Prints all arguments as one error message on stderr, removes the temporary
# mail file and terminates the script with exit status 1.
# Globals:   TMP (read) - path of the temporary mail file
# Arguments: the words of the message to print
function die {
	# printf, unlike echo, never treats the message as an option (e.g. "-n")
	printf '%s\n' "$*" 1>&2
	# -f: stay silent if the temporary file is already gone
	[ -n "$TMP" ] && rm -f -- "$TMP"
	exit 1
}
|
||
# Extracts the date of the mail stored in $TMP from the given header field and
# prints it as a "YYYY/MM/DD" path fragment on stdout.
# Globals:   TMP (read) - path of the file holding the mail
# Arguments: $1 - header field name, e.g. "Date" or "NNTP-Posting-Date"
# Returns:   0 on success,
#            1 if the header is missing or its value cannot be parsed,
#            2 if the date lies before 1970
function get_date {
	local HEADER=$1
	# Declared separately from the assignments: "local VAR=$(cmd)" would
	# hide the exit status of cmd
	local DATE YMD YEAR

	# Only the header section (everything up to the first empty line) is
	# searched, so a "Date:" line in the body is never mistaken for the
	# header field. The first matching field wins.
	DATE=$(awk -v prefix="${HEADER}:" '
		/^\r?$/ { exit }
		index($0, prefix) == 1 {
			value = substr($0, length(prefix) + 1)
			sub(/^[ \t]+/, "", value)
			print value
			exit
		}' "$TMP")

	# GNU date interprets an empty string as "today", so a missing header
	# has to be rejected before it reaches date
	if [ -z "$DATE" ]; then
		return 1
	fi

	# One date call yields all three components
	YMD=$(date -d "${DATE}" "+%Y/%m/%d") || return 1
	YEAR=${YMD%%/*}

	if [ -z "$YEAR" ]; then
		return 1
	fi

	if [ "$YEAR" -lt 1970 ]; then
		return 2
	fi

	echo "$YMD"
	return 0
}
|
||
# Message-ID of the mail (first matching line); empty if there is none
ID=$(grep "^Message-ID:" "$TMP" | head -n 1 | sed -e 's/^Message-ID:\s*//')
# Hash of the Message-ID, used as the file name of the stored mail. The ID is
# handed to printf as data so that the shell neither glob-expands nor
# word-splits it and no backslash sequence in it is interpreted.
MD5=$(printf '%s' "$ID" | md5sum | awk '{ print $1 }')
|
||
# Determine the date of the mail: the Date header is preferred, the
# NNTP-Posting-Date header serves as fallback. Without any usable date the
# mail is rejected.
DATE=$(get_date Date)
R=$?
if [ "$R" -ne 0 ]; then
	echo "Invalid Date (Error: $R, ID: $ID)"
	echo "Fall back to NNTP date..."
	if ! DATE=$(get_date NNTP-Posting-Date); then
		die "Nope, I'm sorry. No way to parse this mail."
	fi
	echo "Success."
fi
|
||
# Mails are stored as <BASEDIR>/<YYYY>/<MM>/<DD>/<MD5 of the Message-ID>
DSTDIR="${BASEDIR}/${DATE}"
DSTFILE="${DSTDIR}/${MD5}"
# mkdir -p succeeds if the directory already exists, no prior test is needed
mkdir -p -- "$DSTDIR" || die "Unable to create $DSTDIR"

if [ -f "$DSTFILE" ]; then
	die "File for $ID already exists. Duplicate entry?"
fi

# Abort on failure so that no index entry is written for a mail that was
# not stored
mv -- "$TMP" "$DSTFILE" || die "Unable to move mail to $DSTFILE"
|
||
# No lock is taken: the entry is one short line that is appended to the index.
# NOTE(review): this relies on short appends from concurrently running
# instances not interleaving - confirm for the target filesystem.
printf '%s\n' "$DATE $ID $MD5" >> "${BASEDIR}/index"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash | ||
|
||
# Copyright (c) OTH Regensburg, 2017 | ||
# | ||
# Author: | ||
# Ralf Ramsauer <ralf.ramsauer@othr.de> | ||
# | ||
# This work is licensed under the terms of the GNU GPL, version 2. See | ||
# the COPYING file in the top-level directory. | ||
|
||
# Prints all arguments as one error message on stderr and terminates the
# script with exit status 1.
# Arguments: the words of the message to print
function die {
	# printf, unlike echo, never treats the message as an option (e.g. "-n")
	printf '%s\n' "$*" 1>&2
	# Exit statuses are limited to 0-255, "exit -1" lies outside that range
	exit 1
}
|
||
# Exactly two arguments are required: the mailbox file and the destination
# directory
if [ "$#" -ne 2 ]; then
	# The usage text is shown because of an invocation error, hence stderr
	{
		echo "Usage: $0 mailbox_file destination_directory"
		echo
		echo "This script splits up a mailbox file into separate mail"
		echo "files, placed into date-separated subdirectories."
	} 1>&2
	exit 1
fi
|
||
# Destination directory, created on demand
BASEDIR=${2}
mkdir -p -- "$BASEDIR" || die "Unable to create basedir"

# Split the mailbox (first argument) into single mails and hand each of them
# on stdin to process_mail.sh, with $(nproc) passed as the process limit of
# formail -n. The helper is looked up next to this script so that the call
# does not depend on the current working directory.
formail -n "$(nproc)" -s "$(dirname -- "$0")/process_mail.sh" "$BASEDIR" < "$1"