Skip to content

Commit

Permalink
refactor: remove USE_PATCHWORK_ID support and introduce IS_PATCHWORK_…
Browse files Browse the repository at this point in the history
…ARCHIVE

For a Patchwork archive, an additional column holding the Patchwork ID is
appended to each entry of the index file.

Signed-off-by: Rohit Sarkar <rohitsarkar5398@gmail.com>
[ralf: remove unrelated changes, add minor improvements, rebase to latest bugfixes on next]
Signed-off-by: Ralf Ramsauer <ralf.ramsauer@oth-regensburg.de>
  • Loading branch information
rsarky authored and rralf committed May 15, 2020
1 parent d702a64 commit dcbaabf
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 52 deletions.
4 changes: 0 additions & 4 deletions bin/pasta_analyse.py
Expand Up @@ -199,10 +199,6 @@ def fill_result(hashes, tag):
victims = available

if args.linux:
if config.mbox_use_patchwork_id:
log.error('Doesn\'t work with USE_PATCHWORK_ID = true')
return -1

log.info('Searching for non-Linux patches...')
repo.mbox.load_threads()
characteristic = load_linux_mail_characteristics(repo, victims)
Expand Down
4 changes: 0 additions & 4 deletions bin/pasta_evaluate_patches.py
Expand Up @@ -220,10 +220,6 @@ def evaluate_patches(config, prog, argv):
log.error('Only works in Mbox mode!')
return -1

if config.mbox_use_patchwork_id:
log.error('pasta evaluate_patches does not work with '
'USE_PATCHWORK_ID = true')

repo = config.repo
_, clustering = config.load_cluster()
clustering.optimize()
Expand Down
5 changes: 2 additions & 3 deletions bin/pasta_sync.py
Expand Up @@ -101,6 +101,5 @@ def sync(config, prog, argv):
config.update_ccache_mbox()

# Update the mail thread cache
if not config.mbox_use_patchwork_id:
repo.mbox.load_threads()
repo.mbox.threads.update()
repo.mbox.load_threads()
repo.mbox.threads.update()
1 change: 0 additions & 1 deletion pypasta/Config.py
Expand Up @@ -198,7 +198,6 @@ def path(name):
mbox_pub_in = mbox['pubin']

# mailbox parameters
self.mbox_use_patchwork_id = mbox['USE_PATCHWORK_ID']
self.mbox_mindate = parse_date_ymd(mbox['MINDATE'])
self.mbox_maxdate = parse_date_ymd(mbox['MAXDATE'])
self.mbox_time_window = self.mbox_mindate, self.mbox_maxdate
Expand Down
37 changes: 16 additions & 21 deletions pypasta/Repository/Mbox.py
Expand Up @@ -256,7 +256,7 @@ def __getitem__(self, message_id):
commits = self.get_hashes(message_id)
return [self.get_blob(commit) for commit in commits]

def update(self, use_patchwork_id):
def update(self):
log.info('Update list %s' % self.listaddr)
self.repo = pygit2.Repository(self.d_repo)

Expand All @@ -269,8 +269,6 @@ def update(self, use_patchwork_id):
hashes = hashes - known_hashes
log.info('Updating %d emails' % len(hashes))

identifier = 'X-Patchwork-ID' if use_patchwork_id else 'Message-ID'

for hash in hashes:
mail = self.get_mail_by_commit(hash)
if not mail:
Expand All @@ -279,26 +277,23 @@ def update(self, use_patchwork_id):

# There are broken mails that may contain multiple Message-IDs.
# Hence, get all Message-IDs and search for the sanest one
ids = mail.get_all(identifier)
ids = mail.get_all('Message-Id')
if ids is None or len(ids) == 0:
log.warning('No %s in commit %s' % (identifier, hash))
log.warning('No Message-Id in commit %s' % hash)
continue

id = max(ids, key=len)
id = ''.join(id.split())
if use_patchwork_id:
# Try to repair some broken message IDs. This only makes
# sense if the message ids have a 'sane' length
if len(id) > 10 and id[0] != '<' and id[-1] != '>':
id = '<%s>' % id
match = PubInbox.MESSAGE_ID_REGEX.match(id)
if match:
id = match.group(1)
else:
# Try to repair some broken message IDs. This only makes
# sense if the message ids have a 'sane' length
if len(id) > 10 and id[0] != '<' and id[-1] != '>':
id = '<%s>' % id
match = PubInbox.MESSAGE_ID_REGEX.match(id)
if match:
id = match.group(1)
else:
log.warning('Unable to parse Message ID: %s' % id)
continue
log.warning('Unable to parse Message ID: %s' % id)
continue

date = mail_parse_date(mail['Date'])
if not date:
Expand Down Expand Up @@ -337,7 +332,7 @@ def add_mbox(self, listname, f_mbox_raw):

return set(index.keys())

def update(self, use_patchwork_id):
def update(self):
for listname, f_mbox_raw in self.raw_mboxes:
if not os.path.exists(f_mbox_raw):
log.error('not a file or directory: %s' % f_mbox_raw)
Expand All @@ -346,7 +341,7 @@ def update(self, use_patchwork_id):
log.info('Processing raw mailbox %s' % listname)
cwd = os.getcwd()
os.chdir(os.path.join(cwd, 'tools'))
ret = call(['./process_mailbox_maildir.sh', str(use_patchwork_id),
ret = call(['./process_mailbox_maildir.sh', "False",
listname, self.d_mbox, f_mbox_raw])
os.chdir(cwd)
if ret == 0:
Expand Down Expand Up @@ -494,11 +489,11 @@ def get_ids(self, time_window=None, allow_invalid=False, lists=None):

return ids

def update(self, use_patchwork_id):
self.mbox_raw.update(use_patchwork_id)
def update(self):
self.mbox_raw.update()

for pub in self.pub_in:
pub.update(use_patchwork_id)
pub.update()

def get_lists(self, message_id):
return self.message_id_to_lists[message_id]
Expand Down
2 changes: 1 addition & 1 deletion pypasta/Repository/Repository.py
Expand Up @@ -282,7 +282,7 @@ def register_mbox(self, config):

def update_mbox(self, config):
self.register_mbox(config)
self.mbox.update(config.mbox_use_patchwork_id)
self.mbox.update()

# The mbox doesn't track changes after an update. The easiest
# workaround is to reload the whole instance.
Expand Down
2 changes: 1 addition & 1 deletion tools/global.env
Expand Up @@ -6,7 +6,7 @@
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.

USE_PATCHWORK_ID=${1}
IS_PATCHWORK_ARCHIVE=${1}
LISTNAME=${2}
BASEDIR=${3}
VICTIM=${4}
Expand Down
36 changes: 24 additions & 12 deletions tools/process_mail.sh
Expand Up @@ -47,18 +47,22 @@ function get_header {
formail -x $1 -c < $MAIL | head -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}

if [ "$USE_PATCHWORK_ID" = "True" ]; then
ID=$(get_header "X-Patchwork-ID")
if [ "$IS_PATCHWORK_ARCHIVE" = "True" ]; then
PATCHWORK_ID=$(get_header "X-Patchwork-ID")

if [ "$PATCHWORK_ID" = "" ]; then
die "Unable to parse Patchwork ID for ${MAIL}: empty Patchwork ID"
fi
# Always surround emails by <> tags. PaStA needs them in order to
# classify them as emails
ID="<${ID}>"
else
ID=$(get_header "Message-id" | sed -e 's/.*\(<.*>\).*/\1/i')
if [ "$ID" = "" ]; then
die "Unable to parse Message ID for ${MAIL}: empty Message-ID"
elif [[ "$ID" =~ $whitespace_pattern ]]; then
die "Unable to parse Message ID for ${MAIL}: contains whitespaces"
fi
PATCHWORK_ID="<${PATCHWORK_ID}>"
fi

ID=$(get_header "Message-id" | sed -e 's/.*\(<.*>\).*/\1/i')
if [ "$ID" = "" ]; then
die "Unable to parse Message ID for ${MAIL}: empty Message-ID"
elif [[ "$ID" =~ $whitespace_pattern ]]; then
die "Unable to parse Message ID for ${MAIL}: contains whitespaces"
fi

MD5=$(md5sum $MAIL | awk '{ print $1 }')
Expand All @@ -84,7 +88,11 @@ if [ "$DATE" == "" ]; then
fi

# no lock required, echo will write atomically when writing short lines
DSTDIR="${BASEDIR}/raw/${DATE}"
if [ "$IS_PATCHWORK_ARCHIVE" = "True" ]; then
DSTDIR="${BASEDIR}/patchwork/${DATE}"
else
DSTDIR="${BASEDIR}/raw/${DATE}"
fi
DSTFILE="${DSTDIR}/${MD5}"
[ -d $DSTDIR ] || mkdir -p $DSTDIR

Expand All @@ -93,4 +101,8 @@ if [ ! -f $DSTFILE ]; then
cp $MAIL $DSTFILE
fi

echo "$DATE $ID $MD5" >> ${INDEX}
if [ "$IS_PATCHWORK_ARCHIVE" = "True" ]; then
echo "$DATE $ID $MD5 $PATCHWORK_ID" >> ${INDEX}
else
echo "$DATE $ID $MD5" >> ${INDEX}
fi
4 changes: 2 additions & 2 deletions tools/process_mail_pipe.sh
Expand Up @@ -8,13 +8,13 @@
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.

USE_PATCHWORK_ID=$1
IS_PATCHWORK_ARCHIVE=$1
LISTNAME=$2
BASEDIR=$3
TMP=$(mktemp)

cat /dev/stdin > $TMP
./process_mail.sh $USE_PATCHWORK_ID $LISTNAME $BASEDIR $TMP
./process_mail.sh $IS_PATCHWORK_ARCHIVE $LISTNAME $BASEDIR $TMP
RET=$?

if [ $RET -eq 0 ]; then
Expand Down
6 changes: 3 additions & 3 deletions tools/process_mailbox_maildir.sh
Expand Up @@ -15,17 +15,17 @@ initialise
if [ "$#" -ne 4 ]; then
echo "Usage: $0 listname destination_directory mailbox_file"
echo
echo "This script splits up a mailbox file into seperate mail"
echo "This script splits up a mailbox file into separate mail"
echo "files, placed into date-separated subdirectories."
exit 1
fi

if [ -d ${VICTIM} ]; then
find ${VICTIM} -type f -print0 | \
xargs -0 -P $(nproc) -n 1 \
./process_mail.sh $USE_PATCHWORK_ID $LISTNAME $BASEDIR
./process_mail.sh $IS_PATCHWORK_ARCHIVE $LISTNAME $BASEDIR
elif [ -f ${VICTIM} ]; then
formail -n $(nproc) -s <${VICTIM} ./process_mail_pipe.sh $USE_PATCHWORK_ID $LISTNAME $BASEDIR
formail -n $(nproc) -s <${VICTIM} ./process_mail_pipe.sh $IS_PATCHWORK_ARCHIVE $LISTNAME $BASEDIR
else
echo "${VICTIM} is not a file or directory"
exit 1
Expand Down

0 comments on commit dcbaabf

Please sign in to comment.