Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 108 lines (96 sloc) 3.02 KB
#!/bin/bash
#
# Import MakeStuff blog entries or LiveJournal posts into a Jekyll _posts
# directory.  Run without arguments to see the full usage text.

# Print the usage message on stdout and exit with status 1.
usage () {
    cat <<EOF
$0: [-d directory] pathname...
Import MakeStuff blog entries or lj posts into a Jekyll _posts directory.
The entries are of the form ./yy/mm/dd--name.ext; -d specifies the
directory they are contained in, and defaults to ~/.ljarchive.
Filenames are converted from yyyy/mm/dd--name to yyyy-mm-dd-name format,
and names ending in _dddd have that replaced by SLUG.html.
The front matter is put into YAML format no matter what format it
had originally.
The "Subject" header is converted to "title", tags get surrounded by
brackets to make a list, and tags of "curmudgeon" are removed from the
list. (That last could be generalized, but it can wait.)
Files that have already been imported are skipped.
EOF
    exit 1
}

echo
if [ -z "$1" ]; then
    usage
fi

# Optional "-d directory": where entries that are not found relative to the
# current directory are looked up.
case "$1" in
    -d)
        # Without this check a bare "-d" would silently set DIR to "/".
        if [ -z "$2" ]; then
            usage
        fi
        DIR=$2/
        shift 2
        ;;
esac

# "-d dir" with nothing to import is a usage error, not a silent no-op.
if [ "$#" -eq 0 ]; then
    usage
fi

# DIR is deliberately not initialised above, so a value inherited from the
# environment is still honoured; quoting keeps directories containing
# whitespace from breaking the test.
if [ -z "$DIR" ]; then
    DIR=~/.ljarchive
fi

shopt -s extglob                # Required to trim whitespace; see below
# Parse the header section of a post from standard input.
#
# Reads "key: value" lines until the header section ends: at an empty line,
# at the second "---" line of YAML front matter, or at Emacs's
# "--text follows this line--" separator.  Whatever follows is left unread
# on stdin for the caller.
#
# Globals written (all cleared on every call, so nothing leaks from a
# previously processed file):
#   SUBJECT  value of the Subject/Title header wrapped in double quotes,
#            or empty when there is no such header
#   TAGS     value of the Tags header with any square brackets removed
#   REST     every other header as lower-cased  key: "value"  entries joined
#            by a literal backslash-n; meant to be printed with `echo -e`
read_headers () {
    local key value
    local yaml_started=     # local: each file starts outside any YAML block
    SUBJECT=
    TAGS=
    REST=
    # -r keeps backslashes in header values instead of letting read eat them.
    while IFS=':' read -r key value; do
        # Trim leading and trailing whitespace from "value".  Plain patterns
        # are used so this does not depend on `shopt -s extglob` being set.
        value="${value#"${value%%[![:space:]]*}"}"
        value="${value%"${value##*[![:space:]]}"}"
        [ -z "$key" ] && break  # the separator line between headers and body
        # Values with colons or brackets need to be escaped/quoted.
        # Start by just double quoting everything except tags, which are safe
        # and need special treatment anyway.
        # NOTE(review): a value that is already double-quoted in the input
        # ends up with two layers of quotes; confirm whether that occurs.
        case "$key" in
            [sS]ubject|[tT]itle)
                SUBJECT="\"$value\""
                ;;
            [tT]ags)
                TAGS="${value//[\[\]]/}"  # remove brackets from tag list, if any
                ;;
            ---)
                # First "---" opens YAML front matter, the second closes it.
                [ -n "$yaml_started" ] && break
                yaml_started=1
                ;;
            "--text follows this line--")
                break           # Handle emacs's separator
                ;;
            *)
                # REST is printed with `echo -e`; double any backslashes so
                # they come out of that step unchanged.
                value=${value//\\/\\\\}
                if [ -z "$REST" ]; then
                    REST="${key,,}: \"$value\""
                else
                    REST="$REST\n${key,,}: \"$value\""
                fi
                ;;
        esac
    done
}
# Turn a "Subject: ..." header line into a lower-case slug for a filename.
#
# Arguments: $1 - the subject line (the "Subject:" prefix is optional)
# Outputs:   the slug on stdout: the prefix, double quotes and any
#            [bracketed] text are removed, runs of spaces become a single
#            "-", every character other than letters, digits and "-" is
#            dropped, and leading/trailing dashes are stripped.
slug () {
    # printf rather than echo, so a subject such as "-n" is not mistaken for
    # an option.  Dashes are trimmed *after* punctuation has been removed;
    # otherwise "Hi !" would come out as "hi-".
    printf '%s\n' "$1" \
        | sed -e 's/Subject: *//' -e 's/"//g' -e 's/\[.*\]//' \
              -e 's/  */-/g' \
              -e 's/[^-a-zA-Z0-9]//g' \
              -e 's/^-*//' -e 's/-*$//' \
        | tr 'A-Z' 'a-z'
}
# Import every pathname given on the command line.
#
# For each argument:
#   ff  the argument as given
#   f   the entry name (basename if ff exists, otherwise ff itself)
#   d   the directory the entry is read from (dirname of ff, or $DIR)
#   g   the output filename, created in the current directory
# "$@" (not $*) keeps arguments containing whitespace intact.
for ff in "$@"; do
    if [ -e "$ff" ]; then
        f=$(basename -- "$ff")
        d=$(dirname -- "$ff")
    else
        f=$ff
        d=$DIR
    fi
    # yyyy/mm/dd--name -> yyyy-mm-dd-name, dropping a leading "./" and any
    # trailing _NNNN sequence number.
    g=$(printf '%s\n' "$f" \
            | sed -e 's|/|-|g' -e 's/--/-/' -e 's/[.]-//' -e 's/_[0-9]*$//')
    seq_re='_[0-9]*$'
    if [[ $f =~ $seq_re ]]; then
        # the filename ends in a sequence number, so it must be an archived
        # LJ post: extract the subject and slugify it.  Read it from the same
        # file that is imported below ($d/$f).
        subj=$(sed -n -e '/Subject:/p' -e '/Subject:/q' "$d/$f")
        g=$g-$(slug "$subj").html
    fi
    if [ -e "$g" ]; then
        printf '%s exists, skipping %s\n' "$g" "$f"
    elif [ ! -r "$d/$f" ]; then
        printf '%s: cannot read %s\n' "$0" "$d/$f" >&2
    else
        printf '%s -> %s\n' "$d/$f" "$g"
        {
            echo ---
            read_headers
            # Quoted, so a "*" or repeated spaces in the title survive.
            printf 'title: %s\n' "$SUBJECT"
            # Rebuild the tag list without "curmudgeon" and without empty
            # entries, so no stray or doubled commas are left behind.
            kept=
            IFS=',' read -r -a tag_list <<< "$TAGS"
            for tag in "${tag_list[@]}"; do
                tag="${tag#"${tag%%[![:space:]]*}"}"
                tag="${tag%"${tag##*[![:space:]]}"}"
                if [ -z "$tag" ] || [ "$tag" = curmudgeon ]; then
                    continue
                fi
                kept=${kept:+$kept, }$tag
            done
            echo "tags: [${kept:+ $kept} ]"
            # REST joins its entries with a literal "\n", hence echo -e.
            if [ -n "$REST" ]; then
                echo -e "$REST"
            fi
            echo layout: post # needed because Jekyll's default mechanism appears to be broken
            echo ---
            cat                 # copy the body, i.e. whatever read_headers left on stdin
            # FIXME: take out any "another fine post" colophon.  Possibly not needed
            # now that we've imported all the original curmudgeon posts.
            # sed -i -e '/Another fine/,/The Computer Curmudgeon/d'
        } < "$d/$f" > "$g"
    fi
done