Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 27 lines (24 sloc) 1.27 KB
#!/bin/bash
# Bash script that archives selected Free Dale Askey web sites using the WARC ISO format.
#
# For every non-blank line (a URL) in SITES this script:
#   1. creates a working directory <date>-<NNNNN> inside ARCHIVE_ROOT/FDA_<date>,
#      where NNNNN is the zero-padded line number of the URL in SITES;
#   2. renders a screenshot with wkhtmltoimage (under xvfb-run) and, when one
#      was produced, compresses it with pngcrush to <NNNNN>-<date>.png;
#   3. fetches the page and its requisites with wget, writing <NNNNN>-<date>
#      as the WARC file name prefix;
#   4. zips the working directory and removes it only if zip succeeded;
#   5. appends a "<timestamp> - <url> archived" line to LOG_FILE.
#
# Capture failures (screenshot, wget) are best-effort and do not stop the run;
# failures that would make later steps operate on the wrong directory do.
set -u

# NOTE: deliberately NOT named HOME. Reassigning HOME changes where every child
# process (wget, wkhtmltoimage, xvfb-run) looks for per-user configuration.
readonly ARCHIVE_ROOT=/mnt/DIY/web-archiving/arxivdaleascii/sites
readonly SITES=/home/nruest/git/arxivdaleascii/arxivdaleascii-sites.txt
readonly LOG_FILE=/var/log/daleascii.log
readonly USER_AGENT='User-Agent Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; de-de) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4'

DATE=$(date +"%Y_%m_%d")
readonly DATE
readonly RUN_DIR="$ARCHIVE_ROOT/FDA_$DATE"

# Print a diagnostic to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Print a diagnostic to stderr and abort the whole run.
die() {
  warn "$@"
  exit 1
}

[[ -r "$SITES" ]] || die "cannot read site list: $SITES"

# -p: a second run on the same day reuses the existing directory instead of
# printing an error and carrying on regardless.
mkdir -p -- "$RUN_DIR" || die "cannot create $RUN_DIR"
cd -- "$RUN_DIR" || die "cannot cd to $RUN_DIR"

index=0
# The list is read on fd 3 so that the tools started inside the loop cannot
# consume the remaining URLs from stdin.
# 'read -r' keeps backslashes literal; IFS is left at its default on purpose so
# leading/trailing whitespace around a URL is still trimmed.
# '|| [[ -n $line ]]' processes a last line that has no trailing newline.
while read -r line <&3 || [[ -n "$line" ]]; do
  # Count every line, including blank ones, so the number in the archive name
  # stays equal to the URL's line number in SITES.
  index=$((index + 1))
  [[ -n "$line" ]] || continue

  pad=$(printf '%05d' "$index")
  site_dir="$DATE-$pad"

  if ! mkdir -p -- "$site_dir"; then
    warn "cannot create $RUN_DIR/$site_dir; skipping $line"
    continue
  fi
  if ! cd -- "$site_dir"; then
    warn "cannot cd to $RUN_DIR/$site_dir; skipping $line"
    continue
  fi

  #/usr/bin/xvfb-run -a -s "-screen 0 1280x1024x24" /usr/bin/wkhtmltopdf --dpi 200 --page-size Letter --custom-header 'User-Agent' 'User-Agent Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; de-de) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4' "$line" $pad-$DATE.pdf
  /usr/bin/xvfb-run -a -s "-screen 0 1280x1024x24" \
    /usr/local/bin/wkhtmltoimage --use-xserver \
    --custom-header 'User-Agent' "$USER_AGENT" \
    "$line" tmp.png

  # Decide on the presence of the image rather than on the exit status of the
  # renderer, so a screenshot that was written is always kept.
  if [[ -s tmp.png ]]; then
    /usr/bin/pngcrush tmp.png "$pad-$DATE.png"
  else
    warn "no screenshot produced for $line"
  fi
  rm -f -- tmp.png

  # Best-effort: wget's exit status is not treated as fatal because whatever
  # was retrieved is still worth keeping.
  /usr/local/bin/wget --adjust-extension --page-requisites --convert-links \
    --no-parent --random-wait --warc-file="$pad-$DATE" "$line"

  # If we cannot get back to the run directory, the zip/rm below (and every
  # following iteration) would act on the wrong location, so stop here.
  cd -- "$RUN_DIR" || die "cannot cd back to $RUN_DIR"

  # Only discard the working directory once its contents are safely zipped.
  if zip -r "$site_dir.zip" "$site_dir"; then
    rm -rf -- "${site_dir:?}"
  else
    warn "zip failed for $site_dir; leaving directory in place"
  fi

  printf '%s - %s archived\n' "$(date)" "$line" >> "$LOG_FILE"
done 3< "$SITES"