Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 51 lines (44 sloc) 1.46 KB
#!/bin/bash
# Check a website for broken links by crawling it with wget.
#
# Strict mode: abort on command failure (-e), on use of an unset variable
# (-u) and on a failing pipeline stage (pipefail); -f disables pathname
# expansion.
set -euf -o pipefail

# Exactly one argument (the site URL) is required; -h and --help print the
# same usage text.  Usage always goes to stderr with exit status 1.
if (( $# != 1 )) || [[ "${1:-}" == '-h' || "${1:-}" == '--help' ]]; then
  printf '%s\n' \
    "Usage: $0 http://SOME-WEBSITE/" \
    "Links outside SOME-WEBSITE will not be checked." \
    "Page contents will be downloaded and saved for investigation." >&2
  exit 1
fi
# Site to crawl: the script's first argument.
readonly WEBSITE="$1"
# Fresh scratch directory for this run.  Assigned separately from `readonly`
# so that a failing mktemp is not masked by the status of `readonly`.
DIR="$(mktemp -d /tmp/check-links-XXXXXXXXXX)"
readonly DIR
# Path of the wget log inside the scratch directory.
readonly LOG="${DIR}/wget.log"
# Make the scratch directory the working directory for everything that
# follows.
cd "${DIR}"
# URLs matching this regex are passed to wget's --reject-regex and are not
# fetched.
reject_regex="/xmlrpc.php|/blog/Smarter_HTTP_redirects/"

# Crawl the site recursively (robots.txt ignored, one second between
# requests), fetching page requisites as well, and log to wget.log in the
# current directory.
#
# wget exits non-zero when requests fail, so its status must not abort the
# script under `set -e`.  Status 8 ("server issued an error response") is the
# expected outcome when broken links exist; those responses are reported from
# the log afterwards.  Any other non-zero status (network, SSL, file I/O, ...)
# means the crawl itself may be incomplete, which the log scan for HTTP
# statuses cannot detect, so warn about it rather than discard it silently.
wget_status=0
wget \
  --output-file wget.log \
  --execute robots=off \
  --reject-regex "${reject_regex}" \
  --wait 1 \
  --recursive \
  --page-requisites \
  "${WEBSITE}" || wget_status=$?
case "${wget_status}" in
  0 | 8) ;;
  *)
    echo "Warning: wget exited with status ${wget_status};" \
      "the crawl may be incomplete." >&2
    ;;
esac
#######################################
# Print every problem found by the crawl.
# Arguments:
#   $1 - path to the wget log file
#   $2 - directory to search for error messages in downloaded content
# Outputs:
#   For each logged request whose response status is not 200: the request
#   line followed by the status line.  After that, every line under $2 that
#   contains 'Fatal error', in `grep -r` format (path:line).
# Returns:
#   sed's exit status if the log cannot be processed, 0 otherwise.
#######################################
find_problems() {
  local log="$1"
  local dir="$2"

  # A line starting with -- marks the start of a request; replace the hold
  # space with that line.
  # Delete successful request (200) logs.
  # When any other status is seen, append the line to the hold space, copy
  # the hold space over the pattern space, then print the pattern space.
  # errexit is not active inside a command substitution, hence the explicit
  # `|| return` to keep a sed failure fatal for the caller.
  sed -n \
    -e '/^--/ h' \
    -e '/HTTP request sent, awaiting response... 200/ d' \
    -e '/HTTP request sent, awaiting response.../ { H; g; p; }' \
    "${log}" || return

  # Check for common error messages too.  grep exits non-zero when nothing
  # matches, which is not a failure here.
  grep -r -F -e 'Fatal error' "${dir}" || true
}

# Both commands terminate their output with a newline, so the last failed
# request and the first grep hit stay on separate lines; the command
# substitution only strips the trailing newlines of the combined output.
PROBLEMS="$(find_problems "${LOG}" "${DIR}")"
# Stay silent and discard the scratch directory only when nothing was found
# and stdin is not a terminal.  In every other case print a summary and keep
# the log and downloaded files for inspection.
if [[ -z "${PROBLEMS}" && ! -t 0 ]]; then
  rm -rf "${DIR}"
else
  if [[ -z "${PROBLEMS}" ]]; then
    echo "No problems found :)"
  else
    echo "Problems found:"
    echo "${PROBLEMS}"
  fi
  echo "See ${LOG} and the contents of ${DIR} for further investigation"
fi