Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: f93a55beae
Fetching contributors…

Cannot retrieve contributors at this time

executable file 147 lines (126 sloc) 5.481 kb
#!/bin/sh
# Generate an RSS 2.0 feed directly from your existing web site
# Author - Pádraig Brady <P@draigBrady.com>
# Licence - LGPLV2
# Releases -
# 1.0 - Jun 19 2006 - Initial release
# 1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
# HTML comment within the first 10 lines.
# 1.2 - May 01 2007 - Add author elements (from html if present)
# 1.4 - May 04 2011
# http://github.com/pixelb/scripts/commits/master/scripts/bashfeed
# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.
# One can generate a feed for a subset of the site by still running
# from the root directory, and passing a subdirectory to start at.
# To change the feed TITLE and DESCRIPTION, set those environment
# variables before running the script.
# Which files are selected and excluded can be configured below.
# Note for html files it will extract the following elements if present
# <head>
# <title>Item title</title>
# <meta name="description" content="One line item description">
# <meta name="keywords" content="Item tags">
# </head>
# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
#
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
#
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# ---- User configuration -------------------------------------------------
num_files=10                 # maximum number of items placed in the feed
site="www.pixelbeat.org"
author="P@draigBrady.com (Pádraig Brady)"
suggested_update_freq=1440 #mins

# TITLE and DESCRIPTION may be preset in the environment; these are only
# the fallbacks used when they are unset.
: "${TITLE=$site}"
: "${DESCRIPTION="latest from $site"}"

#files starting with . | files without a . | files ending in .c .cpp ...
include_re='(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$' #only show these files
exclude_re='(\.git/|priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum))' #don't show these paths
default_files="index.html index.shtml index.php"

############# No user serviceable parts below ###################

# Build a sed script that strips a trailing default file name from a path,
# so that "dir/index.html" is linked as "dir/" and "index.html" as "".
# Each name is regex-escaped and must be a whole path component, so that
# e.g. "reindex.html" or "foo/indexXhtml" are left untouched.
# Start from empty so a same-named environment variable cannot leak in.
replace_default_files=""
for file in $default_files; do
  # Backslash-escape the characters that are special in a sed BRE or that
  # would terminate the s/// command.
  file_re=$(printf '%s\n' "$file" | sed 's,[][\/.^$*],\\&,g')
  replace_default_files="${replace_default_files:+$replace_default_files; }s/^$file_re\$//;t; s/\(.*\/\)$file_re\$/\1/;t"
done
# Write the XML prolog and the opening of the RSS channel to stdout.
# The channel <link> is built from $site and the optional start
# directory passed as the first argument.
echo '<?xml version="1.0" encoding="utf-8"?>'
echo '<?xml-stylesheet type="text/css" href="rss2.css" ?>'
echo '<rss version="2.0">'
echo '<channel>'

# Timestamp of this feed build, used for <lastBuildDate>.
time=$(date --rfc-2822)

# The channel metadata is surrounded by one blank line on each side.
echo
echo "<title>$TITLE</title>"
echo "<ttl>$suggested_update_freq</ttl>"
echo "<link>http://$site/$1</link>"
echo "<generator>http://www.pixelbeat.org/scripts/bashfeed</generator>"
echo "<description>$DESCRIPTION</description>"
echo "<managingEditor>$author</managingEditor>"
echo "<lastBuildDate>$time</lastBuildDate>"
# For a locale of the form xx_..., keep only the two characters before "_".
echo "<language>$(echo $LANG | sed 's/\(..\)_.*/\1/')</language>"
echo
# Succeed (status 0) when the text on stdin still contains a raw "&", "<"
# or ">" once the entities &amp; &lt; &gt; and &quot; have been removed,
# i.e. when the text is not safe to embed in XML as-is.
xml_unescaped() {
  sed -e 's/&amp;//g' \
      -e 's/&[lg]t;//g' \
      -e 's/&quot;//g' |
    grep -q "[&<>]"
}
# Select the newest files under the start directory ($1, default ".") and
# emit one RSS <item> for each, then close the channel.
#
# Stage 1 (find..grep): list files with their mtime, newest first, and
#   filter the paths through $include_re / $exclude_re.
# Stage 2 (first loop): drop files carrying the "Exclude from bashfeed"
#   marker in their first lines and stop after $num_files entries.
# Stage 3 (second loop): extract metadata and print the <item>.
#
# If HTML metadata would yield invalid XML, the last loop exits non-zero,
# which suppresses the closing tags via the trailing "&&".
# Paths containing tab or newline characters are not supported.
tab=$(printf '\t')
i=0
find "${1:-.}" -type f -printf '%p\t%T@\n' |
  sed 's/^\.\///' | # strip leading ./ when "$1" is empty
  sort -t "$tab" -k2,2nr | # split on tab only, so paths with spaces sort by mtime
  cut -f1 |
  grep -E "$include_re" |
  grep -Ev "$exclude_re" |
  while IFS= read -r file; do
    if ! head -- "$file" | grep -Fiq '<!--Exclude from bashfeed-->'; then
      printf '%s\n' "$file"
      i=$((i + 1))
      [ "$i" -eq "$num_files" ] && break
    fi
  done |
  while IFS= read -r file; do
    pubDate=$(date --reference="$file" --rfc-2822)
    # mtime in seconds: appended to the link and hidden in the description
    # so that feed readers notice an updated file.
    force_update=$(date --reference="$file" '+%s')

    # Reset all per-item state so nothing leaks in from the previous file.
    title=""; keywords=""; description=""; page_author=""
    tags=""; author_tag=""

    if printf '%s\n' "$file" | grep -Eq '\.(html|shtml|php)$'; then
      # Each sed prints the first matching line's captured text and quits.
      title=$(sed -n 's/.*<title>\(.*\)<\/title>.*/\1/ip;T;q' < "$file")
      keywords=$(sed -n 's/.*<META.*NAME="keywords".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$file")
      description=$(sed -n 's/.*<META.*NAME="description".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$file")
      page_author=$(sed -n 's/.*<META.*NAME="author".*CONTENT="\(.*\)".*/\1/ip;T;q' < "$file")
      extracted_text="$title $keywords $description $page_author"
      if printf '%s\n' "$extracted_text" | xml_unescaped; then
        printf 'Error: HTML metadata in %s will not produce a valid XML feed\n' "$file" >&2
        exit 1
      fi
    elif [ -x "$file" ]; then # I always have a 1 line description on line 3 of my scripts
      description=$(sed -n '3s/# \(.*\)/\1/p' < "$file")
    fi

    # Link to "dir/" rather than "dir/index.html" and friends.
    file=$(printf '%s\n' "$file" | sed "$replace_default_files")
    [ -z "$title" ] && title="$file"

    # One <category> per whitespace-separated keyword. Globbing is turned
    # off so a keyword such as "*" is not expanded to file names.
    set -f
    for keyword in $keywords; do
      tags=$(printf '%s<category>%s</category>\n ' "$tags" "$keyword")
    done
    set +f

    # Only emit <author> when the page names someone other than the
    # feed-wide author.
    if [ -n "$page_author" ] && [ "$page_author" != "$author" ]; then
      author_tag=$(printf '<author>%s</author>\n ' "$page_author")
    fi

    cat <<EOF

<item>
<title>$title</title>
<guid>http://$site/$file</guid>
<pubDate>$pubDate</pubDate>
<link>http://$site/${file}#${force_update}</link>
$author_tag$tags<description><![CDATA[$description<!--$force_update-->]]></description>
</item>

EOF
  done &&
  printf '%s\n' '</channel>' '</rss>'
Jump to Line
Something went wrong with that request. Please try again.