Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

executable file 325 lines (270 sloc) 6.59 kB
#!/usr/bin/perl -w
#
# Given a list of RSS/Atom blog-feeds, post each new entry to our
# chatroom, via an HTTP POST to http2xmpp.
#
# We keep a local cache of the blog-entries we've seen before and
# avoid resubmitting them. By default the cache is set to one year.
#
# This code assumes two things:
#
# 1. http2xmpp is running on the localhost.
#
# 2. There is a local redis server also on the same host.
#
# If a line in the configuration file starts with a comma-separated list
# of rooms, followed by whitespace and the feed URL, new entries are sent
# to those rooms instead of just the default one.
#
#
# Dependencies:
#
# apt-get install libxml-feed-perl libwww-perl libredis-perl libjson-perl
#
#
# Steve
# --
#
use strict;
use warnings;
use Getopt::Long;
use JSON;
use LWP::Simple;
use Redis;
use XML::Feed;
#
# Config data.
#
my %CONFIG;
$CONFIG{ 'http2xmpp' } = "http://localhost:9999/";
$CONFIG{ 'verbose' }   = 0;

# Default room, used when a feed line does not name any rooms.
$CONFIG{ 'room' } = "feeds";

# The cache period is a number followed by a unit. For example
# 1h (one hour), 1d (one day), 1m (one month), and 1y (one year).
$CONFIG{ 'cache' } = "1y";

#
# Parse the arguments, looking for --verbose.
#
# An unknown option is a usage error, so report failure to the caller
# with a non-zero exit status rather than pretending all went well.
#
exit 1 if ( !GetOptions( "verbose", \$CONFIG{ 'verbose' } ) );

#
# UTF8 is the future.
#
$CONFIG{ 'verbose' } && binmode( STDOUT, ":encoding(UTF-8)" );

#
# Connect to redis, which is used for caching purposes.
#
my $redis = Redis->new;

#
# Read the feed lines from our config file if present, otherwise from
# the DATA section at the end of this script.
#
my $config_file = "/etc/rss-announcer.conf";
my $feeds;

if ( -e $config_file )
{
    open( $feeds, "<", $config_file ) or
      die "Failed to open config $config_file: $!";
}
else
{
    $feeds = \*DATA;
}

#
# One feed per line; comments and blank (or whitespace-only) lines are
# skipped so that they are never mistaken for a URL.
#
while ( my $line = <$feeds> )
{
    chomp($line);
    next if ( $line =~ /^\s*#/ );
    next if ( $line =~ /^\s*$/ );
    processFeed($line);
}
close($feeds);

#
# All done.
#
exit 0;
#
# Process the feed described by a single configuration line.
#
# The line is either just a feed URL, or a comma-separated list of rooms
# followed by whitespace and then the feed URL.
#
# Every entry of the feed which seen() reports as new is announced via
# sendMessage(): once per listed room, or once to $CONFIG{'room'} when
# the line named no rooms.
#
# A feed which cannot be fetched or parsed is reported with a warning
# and skipped. Nothing useful is returned.
#
sub processFeed
{
    my ($line) = (@_);

    #
    # Strip surrounding whitespace first. Without this a URL-only line
    # with trailing blanks would be taken as "rooms + empty URL".
    #
    $line = "" unless ( defined $line );
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    return unless ( length $line );

    #
    # The line will either contain a list of rooms, and a URL, or just a URL:
    #
    my ( $rooms, $url );
    if ( $line =~ /^([^ \t]+)[ \t]+(.*)$/ )
    {
        ( $rooms, $url ) = ( $1, $2 );
    }
    else
    {
        $url = $line;
    }

    #
    # Try to fetch and parse the contents.
    #
    my $feed = undef;
    eval {$feed = XML::Feed->parse( URI->new($url) );};

    if ( $@ || !$feed )
    {
        print "WARNING: Failed to parse/process URL: $url\n";
        print "WARNING: $@\n" if ($@);

        my $error = XML::Feed->errstr();
        if ( $error && $CONFIG{ 'verbose' } )
        {
            print "WARNING: " . $error . "\n";
        }
        return;
    }

    my $feed_title = $feed->title || "";
    $CONFIG{ 'verbose' } && print "\nHandling: $url [$feed_title]\n";

    #
    # For each entry.
    #
    for my $entry ( $feed->entries )
    {

        #
        # The data we care about
        #
        my $title  = $entry->title;
        my $author = $entry->author || "unknown";
        my $link   = $entry->link;

        #
        # If we don't have all the data then we ignore this entry.
        # (The author always has a value, thanks to the default above.)
        #
        next unless ( $title && $link );

        #
        # Show details
        #
        $CONFIG{ 'verbose' } && print "- $link -> $title\n";

        #
        # Post it to the chatroom, unless we saw it already.
        #
        # seen() records the link as a side-effect, so this is checked
        # once per entry and not once per room.
        #
        next if ( seen($link) );

        if ($rooms)
        {
            foreach my $r ( split( /,/, $rooms ) )
            {
                $r =~ s/^\s+//;
                $r =~ s/\s+$//;
                next unless ($r);

                sendMessage( $link, $title, $author, $r );
            }
        }
        else
        {
            sendMessage( $link, $title, $author, $CONFIG{ 'room' } );
        }
    }
}
#
# Post a link about the blog entry to the chat-room.
#
# Arguments, in order: the entry link, its title, its author (replaced
# by "unknown" when empty) and the room to announce to.
#
# The announcement is a JSON document with "room" and "message" keys,
# sent via a HTTP POST to $CONFIG{'http2xmpp'}. A failed POST is
# reported with a warning.
#
# Returns the response object for the request.
#
sub sendMessage
{
    my ( $link, $title, $author, $room ) = (@_);
    $author = "unknown" unless ($author);

    #
    # The values come straight from a remote feed, so they must be
    # escaped before being embedded in the HTML message.
    #
    my ( $safe_link, $safe_title, $safe_author ) =
      map {_escapeHTML($_)} ( $link, $title, $author );

    #
    # The entry we'll post
    #
    my %data;
    $data{ 'room' } = $room;
    $data{ 'message' } =
      "<p>A new entry was posted by $safe_author <a href=\"$safe_link\">$safe_title</a></p>";

    # Report the room we are really sending to, which need not be the default.
    $CONFIG{ 'verbose' } && print "- Announcing to room: $room\n";

    #
    # Encode the hash prior to submission.
    #
    # The server expects JSON.
    #
    my $json = encode_json \%data;

    my $req = HTTP::Request->new( 'POST', $CONFIG{ 'http2xmpp' } );
    $req->header( 'Content-Type' => 'application/json' );
    $req->content($json);

    #
    # Send the request.
    #
    my $lwp      = LWP::UserAgent->new;
    my $response = $lwp->request($req);

    #
    # Don't fail silently: the entry has already been marked as seen by
    # the time we get here, so a lost announcement deserves a mention.
    #
    if ( !$response->is_success )
    {
        print "WARNING: Failed to announce $link to room $room: " .
          $response->status_line . "\n";
    }

    return $response;
}

#
# Escape the characters which are special in HTML text and in
# double-quoted attribute values. Undefined input gives "".
#
sub _escapeHTML
{
    my ($value) = (@_);

    # Force a plain string copy, in case we were handed an object.
    my $text = defined($value) ? "$value" : "";

    # The ampersand must be handled first, or we'd re-escape our own output.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}
#
# Have we seen this entry?
#
# Just a simple Redis lookup, keyed on the URL of the entry.
#
# If we haven't seen it then record it, with a TTL derived from
# $CONFIG{'cache'}.
#
# Returns 1 if the URL was already recorded, and 0 if it was not.
#
sub seen
{
    my ($url) = (@_);

    return 1 if ( $redis->get($url) );

    #
    # Convert the cache-period from h/d/w/m/y into the appropriate
    # number of seconds.
    #
    my $seconds = _cachePeriodToSeconds( $CONFIG{ 'cache' } );

    $CONFIG{ 'verbose' } &&
      print $CONFIG{ 'cache' } . " -> $seconds seconds\n";

    #
    # Cache the feed entry for the given time-period.
    #
    # The value and its TTL are stored with a single command so that a
    # key can never be left behind without an expiry. A period of zero
    # means "don't remember", which SETEX would reject, so store nothing.
    #
    if ( $seconds > 0 )
    {
        $redis->setex( $url, $seconds, 1 );
    }

    return 0;
}

#
# Convert a cache-period such as "12h", "7d", "2w", "1m" or "1y" into
# a number of seconds. A bare number is taken to be seconds already.
#
# Anything else produces a warning and falls back to one year, so the
# caller always receives a usable number.
#
sub _cachePeriodToSeconds
{
    my ($period) = (@_);

    my $day = 60 * 60 * 24;

    my %seconds_per = ( h => 60 * 60,
                        d => $day,
                        w => $day * 7,

                        # averaging a month to be 31 days.
                        m => $day * 31,

                        # averaging a year to be 365 days.
                        y => $day * 365,
                      );

    if ( defined($period) )
    {
        if ( $period =~ /^([0-9]+)([hdwmy])/i )
        {
            return ( $1 * $seconds_per{ lc($2) } );
        }
        if ( $period =~ /^[0-9]+$/ )
        {
            return ( $period + 0 );
        }
    }

    my $shown = defined($period) ? $period : "(undefined)";
    print "WARNING: Unknown cache-period: $shown - using one year\n";

    return ( $seconds_per{ 'y' } );
}
#
# The URLs we'll post.
#
# This is just a sample set, add your URLs to /etc/rss-announcer.conf
#
__DATA__
#
# Comments are good.
#
http://blog.bytemark.co.uk/feed
http://blog.mythic-beasts.com/feed/
http://blogs.msdn.com/b/oldnewthing/atom.aspx
http://planet.debian.org/rss20.xml
http://syndication.thedailywtf.com/TheDailyWtf
http://youarenotaphotographer.com/feed/
http://perf.fail/rss
http://www.debian.org/security/dsa
http://forum.bytemark.co.uk/latest.rss
http://brightbox.com/blog/feed/
Jump to Line
Something went wrong with that request. Please try again.