Change planet:subscription to planet:source, and expand the information provided in each.

Sam Ruby committed Aug 18, 2006
1 parent 6c0e24f commit 9fa9fb6
Showing 28 changed files with 195 additions and 51 deletions.
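
In outline, the renamed element reflects a new data flow: spider.py now stores per-feed options from the configuration under planet_-prefixed keys, and reconstitute.source() turns any planet_* key into a planet:-namespaced element when it writes feed information to the cache. A minimal sketch of that mapping (illustrative only, not part of the commit; the 'name'/'three' values come from the test configuration):

# Illustrative sketch of the planet_* -> planet:* mapping introduced below.
feed_options = {'name': 'three'}            # e.g. [tests/data/spider/testfeed3.rss]

parsed_feed = {}
for name, value in feed_options.items():
    parsed_feed['planet_' + name] = value   # spider.py: data.feed['planet_'+name]

for key, value in parsed_feed.items():
    if key.startswith('planet_'):
        tag = key.replace('_', ':', 1)      # reconstitute.py: planet_name -> planet:name
        print('<%s>%s</%s>' % (tag, value, tag))
# prints: <planet:name>three</planet:name>
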
6 changes: 4 additions & 2 deletions examples/planet.xslt
@@ -19,13 +19,15 @@

<h2>Subscriptions</h2>
<ul>
<xsl:for-each select="planet:subscription">
<xsl:for-each select="planet:source">
<xsl:sort select="planet:name"/>
<li>
<a href="{atom:link[@rel='self']/@href}" title="subscribe">
<img src="images/feed-icon-10x10.png" alt="(feed)"/>
</a>
<xsl:value-of select="planet:name"/>
<a href="{atom:link[@rel='alternate']/@href}">
<xsl:value-of select="planet:name"/>
</a>
</li>
</xsl:for-each>
</ul>
2 changes: 2 additions & 0 deletions planet/__init__.py
@@ -1,3 +1,5 @@
xmlns = 'http://planet.intertwingly.net/'

logger = None

def getLogger(level):
8 changes: 7 additions & 1 deletion planet/config.py
@@ -26,7 +26,7 @@
* error handling (example: no planet section)
"""

import sys
import os, sys
from ConfigParser import ConfigParser

parser = ConfigParser()
@@ -83,6 +83,12 @@ def template_files():
""" list the templates defined """
return parser.get('Planet','template_files').split(' ')

def cache_sources_directory():
if parser.has_option('Planet', 'cache_sources_directory'):
parser.get('Planet', 'cache_sources_directory')
else:
return os.path.join(cache_directory(), 'sources')

def feeds():
""" list the feeds defined """
return filter(lambda feed: feed!='Planet' and feed not in template_files(),
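
A small illustration of the new setting's fallback behavior (assumed values; the cache path mirrors tests/data/spider/config.ini, and the dictionary stands in for the ConfigParser):

import os

# Stand-in for the parser: no cache_sources_directory option is set.
options = {'cache_directory': 'tests/work/spider/cache'}

def cache_sources_directory():
    # an explicit option wins; otherwise default to <cache_directory>/sources
    if 'cache_sources_directory' in options:
        return options['cache_sources_directory']
    return os.path.join(options['cache_directory'], 'sources')

print(cache_sources_directory())    # tests/work/spider/cache/sources
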
6 changes: 5 additions & 1 deletion planet/feedparser.py
@@ -11,7 +11,7 @@
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
"""

__version__ = "4.2-pre-" + "$Revision: 1.131 $"[11:16] + "-cvs"
__version__ = "4.2-pre-" + "$Revision: 1.132 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -2379,12 +2379,16 @@ def handle_data(self, text):
        _BaseHTMLProcessor.handle_data(self, text)

    def sanitize_style(self, style):
        # disallow urls
        style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)

        # gauntlet
        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
        if not re.match("^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style): return ''

        clean = []
        for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
            if not value: continue
            if prop.lower() in self.acceptable_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background','border','margin','padding']:
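
A quick illustration (not from the commit) of what the new first step does: any url(...) token is blanked out before the existing gauntlet and property whitelist run. The regular expression is the one added above.

import re

style = "color: red; background: url(http://example.com/x.png) no-repeat"
# the added first step in sanitize_style strips url(...) values
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
print(style)    # color: red; background:  no-repeat
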
18 changes: 9 additions & 9 deletions planet/reconstitute.py
@@ -18,6 +18,7 @@
from xml.dom import minidom
from BeautifulSoup import BeautifulSoup
from xml.parsers.expat import ExpatError
import planet

illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")

@@ -141,10 +142,9 @@ def content(xentry, name, detail, bozo):

    xentry.appendChild(xcontent)

def source(xentry, source, bozo):
def source(xsource, source, bozo):
    """ copy source information to the entry """
    xdoc = xentry.ownerDocument
    xsource = xdoc.createElement('source')
    xdoc = xsource.ownerDocument

    createTextElement(xsource, 'id', source.get('id', None))
    createTextElement(xsource, 'icon', source.get('icon', None))
@@ -164,16 +164,14 @@ def source(xentry, source, bozo):

    # propagate planet inserted information
    for key, value in source.items():
        if key.startswith('planet:'):
            createTextElement(xsource, key, value)

    xentry.appendChild(xsource)
        if key.startswith('planet_'):
            createTextElement(xsource, key.replace('_',':',1), value)

def reconstitute(feed, entry):
    """ create an entry document from a parsed feed """
    xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
    xentry=xdoc.documentElement
    xentry.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    xentry.setAttribute('xmlns:planet',planet.xmlns)

    id(xentry, entry)
    links(xentry, entry)
@@ -191,6 +189,8 @@ def reconstitute(feed, entry):
    for contributor in entry.get('contributors',[]):
        author(xentry, 'contributor', contributor)

    source(xentry, entry.get('source', feed.feed), bozo)
    xsource = xdoc.createElement('source')
    source(xsource, entry.get('source', feed.feed), bozo)
    xentry.appendChild(xsource)

    return xdoc
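
The upshot of the refactoring is that source() no longer creates or appends its own element; callers pass in whatever node should be filled, which lets the same routine serve both an entry's source child and the root of a per-feed cache document (see spider.py below). A hedged sketch of the two call shapes, with the reconstitute calls left as comments:

from xml.dom import minidom
# import reconstitute   # as in the diff; calls shown as comments below

# entry side: reconstitute() creates a <source> child and passes it in
xdoc = minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
xsource = xdoc.createElement('source')
# reconstitute.source(xsource, entry.get('source', feed.feed), bozo)
xdoc.documentElement.appendChild(xsource)

# feed side (see spider.py below): the same routine fills a whole document root
fdoc = minidom.parseString(
    '<feed xmlns:planet="http://planet.intertwingly.net/"'
    ' xmlns="http://www.w3.org/2005/Atom"/>')
# reconstitute.source(fdoc.documentElement, data.feed, data.bozo)
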
54 changes: 36 additions & 18 deletions planet/spider.py
@@ -5,8 +5,9 @@

# Standard library modules
import time, calendar, re, os
from xml.dom import minidom
# Planet modules
import config, feedparser, reconstitute
import planet, config, feedparser, reconstitute

try:
    from xml.dom.ext import PrettyPrint
@@ -40,15 +41,45 @@ def filename(directory, filename):

    return os.path.join(directory, filename)

def write(xdoc, out):
    """ write the document out to disk """
    file = open(out,'w')
    try:
        PrettyPrint(xdoc, file)
    except:
        # known reasons for failure include no pretty printer installed,
        # and absurdly high levels of markup nesting causing Python to
        # declare infinite recursion.
        file.seek(0)
        file.write(xdoc.toxml('utf-8'))
    file.close()
    xdoc.unlink()

def spiderFeed(feed):
    """ Spider (fetch) a single feed """
    data = feedparser.parse(feed)
    cache = config.cache_directory()
    if not data.feed: return

    # capture data from the planet configuration file
    # capture feed and data from the planet configuration file
    if not data.feed.has_key('links'): data.feed['links'] = list()
    for link in data.feed.links:
        if link.rel == 'self': break
    else:
        data.feed.links.append(feedparser.FeedParserDict(
            {'rel':'self', 'type':'application/atom+xml', 'href':feed}))
    for name, value in config.feed_options(feed).items():
        data.feed['planet:'+name] = value
        data.feed['planet_'+name] = value

    # write the feed info to the cache
    sources = config.cache_sources_directory()
    if not os.path.exists(sources): os.makedirs(sources)
    xdoc=minidom.parseString('''<feed xmlns:planet="%s"
      xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
    reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
    write(xdoc, filename(sources, feed))

    # write each entry to the cache
    cache = config.cache_directory()
    for entry in data.entries:
        if not entry.has_key('id'):
            entry['id'] = reconstitute.id(None, entry)
@@ -65,24 +96,11 @@ def spiderFeed(feed):
                mtime = time.time()
            entry['updated_parsed'] = time.gmtime(mtime)

        xml = reconstitute.reconstitute(data, entry)

        file = open(out,'w')
        try:
            PrettyPrint(reconstitute.reconstitute(data, entry), file)
        except:
            # known reasons for failure include no pretty printer installed,
            # and absurdly high levels of markup nesting causing Python to
            # declare infinite recursion.
            file.seek(0)
            file.write(reconstitute.reconstitute(data, entry).toxml('utf-8'))
        file.close()

        write(reconstitute.reconstitute(data, entry), out)
        os.utime(out, (mtime, mtime))

def spiderPlanet(configFile):
    """ Spider (fetch) an entire planet """
    import planet
    config.load(configFile)
    log = planet.getLogger(config.log_level())
    planet.setTimeout(config.feed_timeout())
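
Beyond the refactored entry writing, spiderFeed() now also records one document per feed under cache_sources_directory(); judging from the new fixtures, filename() flattens the feed URI into a comma-separated file name. A hedged sketch of reading such a document back (the path and expected value come from the new test data; getElementsByTagNameNS is standard minidom):

from xml.dom import minidom

# Path and value taken from the fixture added in this commit.
doc = minidom.parse('tests/data/splice/cache/sources/tests,data,spider,testfeed2.atom')
planet_ns = 'http://planet.intertwingly.net/'
name = doc.getElementsByTagNameNS(planet_ns, 'name')[0]
print(name.firstChild.nodeValue)    # two
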
25 changes: 12 additions & 13 deletions planet/splice.py
@@ -1,8 +1,9 @@
""" Splice together a planet from a cache of feed entries """
import glob, os
from xml.dom import minidom
import config
import planet, config, feedparser, reconstitute
from reconstitute import createTextElement
from spider import filename

def splice(configFile):
""" Splice together a planet from a cache of entries """
@@ -11,7 +12,8 @@ def splice(configFile):
    log = planet.getLogger(config.log_level())

    cache = config.cache_directory()
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")]
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")
        if not os.path.isdir(file)]
    dir.sort()
    dir.reverse()

@@ -34,17 +36,14 @@ def splice(configFile):
        feed.appendChild(entry.documentElement)

    # insert subscription information
    feed.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    feed.setAttribute('xmlns:planet',planet.xmlns)
    sources = config.cache_sources_directory()
    for sub in config.feeds():
        name = config.feed_options(sub).get('name','')
        xsub = doc.createElement('planet:subscription')
        xlink = doc.createElement('link')
        xlink.setAttribute('rel','self')
        xlink.setAttribute('href',sub.decode('utf-8'))
        xsub.appendChild(xlink)
        xname = doc.createElement('planet:name')
        xname.appendChild(doc.createTextNode(name.decode('utf-8')))
        xsub.appendChild(xname)
        feed.appendChild(xsub)
        data=feedparser.parse(filename(sources,sub))
        if not data.feed: continue
        xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
            xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
        reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
        feed.appendChild(xdoc.documentElement)

    return doc
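
One consequence worth noting: splice() now derives subscription details from the per-feed cache rather than rebuilding them from the configuration, and a subscription whose source document was never written is simply skipped by the 'if not data.feed: continue' guard, which is what the new 'not found' entries in the test configurations exercise. A hedged illustration (the path below is deliberately nonexistent):

import feedparser

# Parsing a source-cache file that does not exist (e.g. for the
# 'not found' testfeed0 entry) yields an empty feed, so splice() skips it.
data = feedparser.parse('tests/data/splice/cache/sources/no,such,file')
print(bool(data.feed))    # False -> subscription omitted from the output
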
1 change: 1 addition & 0 deletions spider.py 100644 → 100755
@@ -1,3 +1,4 @@
#!/usr/bin/env python
"""
Main program to run just the spider portion of planet
"""
1 change: 1 addition & 0 deletions splice.py 100644 → 100755
@@ -1,3 +1,4 @@
#!/usr/bin/env python
"""
Main program to run just the splice portion of planet
"""
3 changes: 3 additions & 0 deletions tests/data/spider/config.ini
@@ -2,6 +2,9 @@
cache_directory = tests/work/spider/cache
template_files =

[tests/data/spider/testfeed0.atom]
name = not found

[tests/data/spider/testfeed1b.atom]
name = one

15 changes: 15 additions & 0 deletions tests/data/splice/cache/example.com,3
@@ -0,0 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>http://example.com/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<summary>the Blue Planet</summary>
<updated>2006-01-03T00:00:00Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
15 changes: 15 additions & 0 deletions tests/data/splice/cache/example.com,4
@@ -0,0 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>http://example.com/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<summary>the Red Planet</summary>
<updated>2006-08-18T18:30:50Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
15 changes: 15 additions & 0 deletions tests/data/splice/cache/planet.intertwingly.net,2006,testfeed3,1
@@ -0,0 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed3/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<summary>Messenger of the Roman Gods</summary>
<updated>2006-01-01T00:00:00Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
15 changes: 15 additions & 0 deletions tests/data/splice/cache/planet.intertwingly.net,2006,testfeed3,2
@@ -0,0 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed3/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<summary>the Morning Star</summary>
<updated>2006-08-18T18:30:50Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
15 changes: 15 additions & 0 deletions tests/data/splice/cache/sources/tests,data,spider,testfeed1b.atom
@@ -0,0 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</feed>
15 changes: 15 additions & 0 deletions tests/data/splice/cache/sources/tests,data,spider,testfeed2.atom
@@ -0,0 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</feed>
8 changes: 8 additions & 0 deletions tests/data/splice/cache/sources/tests,data,spider,testfeed3.rss
@@ -0,0 +1,8 @@
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>It’s just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</feed>
5 changes: 5 additions & 0 deletions tests/data/splice/config.ini
@@ -3,9 +3,14 @@ name = test planet
cache_directory = tests/data/splice/cache
template_files =

[tests/data/spider/testfeed0.atom]
name = not found

[tests/data/spider/testfeed1b.atom]
name = one

[tests/data/spider/testfeed2.atom]
name = two

[tests/data/spider/testfeed3.rss]
name = three
