Skip to content

Commit

Permalink
Use HTMLTidy and some attribute trickery to make output XHTML valid.
Browse files Browse the repository at this point in the history
Change to XHTML Transitional, because that's really what it is.

Fix issue where the appended "[...]" truncation marker wasn't always
rewritten to a proper HTML link in the HTML output.
  • Loading branch information
mhagander committed Oct 22, 2008
1 parent da9733f commit 7a42825
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
33 changes: 27 additions & 6 deletions generator.py
Expand Up @@ -11,12 +11,22 @@
import PyRSS2Gen import PyRSS2Gen
import datetime import datetime
import sys import sys
import tidy
import urllib
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
from planethtml import PlanetHtml from planethtml import PlanetHtml


class Generator: class Generator:
def __init__(self,db): def __init__(self,db):
self.db = db self.db = db
self.tidyopts = dict( drop_proprietary_attributes=1,
alt_text='',
hide_comments=1,
output_xhtml=1,
show_body_only=1,
clean=1,
)



def Generate(self): def Generate(self):
rss = PyRSS2Gen.RSS2( rss = PyRSS2Gen.RSS2(
Expand Down Expand Up @@ -48,6 +58,10 @@ def Generate(self):
html.WriteFile("www/index.html") html.WriteFile("www/index.html")


def TruncateAndCleanDescription(self, txt, title): def TruncateAndCleanDescription(self, txt, title):
# First apply Tidy
txt = str(tidy.parseString(txt, **self.tidyopts))

# Then truncate as necessary
ht = HtmlTruncator(1024, title) ht = HtmlTruncator(1024, title)
ht.feed(txt) ht.feed(txt)
out = ht.GetText() out = ht.GetText()
Expand Down Expand Up @@ -78,10 +92,19 @@ def handle_startendtag(self, tag, attrs):
if self.skiprest: return if self.skiprest: return
self.trunctxt += self.get_starttag_text() self.trunctxt += self.get_starttag_text()


def quoteurl(self, str):
    """Percent-encode the part of a URL that follows its scheme.

    Everything up to and including the first ":" is kept verbatim; the
    remainder is percent-encoded. "/" and ":" are treated as safe so that
    path separators and "host:port" authorities survive unchanged.

    Args:
        str: the URL text to clean (the name shadows the builtin, but is
            kept so the method's signature stays unchanged).

    Returns:
        The URL with its post-scheme part percent-encoded. A value that
        contains no ":" at all (e.g. a relative link) is encoded as a whole
        rather than raising IndexError.
    """
    # Local import keeps the block self-contained on both Python 2
    # (urllib.quote) and Python 3 (urllib.parse.quote).
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # Split only once: a larger maxsplit would put everything after a second
    # colon (a port number, or a colon in the path/query) into a third
    # element that would then be silently dropped.
    parts = str.split(":", 1)
    if len(parts) == 1:
        # No scheme separator present; nothing to keep verbatim.
        return quote(str, safe="/:")
    return parts[0] + ":" + quote(parts[1], safe="/:")

def cleanhref(self, attrs):
    """Return one attribute pair, URL-quoting the value if it is an href.

    Args:
        attrs: a single (name, value) pair; handle_starttag maps this method
            over the tag's attribute list.

    Returns:
        ('href', quoted_value) for an href attribute that has a value;
        otherwise the pair is returned unchanged.
    """
    # A valueless attribute (e.g. `<a href>`) arrives with value None from
    # the parser; there is nothing to quote, and quoteurl() would fail on it.
    if attrs[0] == 'href' and attrs[1] is not None:
        return 'href', self.quoteurl(attrs[1])
    return attrs

def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
if self.skiprest: return if self.skiprest: return
self.trunctxt += "<" + tag self.trunctxt += "<" + tag
self.trunctxt += (' '.join([(' %s="%s"' % (k,v)) for k,v in attrs])) self.trunctxt += (' '.join([(' %s="%s"' % (k,v)) for k,v in map(self.cleanhref, attrs)]))
self.trunctxt += ">" self.trunctxt += ">"
self.tagstack.append(tag) self.tagstack.append(tag)


Expand All @@ -102,18 +125,16 @@ def handle_data(self, data):
if self.len > self.maxlen: if self.len > self.maxlen:
# Passed max length, so truncate text as close to the limit as possible # Passed max length, so truncate text as close to the limit as possible
self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)] self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)]
# Terminate at whitespace if possible, max 12 chars back
for i in range(len(self.trunctxt)-1, len(self.trunctxt)-12, -1):
if self.trunctxt[i].isspace():
self.trunctxt = self.trunctxt[0:i] + " [...]"
break


# Now append any tags that weren't properly closed # Now append any tags that weren't properly closed
self.tagstack.reverse() self.tagstack.reverse()
for tag in self.tagstack: for tag in self.tagstack:
self.trunctxt += "</" + tag + ">" self.trunctxt += "</" + tag + ">"
self.skiprest = True self.skiprest = True


# Finally, append the continuation chars
self.trunctxt += "[...]"

def GetText(self): def GetText(self):
if self.len > self.maxlen: if self.len > self.maxlen:
return self.trunctxt return self.trunctxt
Expand Down
6 changes: 3 additions & 3 deletions planethtml.py
Expand Up @@ -15,8 +15,8 @@ class PlanetHtml:
def __init__(self): def __init__(self):
self.items = [] self.items = []
self.feeds = [] self.feeds = []
self.str = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" self.str = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" dir="ltr"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" dir="ltr">
<head> <head>
<title>Planet PostgreSQL</title> <title>Planet PostgreSQL</title>
Expand Down Expand Up @@ -46,7 +46,7 @@ def BuildPosts(self):
lastdate = None lastdate = None
for post in self.items: for post in self.items:
if post[6].endswith('[...]'): if post[6].endswith('[...]'):
txt = post[6][:len(post[6])-4] + """<a href="%s">continue reading...</a>]""" % (post[1]) txt = post[6][:len(post[6])-5] + """<p>[<a href="%s">continue reading...</a>]</p>""" % (post[1])
else: else:
txt = post[6] txt = post[6]


Expand Down

0 comments on commit 7a42825

Please sign in to comment.