Skip to content
Newer
Older
100755 146 lines (119 sloc) 3.8 KB
f28b73a @mhagander Initial version of new planet code
authored
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to generate output RSS and
5 HTML data from what's currently in the database.
6
7 Copyright (C) 2008 PostgreSQL Global Development Group
8 """
9
10 import psycopg2
11 import PyRSS2Gen
12 import datetime
13 import sys
7a42825 @mhagander Use HTMLTidy and some attribute trickery to make output XHTML valid.
authored
14 import tidy
15 import urllib
f28b73a @mhagander Initial version of new planet code
authored
16 from HTMLParser import HTMLParser
17 from planethtml import PlanetHtml
18
class Generator:
    """Build the RSS feed and the HTML front page from the planet database.

    The generator reads the most recent posts and the list of feeds through
    the DB-API connection it is given and writes two files relative to the
    current working directory: ``www/rss20.xml`` and ``www/index.html``.
    """

    def __init__(self,db):
        """Store the database connection and the options passed to HTMLTidy.

        db -- an open DB-API connection (the script entry point passes a
              psycopg2 connection).
        """
        self.db = db
        # Keyword options handed verbatim to tidy.parseString() for every
        # post body.
        self.tidyopts = dict(   drop_proprietary_attributes=1,
                                alt_text='',
                                hide_comments=1,
                                output_xhtml=1,
                                show_body_only=1,
                                clean=1,
                                )

    def Generate(self):
        """Query the database and (re)write the RSS and HTML output files."""
        rss = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL',
            generator = 'Planet PostgreSQL',
            lastBuildDate = datetime.datetime.utcnow())
        html = PlanetHtml()

        c = self.db.cursor()
        try:
            c.execute("SET TIMEZONE=GMT")
            c.execute("SELECT guid,link,dat,title,txt,name,blogurl,guidisperma FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed ORDER BY dat DESC LIMIT 30")
            for post in c.fetchall():
                # Column order matches the SELECT list above.
                guid, link, dat, title, txt, name, blogurl, guidisperma = post
                desc = self.TruncateAndCleanDescription(txt, title)
                rss.items.append(PyRSS2Gen.RSSItem(
                    title=name + ': ' + title,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid, guidisperma),
                    pubDate=dat,
                    description=desc))
                html.AddItem(guid, link, dat, title, name, blogurl, desc)

            c.execute("SELECT name,blogurl,feedurl FROM planet.feeds ORDER BY name")
            for feed in c.fetchall():
                html.AddFeed(feed[0], feed[1], feed[2])
        finally:
            # Release the cursor even if a query or a post fails.
            c.close()

        # Close the output file explicitly so the feed is flushed to disk
        # as soon as it has been written, not whenever the file object
        # happens to be garbage collected.
        rssfile = open("www/rss20.xml","w")
        try:
            rss.write_xml(rssfile, encoding='utf-8')
        finally:
            rssfile.close()
        html.WriteFile("www/index.html")

    def TruncateAndCleanDescription(self, txt, title):
        """Return the post body tidied, truncated and without leading <br> tags.

        txt   -- the raw post body.
        title -- the post title; it is only passed on to HtmlTruncator.
        """
        # First apply Tidy
        txt = str(tidy.parseString(txt, **self.tidyopts))

        # Then truncate as necessary
        ht = HtmlTruncator(1024, title)
        ht.feed(txt)
        out = ht.GetText()

        # Remove initial <br /> tags
        while out.startswith('<br'):
            end = out.find('>')
            if end < 0:
                # Unterminated tag: nothing more can be stripped, and
                # slicing from find()'s -1 would never make progress.
                break
            out = out[end+1:]

        return out
74
class HtmlTruncator(HTMLParser):
    """Truncate an HTML fragment to a maximum number of text characters.

    The fragment is fed through the HTML parser and rebuilt tag by tag.
    Only character data and character/entity references count towards the
    limit (each reference counts as one character); markup is free.  When
    the limit is exceeded the remaining input is dropped, every element
    that is still open is closed, and "[...]" is appended.

    If the limit is never exceeded, GetText() returns the input exactly as
    it was fed (minus leading whitespace), not the rebuilt markup.
    """

    # Characters urllib.quote() must leave alone in an href: the RFC 3986
    # reserved set plus '%' (so already-escaped URLs are not escaped twice)
    # and '~'.  Without ':' a port number would be mangled, and without
    # '?', '=' and '&' every query string would be broken.
    URL_SAFE = "/:?#[]@!$&'()*+,;=%~"

    def __init__(self, maxlen, title = None):
        """maxlen -- maximum number of text characters to keep.
        title  -- stored on the instance; not used by the truncator itself.
        """
        HTMLParser.__init__(self)
        self.len = 0            # text characters seen so far (whole input)
        self.maxlen = maxlen
        self.fulltxt = ''       # everything passed to feed()
        self.trunctxt = ''      # rebuilt, possibly truncated, markup
        self.tagstack = []      # names of the elements currently open
        self.skiprest = False   # set once the text has been truncated
        self.title = title

    def feed(self, txt):
        """Strip leading whitespace from txt, remember it and parse it."""
        txt = txt.lstrip()
        self.fulltxt += txt
        HTMLParser.feed(self, txt)

    def handle_startendtag(self, tag, attrs):
        """Copy a self-closing tag (<br />, <img ... />) through verbatim."""
        if self.skiprest: return
        self.trunctxt += self.get_starttag_text()

    def quoteurl(self, str):
        """Return the URL with characters that are illegal in a URL escaped.

        The URL structure (scheme, host:port, path, query, fragment) and
        existing %-escapes are preserved.  The parameter keeps its original
        name even though it shadows the builtin, so keyword callers still
        work.
        """
        return urllib.quote(str, safe=self.URL_SAFE)

    def cleanhref(self, attrs):
        """Return the (name, value) pair with an href value URL-quoted.

        attrs -- a single (name, value) tuple as delivered by the parser.
        Pairs other than href, and href without a value, are returned
        unchanged.
        """
        if attrs[0] == 'href' and attrs[1] is not None:
            return 'href', self.quoteurl(attrs[1])
        return attrs

    def _format_attr(self, name, value):
        """Render one attribute as name="value" with the value escaped."""
        from xml.sax.saxutils import escape
        if value is None:
            # XHTML has no minimized attributes: <x checked> must be
            # written as checked="checked".
            value = name
        # The parser hands us attribute values with entities already
        # decoded, so they have to be encoded again on the way out.
        return '%s="%s"' % (name, escape(value, {'"': '&quot;'}))

    def handle_starttag(self, tag, attrs):
        """Re-emit an opening tag and remember that the element is open."""
        if self.skiprest: return
        parts = ["<" + tag]
        for k, v in map(self.cleanhref, attrs):
            parts.append(self._format_attr(k, v))
        self.trunctxt += ' '.join(parts) + ">"
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        """Re-emit a closing tag and unwind the open-element stack."""
        if self.skiprest: return
        self.trunctxt += "</" + tag + ">"
        # A stray end tag must not raise on an empty stack or pop an
        # unrelated element; unwind only down to the matching open tag.
        if tag in self.tagstack:
            while self.tagstack.pop() != tag:
                pass

    def handle_entityref(self, ref):
        """Handle a named reference such as &amp; (one text character)."""
        self._handle_ref("&" + ref + ";")

    def handle_charref(self, name):
        """Handle a numeric reference such as &#8217; (one text character)."""
        self._handle_ref("&#" + name + ";")

    def _handle_ref(self, text):
        """Emit a reference whole, or truncate if it would pass the limit."""
        self.len += 1
        if self.skiprest: return
        if self.len > self.maxlen:
            self._truncate()
            return
        self.trunctxt += text

    def handle_data(self, data):
        """Append character data, cutting it off at the length limit."""
        remaining = self.maxlen - self.len
        self.len += len(data)
        if self.skiprest: return
        if len(data) <= remaining:
            self.trunctxt += data
            return
        # Passed max length, so keep only the part of *this* chunk that
        # still fits; never slice back into markup emitted earlier.
        self.trunctxt += data[:max(remaining, 0)]
        self._truncate()

    def _truncate(self):
        """Close all open elements, add the marker and ignore what follows."""
        # Append any tags that weren't properly closed, innermost first
        for tag in reversed(self.tagstack):
            self.trunctxt += "</" + tag + ">"
        self.skiprest = True

        # Finally, append the continuation chars
        self.trunctxt += "[...]"

    def GetText(self):
        """Return the truncated markup, or the original input if it fit."""
        if self.skiprest:
            return self.trunctxt
        else:
            return self.fulltxt
143
if __name__ == "__main__":
    # Script entry point: connect to the planetpg database through the
    # socket directory /tmp and regenerate the RSS and HTML output.
    connection = psycopg2.connect('dbname=planetpg host=/tmp')
    generator = Generator(connection)
    generator.Generate()
Something went wrong with that request. Please try again.