Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 162 lines (132 sloc) 4.467 kb
f28b73a @mhagander Initial version of new planet code
authored
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to generate output RSS and
5 HTML data from what's currently in the database.
6
7 Copyright (C) 2008 PostgreSQL Global Development Group
8 """
9
10 import psycopg2
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
11 import psycopg2.extensions
f28b73a @mhagander Initial version of new planet code
authored
12 import PyRSS2Gen
ce807cd @mhagander Read database from a configfile, so beta can now easily have
authored
13 import ConfigParser
f28b73a @mhagander Initial version of new planet code
authored
14 import datetime
15 import sys
7a42825 @mhagander Use HTMLTidy and some attribute trickery to make output XHTML valid.
authored
16 import tidy
17 import urllib
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
18 from mako.template import Template
f28b73a @mhagander Initial version of new planet code
authored
19 from HTMLParser import HTMLParser
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
20 from planethtml import *
f28b73a @mhagander Initial version of new planet code
authored
21
class Generator:
    """Generate the RSS feed and the HTML index page from the database.

    The connection handed to the constructor is queried for the 30 most
    recent posts and for the list of feeds. The result is written to
    ``www/rss20.xml`` and, through a Mako template, to ``www/index.html``.
    """

    def __init__(self, db):
        """Store the database connection and prepare the HTML Tidy options.

        db -- database connection; ``Generate`` calls
              ``set_client_encoding()`` and ``cursor()`` on it.
        """
        self.db = db
        # Keyword arguments handed to tidy.parseString() for every post body.
        self.tidyopts = dict(
            drop_proprietary_attributes=1,
            alt_text='',
            hide_comments=1,
            output_xhtml=1,
            show_body_only=1,
            clean=1,
            char_encoding='utf8',
        )
        # Filled by Generate() and handed to the template.
        self.items = []
        self.feeds = []

    def Generate(self):
        """Read posts and feeds from the database and write both output files.

        Appends one PlanetPost per post to ``self.items`` and one PlanetFeed
        per feed to ``self.feeds``, writes ``www/rss20.xml`` and finally
        renders ``template/index.tmpl`` into ``www/index.html``.
        """
        rss = PyRSS2Gen.RSS2(
            title='Planet PostgreSQL',
            link='http://planet.postgresql.org',
            description='Planet PostgreSQL',
            generator='Planet PostgreSQL',
            lastBuildDate=datetime.datetime.utcnow())

        # Have psycopg2 hand back unicode objects for text columns.
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        self.db.set_client_encoding('UTF8')
        c = self.db.cursor()
        c.execute("SET TIMEZONE=GMT")
        c.execute("SELECT guid,link,dat,title,txt,name,blogurl,guidisperma FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed ORDER BY dat DESC LIMIT 30")
        # Unpack by name, in the same order as the SELECT column list.
        for guid, link, dat, title, txt, name, blogurl, guidisperma in c.fetchall():
            desc = self.TruncateAndCleanDescription(txt)
            rss.items.append(PyRSS2Gen.RSSItem(
                title=name + ': ' + title,
                link=link,
                guid=PyRSS2Gen.Guid(guid, guidisperma),
                pubDate=dat,
                description=desc))
            self.items.append(PlanetPost(guid, link, dat, title, name, blogurl, desc))

        c.execute("SELECT name,blogurl,feedurl FROM planet.feeds ORDER BY name")
        for name, blogurl, feedurl in c.fetchall():
            self.feeds.append(PlanetFeed(name, blogurl, feedurl))

        # Close the file explicitly instead of leaking the handle.
        rssfile = open("www/rss20.xml", "w")
        try:
            rss.write_xml(rssfile, encoding='utf-8')
        finally:
            rssfile.close()

        self.WriteFromTemplate('template/index.tmpl', 'www/index.html')

    def WriteFromTemplate(self, templatename, outputname):
        """Render a Mako template with the collected feeds and posts.

        templatename -- path of the template file to render
        outputname   -- path of the file that receives the UTF-8 output
        """
        tmpl = Template(filename=templatename, output_encoding='utf-8', input_encoding='utf-8')
        # Render before opening the output: open(..., "w") truncates the
        # file, so a template error must not leave an empty page behind.
        rendered = tmpl.render_unicode(feeds=self.feeds, posts=self.items).encode('utf-8')
        f = open(outputname, "w")
        try:
            f.write(rendered)
        finally:
            f.close()

    @staticmethod
    def _StripLeadingBreaks(txt):
        """Return txt without any run of complete ``<br ...>`` tags at its start."""
        while txt.startswith('<br'):
            end = txt.find('>')
            if end < 0:
                # Unterminated tag: nothing more can be removed. Without
                # this check the loop would never make progress.
                break
            txt = txt[end + 1:]
        return txt

    def TruncateAndCleanDescription(self, txt):
        """Return txt run through HTML Tidy and cut down to 1024 characters.

        txt -- unicode string holding the HTML body of a post

        Leading ``<br>`` tags are removed from the result.
        """
        # First apply Tidy
        txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **self.tidyopts)),'utf8')

        # Then truncate as necessary
        ht = HtmlTruncator(1024)
        ht.feed(txt)

        # Remove initial <br /> tags
        return self._StripLeadingBreaks(ht.GetText())
89
class HtmlTruncator(HTMLParser):
    """Rebuild HTML while limiting the amount of text it contains.

    Feed the markup with ``feed()`` and fetch the result with ``GetText()``.
    Only character data and entity/character references count towards the
    limit; tags do not. When the limit is passed, the text is cut, every
    tag still open is closed and ``[...]`` is appended.
    """

    # Characters that urllib.quote() must leave alone: the URL delimiters
    # (port colon, query string, fragment, ...), "%" so that escapes already
    # present are not encoded twice, and "~".
    URL_SAFE_CHARS = "/:?#[]@!$&'()*+,;=%~"

    def __init__(self, maxlen):
        """Create a truncator that keeps at most maxlen characters of text."""
        HTMLParser.__init__(self)
        self.len = 0            # characters of text seen so far
        self.maxlen = maxlen
        self.fulltxt = ''       # everything fed, unmodified
        self.trunctxt = ''      # rebuilt (and possibly truncated) markup
        self.tagstack = []      # tags opened but not yet closed
        self.skiprest = False   # set once the output has been truncated

    def feed(self, txt):
        """Strip leading whitespace from txt, remember it and parse it."""
        txt = txt.lstrip()
        self.fulltxt += txt
        HTMLParser.feed(self, txt)

    def handle_startendtag(self, tag, attrs):
        """Copy a self-closing tag to the output exactly as it was written."""
        if self.skiprest: return
        self.trunctxt += self.get_starttag_text()

    def quoteurl(self, str):
        """Return the URL with characters that are not legal in a URL percent-encoded.

        The part up to the first colon (the scheme) is kept untouched.
        Everything after it, including any further colons such as a port
        number, is kept and quoted. A URL without a colon (a relative link)
        is quoted as a whole.
        """
        # NOTE: the parameter name shadows the builtin; it is kept so that
        # existing keyword callers continue to work.
        if str is None:
            return ''
        parts = str.split(":", 1)
        if len(parts) < 2:
            return self._quote(parts[0])
        return parts[0] + ":" + self._quote(parts[1])

    def _quote(self, text):
        """Percent-encode text, leaving URL_SAFE_CHARS untouched."""
        try:
            # Python 2's urllib.quote() fails on unicode strings holding
            # non-ASCII characters, so give it UTF-8 bytes.
            text = text.encode('utf-8')
        except UnicodeError:
            # Already a byte string with non-ASCII content; quote it as is.
            pass
        return urllib.quote(text, self.URL_SAFE_CHARS)

    def cleanhref(self, attrs):
        """Return the (name, value) pair attrs with an href value URL-quoted."""
        if attrs[0] == 'href' and attrs[1] is not None:
            return 'href', self.quoteurl(attrs[1])
        return attrs

    def handle_starttag(self, tag, attrs):
        """Write an opening tag with cleaned-up attributes and remember it as open."""
        if self.skiprest: return
        from xml.sax.saxutils import escape
        pieces = ["<" + tag]
        for k, v in map(self.cleanhref, attrs):
            if v is None:
                # Attribute given without a value; write it in the
                # name="name" form rather than as the string "None".
                v = k
            # The parser hands over attribute values with entities already
            # replaced, so they have to be escaped again on the way out.
            pieces.append(' %s="%s"' % (k, escape(v, {'"': '&quot;'})))
        pieces.append(">")
        self.trunctxt += ''.join(pieces)
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        """Write a closing tag and drop the innermost open tag."""
        if self.skiprest: return
        self.trunctxt += "</" + tag + ">"
        # A stray end tag must not raise on an empty stack.
        if self.tagstack:
            self.tagstack.pop()

    def handle_entityref(self, ref):
        """Copy a named entity such as ``&amp;``; it counts as one character."""
        self._add_reference("&" + ref + ";")

    def handle_charref(self, name):
        """Copy a numeric reference such as ``&#8217;``; it counts as one character."""
        self._add_reference("&#" + name + ";")

    def _add_reference(self, markup):
        """Append a one-character reference, or truncate if it no longer fits."""
        self.len += 1
        if self.skiprest: return
        if self.len > self.maxlen:
            self._finish_truncation()
            return
        self.trunctxt += markup

    def handle_data(self, data):
        """Append text, cutting it at the limit when it does not fit completely."""
        # Until truncation happens self.len never exceeds maxlen, so this is
        # the number of characters that may still be added.
        room = max(self.maxlen - self.len, 0)
        self.len += len(data)
        if self.skiprest: return
        if self.len > self.maxlen:
            # Passed max length, so truncate text as close to the limit as possible
            self.trunctxt += data[:room]
            self._finish_truncation()
        else:
            self.trunctxt += data

    def _finish_truncation(self):
        """Close all open tags, append the continuation mark and stop output."""
        # Append any tags that weren't properly closed, innermost first
        for tag in reversed(self.tagstack):
            self.trunctxt += "</" + tag + ">"
        self.skiprest = True

        # Finally, append the continuation chars
        self.trunctxt += "[...]"

    def GetText(self):
        """Return the truncated markup if the limit was passed, else the input as fed."""
        if self.len > self.maxlen:
            return self.trunctxt
        else:
            return self.fulltxt
157
if __name__=="__main__":
    # Script entry point: the database connection string is taken from the
    # "db" option in the [planet] section of planet.ini.
    config = ConfigParser.ConfigParser()
    config.read('planet.ini')
    connection = psycopg2.connect(config.get('planet','db'))
    Generator(connection).Generate()
Something went wrong with that request. Please try again.