Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 237 lines (202 sloc) 7.444 kb
f28b73a @mhagander Initial version of new planet code
authored
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to generate output RSS and
5 HTML data from what's currently in the database.
6
47a5a62 @mhagander Header and (C) messages updates
authored
7 Copyright (C) 2008-2009 PostgreSQL Global Development Group
f28b73a @mhagander Initial version of new planet code
authored
8 """
9
10 import psycopg2
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
11 import psycopg2.extensions
f28b73a @mhagander Initial version of new planet code
authored
12 import PyRSS2Gen
ce807cd @mhagander Read database from a configfile, so beta can now easily have
authored
13 import ConfigParser
f28b73a @mhagander Initial version of new planet code
authored
14 import datetime
fef87f9 @mhagander Split template into a base piece shared for more pages, and a main pi…
authored
15 import os.path
f28b73a @mhagander Initial version of new planet code
authored
16 import sys
7a42825 @mhagander Use HTMLTidy and some attribute trickery to make output XHTML valid.
authored
17 import tidy
18 import urllib
90b5201 @mhagander Switch to using the Django template engine, since we'll
authored
19 from django.template import Context
20 from django.template.loader import get_template
21 from django.conf import settings
f28b73a @mhagander Initial version of new planet code
authored
22 from HTMLParser import HTMLParser
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
23 from planethtml import *
f28b73a @mhagander Initial version of new planet code
authored
24
class Generator:
    """Build the Planet PostgreSQL RSS feeds and HTML pages from the database.

    The constructor connects to the database named by the ``db`` option of
    the ``[planet]`` config section; ``Generate()`` then reads the latest
    posts and feed lists and writes the output files below ``www/``.

    ``PlanetPost`` and ``PlanetFeed`` are not defined in this file; they are
    presumably provided by the ``planethtml`` star import.
    """

    def __init__(self,cfg):
        """Connect to the database and set up tidy/template configuration.

        cfg is a ConfigParser-style object. ``[planet] db`` is required;
        ``[twitter] account`` is optional.
        """
        self.db = psycopg2.connect(cfg.get('planet','db'))
        self.tidyopts = dict(   drop_proprietary_attributes=1,
                                alt_text='',
                                hide_comments=1,
                                output_xhtml=1,
                                show_body_only=1,
                                clean=1,
                                char_encoding='utf8',
                                )
        self.items = []
        self.topposters = []
        self.topteams = []
        self.allposters = []
        self.allteams = []
        self.staticfiles = ['add', ]
        if cfg.has_option('twitter','account'):
            self.twittername = cfg.get('twitter','account')
        else:
            self.twittername = None

        # Django refuses to be configured twice in one process, so only
        # configure it when nobody has done so yet. This keeps a second
        # Generator instance from blowing up in the constructor.
        if not settings.configured:
            settings.configure(
                TEMPLATE_DIRS=('template',),
            )

    def Generate(self):
        """Read posts and feed lists from the database and write all output.

        Writes www/rss20.xml (full text), www/rss20_short.xml (truncated
        text), www/index.html, www/feeds.html and any out-of-date static
        pages listed in self.staticfiles.
        """
        # One timestamp for both feeds, so they always agree.
        now = datetime.datetime.utcnow()
        rss = self._NewFeed('Planet PostgreSQL', now)
        rssshort = self._NewFeed('Planet PostgreSQL (short)', now)

        # Start from a clean list so calling Generate() twice does not
        # duplicate every post on the front page.
        self.items = []

        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        self.db.set_client_encoding('UTF8')
        c = self.db.cursor()
        # try/finally (not "with") so this stays valid on old Python 2.
        try:
            c.execute("SET TIMEZONE=GMT")
            c.execute("SELECT guid,link,dat,title,txt,planet.feeds.name,blogurl,guidisperma,planet.teams.name,planet.teams.teamurl FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed LEFT JOIN planet.teams ON planet.feeds.team = planet.teams.id WHERE planet.feeds.approved AND NOT planet.posts.hidden ORDER BY dat DESC LIMIT 30")
            for post in c.fetchall():
                (guid, link, dat, title, txt, feedname, blogurl,
                 guidisperma, teamname, teamurl) = post
                desc = self.TruncateAndCleanDescription(txt)
                itemtitle = feedname + ': ' + title
                # The main feed carries the full text of the post ...
                rss.items.append(PyRSS2Gen.RSSItem(
                    title=itemtitle,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid,guidisperma),
                    pubDate=dat,
                    description=txt))
                # ... the short feed and the HTML page the tidied, truncated one.
                rssshort.items.append(PyRSS2Gen.RSSItem(
                    title=itemtitle,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid,guidisperma),
                    pubDate=dat,
                    description=desc))
                self.items.append(PlanetPost(guid, link, dat, title, feedname, blogurl, desc, teamname, teamurl))

            # All feed queries return the same seven columns (with NULL
            # placeholders) so every row can be handed to PlanetFeed.
            self.topposters = self._FetchFeeds(c, """
SELECT planet.feeds.name,blogurl,feedurl,count(*),planet.teams.name,planet.teams.teamurl,NULL FROM planet.feeds
INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
LEFT JOIN planet.teams ON planet.teams.id=planet.feeds.team
WHERE age(dat) < '1 month' AND approved AND NOT hidden
AND NOT excludestats
GROUP BY planet.feeds.name,blogurl,feedurl,planet.teams.name,teamurl ORDER BY 4 DESC,1 LIMIT 20
""")
            # A "top list" with fewer than two entries is not shown at all.
            if len(self.topposters) < 2: self.topposters = []

            self.topteams = self._FetchFeeds(c, """
SELECT NULL,NULL,NULL,NULL,planet.teams.name, teamurl, count(*) FROM
planet.feeds
INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
INNER JOIN planet.teams ON planet.teams.id=planet.feeds.team
WHERE age(dat) < '1 month' AND approved AND NOT hidden
AND NOT excludestats
GROUP BY planet.teams.name, teamurl ORDER BY 7 DESC, 1 LIMIT 10""")
            if len(self.topteams) < 2: self.topteams = []

            self.allposters = self._FetchFeeds(c, """
SELECT name,blogurl,feedurl,NULL,NULL,NULL,NULL FROM planet.feeds
WHERE approved AND team IS NULL ORDER BY name,blogurl
""")
            self.allteams = self._FetchFeeds(c, """
SELECT feeds.name AS feedname,blogurl,feedurl,NULL,teams.name,teamurl,NULL
FROM planet.feeds INNER JOIN planet.teams ON planet.feeds.team=planet.teams.id
WHERE approved ORDER BY teams.name,feeds.name,blogurl
""")
        finally:
            c.close()

        self._WriteRss(rss, "www/rss20.xml")
        self._WriteRss(rssshort, "www/rss20_short.xml")

        self.WriteFromTemplate('index.tmpl', 'www/index.html')
        self.WriteFromTemplate('feeds.tmpl', 'www/feeds.html')
        for staticfile in self.staticfiles:
            self.UpdateStaticFile(staticfile)

    def _NewFeed(self, title, builddate):
        """Return an empty RSS2 feed object titled (and described as) title."""
        return PyRSS2Gen.RSS2(
            title = title,
            link = 'http://planet.postgresql.org',
            description = title,
            generator = 'Planet PostgreSQL',
            lastBuildDate = builddate)

    def _FetchFeeds(self, cursor, sql):
        """Run sql on cursor and wrap every returned row in a PlanetFeed."""
        cursor.execute(sql)
        return [PlanetFeed(feed) for feed in cursor.fetchall()]

    def _WriteRss(self, feed, outputname):
        """Serialize feed to outputname as UTF-8, always closing the file."""
        f = open(outputname, "w")
        try:
            feed.write_xml(f, encoding='utf-8')
        finally:
            f.close()

    def WriteFromTemplate(self, templatename, outputname):
        """Render templatename with the collected data into outputname."""
        tmpl = get_template(templatename)
        # Render before opening the output: opening with "w" truncates the
        # file, so a template error must not leave an empty page behind.
        rendered = tmpl.render(Context({
            'topposters': self.topposters,
            'topteams': self.topteams,
            'allposters': self.allposters,
            'allteams': self.allteams,
            'posts': self.items,
            'twittername': self.twittername,
        })).encode('utf-8')
        f = open(outputname, "w")
        try:
            f.write(rendered)
        finally:
            f.close()

    def UpdateStaticFile(self, filename):
        """Regenerate www/<filename>.html if it is missing or older than its template."""
        htmlname = "www/%s.html" % (filename)
        tmplname = "template/%s.tmpl" % (filename)
        if not os.path.exists(htmlname) or \
           os.path.getmtime(htmlname) < os.path.getmtime(tmplname):
            # Parenthesized single argument: prints the same text whether
            # print is a statement or a function.
            print("Updating %s.html" % (filename))
            self.WriteFromTemplate("%s.tmpl" % (filename), htmlname)

    def TruncateAndCleanDescription(self, txt, maxlen=2048):
        """Return txt run through tidy and cut to at most maxlen text characters.

        txt is encoded to UTF-8 for tidy and decoded again afterwards.
        Leading <br ...> tags are removed from the result.
        """
        # First apply Tidy
        txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **self.tidyopts)),'utf8')

        # Then truncate as necessary
        ht = HtmlTruncator(maxlen)
        ht.feed(txt)
        out = ht.GetText()

        # Remove initial <br /> tags
        while out.startswith('<br'):
            tagend = out.find('>')
            if tagend == -1:
                # Unterminated tag: nothing more can be stripped, and
                # slicing from 0 would loop forever.
                break
            out = out[tagend+1:]

        return out
164
class HtmlTruncator(HTMLParser):
    """Cut HTML down to a maximum number of text characters.

    Markup does not count towards the limit; each entity or character
    reference counts as one character. Once the limit is passed, all tags
    still open are closed and "[...]" is appended. Use feed() to supply the
    HTML and GetText() to fetch the result.
    """

    # Characters quoteurl() leaves untouched: the URL delimiters plus '%',
    # so query strings survive and existing %XX escapes are not re-encoded.
    URLSAFE = "/:?#[]@!$&'()*+,;=%~"

    def __init__(self, maxlen):
        """Create a truncator that keeps at most maxlen characters of text."""
        HTMLParser.__init__(self)
        # Python 2's HTMLParser never reads this attribute. Parsers that
        # would otherwise fold references into handle_data() keep calling
        # handle_entityref()/handle_charref() when it is False.
        self.convert_charrefs = False
        self.len = 0
        self.maxlen = maxlen
        self.fulltxt = ''
        self.trunctxt = ''
        self.tagstack = []
        self.skiprest = False

    def feed(self, txt):
        """Parse txt (leading whitespace removed), remembering the full input."""
        txt = txt.lstrip()
        self.fulltxt += txt
        HTMLParser.feed(self, txt)

    def handle_startendtag(self, tag, attrs):
        """Copy a self-closing tag through verbatim."""
        if self.skiprest: return
        self.trunctxt += self.get_starttag_text()

    def quoteurl(self, str):
        """Return the URL in str with unsafe characters percent-encoded.

        Characters in URLSAFE are kept, so ports, paths, query strings and
        fragments survive, and URLs without a scheme are accepted.
        """
        url = str
        if isinstance(url, type(u'')):
            # urllib.quote on Python 2 fails on non-ASCII unicode input;
            # quoting the UTF-8 bytes works everywhere.
            url = url.encode('utf-8')
        return urllib.quote(url, self.URLSAFE)

    def cleanhref(self, attrs):
        """Return the (name, value) pair attrs, with href values URL-quoted."""
        if attrs[0] == 'href' and attrs[1] is not None:
            return 'href', self.quoteurl(attrs[1])
        return attrs

    def _escapeattr(self, value):
        """Escape value for use inside a double-quoted attribute."""
        # '&' first, so the entities produced below are not escaped again.
        return value.replace('&', '&amp;').replace('<', '&lt;').replace('"', '&quot;')

    def handle_starttag(self, tag, attrs):
        """Re-emit an opening tag with cleaned attributes and track it as open."""
        if self.skiprest: return
        parts = ["<" + tag]
        for attr in attrs:
            k, v = self.cleanhref(attr)
            if v is None:
                # Valueless attribute (e.g. "disabled"): write the XHTML
                # long form instead of the literal text "None".
                v = k
            parts.append(' %s="%s"' % (k, self._escapeattr(v)))
        parts.append(">")
        self.trunctxt += ''.join(parts)
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        """Re-emit a closing tag and drop it from the open-tag stack."""
        if self.skiprest: return
        self.trunctxt += "</" + tag + ">"
        if tag in self.tagstack:
            # Also drop tags opened inside this one and never closed, so
            # they are not closed a second time on truncation. A stray end
            # tag (not on the stack) leaves the stack alone.
            while self.tagstack.pop() != tag:
                pass

    def handle_entityref(self, ref):
        """Count a named entity (&name;) as one character and copy it."""
        self._AddReference("&" + ref + ";")

    def handle_charref(self, name):
        """Count a numeric reference (&#N; / &#xN;) as one character and copy it."""
        self._AddReference("&#" + name + ";")

    def _AddReference(self, markup):
        """Append one reference, or truncate if it no longer fits."""
        self.len += 1
        if self.skiprest: return
        if self.len > self.maxlen:
            self._FinishTruncation()
            return
        self.trunctxt += markup

    def handle_data(self, data):
        """Copy text, cutting it at the limit when the limit is passed."""
        room = self.maxlen - self.len
        self.len += len(data)
        if self.skiprest: return
        if self.len > self.maxlen:
            # Passed max length: keep only the part of this text that fits.
            # Cutting data (not trunctxt) can never slice into a tag or
            # reference emitted earlier.
            self.trunctxt += data[:room]
            self._FinishTruncation()
        else:
            self.trunctxt += data

    def _FinishTruncation(self):
        """Close all open tags, add the continuation marker, stop copying."""
        # Append any tags that weren't properly closed, innermost first
        for tag in reversed(self.tagstack):
            self.trunctxt += "</" + tag + ">"
        self.skiprest = True

        # Finally, append the continuation chars
        self.trunctxt += "[...]"

    def GetText(self):
        """Return the truncated HTML, or the full input if it was within the limit."""
        if self.len > self.maxlen:
            return self.trunctxt
        else:
            return self.fulltxt
232
if __name__=="__main__":
    # Script entry point: load planet.ini from the current directory and
    # run one full generation pass.
    config = ConfigParser.ConfigParser()
    config.read('planet.ini')
    generator = Generator(config)
    generator.Generate()
Something went wrong with that request. Please try again.