Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 236 lines (201 sloc) 7.605 kb
f28b73a @mhagander Initial version of new planet code
authored
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to generate output RSS and
5 HTML data from what's currently in the database.
6
7 Copyright (C) 2008 PostgreSQL Global Development Group
8 """
9
10 import psycopg2
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
11 import psycopg2.extensions
f28b73a @mhagander Initial version of new planet code
authored
12 import PyRSS2Gen
ce807cd @mhagander Read database from a configfile, so beta can now easily have
authored
13 import ConfigParser
f28b73a @mhagander Initial version of new planet code
authored
14 import datetime
fef87f9 @mhagander Split template into a base piece shared for more pages, and a main pi…
authored
15 import os.path
f28b73a @mhagander Initial version of new planet code
authored
16 import sys
7a42825 @mhagander Use HTMLTidy and some attribute trickery to make output XHTML valid.
authored
17 import tidy
18 import urllib
90b5201 @mhagander Switch to using the Django template engine, since we'll
authored
19 from django.template import Context
20 from django.template.loader import get_template
21 from django.conf import settings
f28b73a @mhagander Initial version of new planet code
authored
22 from HTMLParser import HTMLParser
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
23 from planethtml import *
f28b73a @mhagander Initial version of new planet code
authored
24
class Generator:
    """Builds the planet's output files from what is in the database.

    Generate() reads the latest posts and the feed/team listings through
    the supplied connection, then writes the two RSS feeds and the HTML
    pages below www/ using the Django templates found in template/.
    """

    def __init__(self,db):
        """Remember the connection and prepare tidy/template settings.

        db -- database connection. Generate() calls set_client_encoding()
              and cursor() on it; the script entry point passes a
              psycopg2 connection.
        """
        self.db = db
        # Keyword options handed to tidy.parseString() for every post body
        self.tidyopts = dict(   drop_proprietary_attributes=1,
                                alt_text='',
                                hide_comments=1,
                                output_xhtml=1,
                                show_body_only=1,
                                clean=1,
                                char_encoding='utf8',
                                )
        self.items = []
        self.topposters = []
        self.topteams = []
        self.allposters = []
        self.allteams = []
        # Pages that are only re-rendered when their template is newer
        self.staticfiles = ['policy','add']

        # settings.configure() refuses to run twice in one process, so
        # only configure when nobody (e.g. an earlier Generator) has yet.
        if not settings.configured:
            settings.configure(
                TEMPLATE_DIRS=('template',),
            )

    def Generate(self):
        """Query the database and write all RSS and HTML output files."""
        rss = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL',
            generator = 'Planet PostgreSQL',
            lastBuildDate = datetime.datetime.utcnow())
        rssshort = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL (short)',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL (short)',
            generator = 'Planet PostgreSQL',
            lastBuildDate = datetime.datetime.utcnow())

        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        self.db.set_client_encoding('UTF8')
        c = self.db.cursor()
        c.execute("SET TIMEZONE=GMT")
        c.execute("SELECT guid,link,dat,title,txt,planet.feeds.name,blogurl,guidisperma,planet.teams.name,planet.teams.teamurl FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed LEFT JOIN planet.teams ON planet.feeds.team = planet.teams.id WHERE planet.feeds.approved AND NOT planet.posts.hidden ORDER BY dat DESC LIMIT 30")
        for post in c.fetchall():
            # Column order follows the SELECT list above
            (guid, link, dat, title, txt, feedname, blogurl,
             guidisperma, teamname, teamurl) = post
            desc = self.TruncateAndCleanDescription(txt)
            itemtitle = feedname + ': ' + title
            # The full feed carries the untouched post text ...
            rss.items.append(PyRSS2Gen.RSSItem(
                title=itemtitle,
                link=link,
                guid=PyRSS2Gen.Guid(guid,guidisperma),
                pubDate=dat,
                description=txt))
            # ... the short feed and the HTML pages get the tidied,
            # truncated version.
            rssshort.items.append(PyRSS2Gen.RSSItem(
                title=itemtitle,
                link=link,
                guid=PyRSS2Gen.Guid(guid,guidisperma),
                pubDate=dat,
                description=desc))
            self.items.append(PlanetPost(guid, link, dat, title, feedname, blogurl, desc, teamname, teamurl))

        self.topposters = self._FetchFeeds(c, """
            SELECT name,blogurl,feedurl,count(*),NULL,NULL,NULL FROM planet.feeds
            INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
            WHERE age(dat) < '1 month' AND team IS NULL AND approved AND NOT hidden GROUP BY name,blogurl,feedurl ORDER BY 4 DESC,1 LIMIT 20
            """)
        # A "top" list with fewer than two entries is not shown at all
        if len(self.topposters) < 2: self.topposters = []

        self.topteams = self._FetchFeeds(c, """
            SELECT feedname,blogurl,feedurl,feedcount,teamname,teamurl,teamcount FROM
            (SELECT team,name AS feedname,blogurl,feedurl,count(*) AS feedcount FROM planet.feeds
            INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
            WHERE age(dat) < '1 month' AND approved AND NOT hidden GROUP BY team,name,blogurl,feedurl
            ) AS q_feeds
            INNER JOIN
            (SELECT teams.id AS team,teams.name AS teamname,teams.teamurl,count(*) AS teamcount FROM planet.teams
            INNER JOIN planet.feeds ON planet.feeds.team=planet.teams.id
            INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
            WHERE age(dat) < '1 month' AND approved AND NOT hidden GROUP BY teams.id, teams.name, teams.teamurl ORDER BY 4 DESC,1 LIMIT 10
            ) AS q_teams
            ON q_feeds.team=q_teams.team
            ORDER BY teamcount DESC, teamname, feedcount DESC, feedname;
            """)
        if len(self.topteams) < 2: self.topteams = []

        self.allposters = self._FetchFeeds(c, """
            SELECT name,blogurl,feedurl,NULL,NULL,NULL,NULL FROM planet.feeds
            WHERE approved AND team IS NULL ORDER BY name,blogurl
            """)
        self.allteams = self._FetchFeeds(c, """
            SELECT feeds.name AS feedname,blogurl,feedurl,NULL,teams.name,teamurl,NULL
            FROM planet.feeds INNER JOIN planet.teams ON planet.feeds.team=planet.teams.id
            WHERE approved ORDER BY teams.name,feeds.name,blogurl
            """)

        self._WriteFeed(rss, "www/rss20.xml")
        self._WriteFeed(rssshort, "www/rss20_short.xml")

        self.WriteFromTemplate('index.tmpl', 'www/index.html')
        self.WriteFromTemplate('feeds.tmpl', 'www/feeds.html')
        for staticfile in self.staticfiles:
            self.UpdateStaticFile(staticfile)

    def _FetchFeeds(self, cursor, query):
        """Run query on cursor and wrap every result row in a PlanetFeed."""
        cursor.execute(query)
        return [PlanetFeed(feed) for feed in cursor.fetchall()]

    def _WriteFeed(self, feed, outputname):
        """Serialize one RSS2 object to outputname, always closing the file."""
        f = open(outputname, "w")
        try:
            feed.write_xml(f, encoding='utf-8')
        finally:
            f.close()

    def WriteFromTemplate(self, templatename, outputname):
        """Render templatename with the collected data into outputname.

        The template sees 'topposters', 'topteams', 'allposters',
        'allteams' and 'posts'. Output is written UTF-8 encoded.
        """
        tmpl = get_template(templatename)
        # Render before opening the target: opening with "w" truncates it,
        # and a template error must not leave an empty page behind.
        rendered = tmpl.render(Context({
            'topposters': self.topposters,
            'topteams': self.topteams,
            'allposters': self.allposters,
            'allteams': self.allteams,
            'posts': self.items,
            })).encode('utf-8')
        f = open(outputname, "w")
        try:
            f.write(rendered)
        finally:
            f.close()

    def UpdateStaticFile(self, filename):
        """Re-render www/<filename>.html if missing or older than its template."""
        target = "www/%s.html" % (filename)
        template = "template/%s.tmpl" % (filename)
        if not os.path.exists(target) or \
           os.path.getmtime(target) < os.path.getmtime(template):
            # Parenthesised single argument prints identically on Python 2
            print("Updating %s.html" % (filename))
            self.WriteFromTemplate("%s.tmpl" % (filename), target)

    def TruncateAndCleanDescription(self, txt):
        """Return txt run through tidy, truncated, without leading <br> tags.

        txt -- post body as a unicode string.
        """
        # First apply Tidy
        txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **self.tidyopts)),'utf8')

        # Then truncate as necessary
        ht = HtmlTruncator(4096)
        ht.feed(txt)

        # Remove initial <br /> tags
        return self._StripLeadingBreaks(ht.GetText())

    @staticmethod
    def _StripLeadingBreaks(out):
        """Drop every '<br...>' tag that starts out and return the rest."""
        while out.startswith('<br'):
            end = out.find('>')
            if end < 0:
                # Unterminated tag: nothing can be removed, and slicing
                # from find()'s -1 would loop forever on the same text.
                break
            out = out[end+1:]
        return out
163
class HtmlTruncator(HTMLParser):
    """HTML parser that rebuilds its input cut off after maxlen characters.

    Only text counts towards the limit: every character of data adds one,
    every entity or character reference adds one, markup adds nothing.
    When the limit is passed, all tags still open are closed and "[...]"
    is appended. Feed the text with feed(), read the result with GetText().
    """

    def __init__(self, maxlen):
        """maxlen -- number of text characters to keep before truncating."""
        HTMLParser.__init__(self)
        self.len = 0            # text characters seen so far (keeps counting after the cut)
        self.maxlen = maxlen
        self.fulltxt = ''       # everything fed, returned when no cut was needed
        self.trunctxt = ''      # rebuilt markup, returned when the limit was passed
        self.tagstack = []      # start tags not yet closed, outermost first
        self.skiprest = False   # set once the cut has been made

    def feed(self, txt):
        """Parse txt (leading whitespace removed) and record it verbatim."""
        txt = txt.lstrip()
        self.fulltxt += txt
        HTMLParser.feed(self, txt)

    def handle_startendtag(self, tag, attrs):
        # Self-closing tags are copied as written and never enter the stack
        if self.skiprest: return
        self.trunctxt += self.get_starttag_text()

    def quoteurl(self, str):
        """Return the URL str with everything after the scheme percent-quoted.

        '/' and ':' are left alone, so a port number or any later colon
        survives instead of cutting the URL short. A URL without a scheme
        is quoted as a whole.
        """
        # NOTE(review): '?', '&', '#' and '%' are percent-encoded as well,
        # as before - confirm that is wanted for hrefs with query strings.
        # NOTE(review): on Python 2 urllib.quote() raises KeyError for
        # unicode input containing non-ASCII characters - confirm whether
        # hrefs can contain them.
        p = str.split(":",1)
        if len(p) < 2:
            return urllib.quote(str)
        return p[0] + ":" + urllib.quote(p[1], safe='/:')

    def cleanhref(self, attrs):
        """Return the (name, value) pair attrs, with an href value URL-quoted."""
        if attrs[0] == 'href' and attrs[1] is not None:
            return 'href', self.quoteurl(attrs[1])
        return attrs

    def _escapeattr(self, value):
        """Escape value for use inside a double-quoted attribute."""
        # The parser hands attribute values over unescaped, so they have
        # to be escaped again before being written back out.
        return (value.replace('&', '&amp;')
                     .replace('<', '&lt;')
                     .replace('>', '&gt;')
                     .replace('"', '&quot;'))

    def handle_starttag(self, tag, attrs):
        if self.skiprest: return
        rendered = []
        for k, v in [self.cleanhref(a) for a in attrs]:
            if v is None:
                # Attribute given without a value
                v = ''
            rendered.append(' %s="%s"' % (k, self._escapeattr(v)))
        self.trunctxt += "<" + tag + ''.join(rendered) + ">"
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if self.skiprest: return
        self.trunctxt += "</" + tag + ">"
        # A stray end tag may arrive with nothing open
        if self.tagstack:
            self.tagstack.pop()

    def handle_entityref(self, ref):
        self._handle_reference("&" + ref + ";")

    def handle_charref(self, ref):
        # Without this handler numeric references vanish from the output
        self._handle_reference("&#" + ref + ";")

    def _handle_reference(self, text):
        """Count one character for a reference and copy it if it still fits."""
        self.len += 1
        if self.skiprest: return
        if self.len > self.maxlen:
            # The reference itself crosses the limit: cut in front of it
            # rather than keeping part of it.
            self._finish()
            return
        self.trunctxt += text

    def handle_data(self, data):
        # Room left before the limit, measured before counting this chunk
        room = max(self.maxlen - self.len, 0)
        self.len += len(data)
        if self.skiprest: return
        if self.len > self.maxlen:
            # Passed max length, so truncate text as close to the limit as possible
            self.trunctxt += data[:room]
            self._finish()
        else:
            self.trunctxt += data

    def _finish(self):
        """Close all open tags, mark the cut and stop copying further input."""
        # Now append any tags that weren't properly closed
        for tag in reversed(self.tagstack):
            self.trunctxt += "</" + tag + ">"
        self.skiprest = True

        # Finally, append the continuation chars
        self.trunctxt += "[...]"

    def GetText(self):
        """Return the truncated markup, or the full input if it fit."""
        if self.len > self.maxlen:
            return self.trunctxt
        else:
            return self.fulltxt
231
if __name__=="__main__":
    # Script entry point: the connection string lives in planet.ini,
    # section [planet], option "db".
    config = ConfigParser.ConfigParser()
    config.read('planet.ini')
    connection = psycopg2.connect(config.get('planet','db'))
    generator = Generator(connection)
    generator.Generate()
Something went wrong with that request. Please try again.