Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 220 lines (187 sloc) 6.959 kb
f28b73a2 »
2008-10-18 Initial version of new planet code
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to generate output RSS and
5 HTML data from what's currently in the database.
6
7 Copyright (C) 2008 PostgreSQL Global Development Group
8 """
9
10 import psycopg2
3899e805 »
2008-10-23 Switch to using templates to generate the HTML instead
11 import psycopg2.extensions
f28b73a2 »
2008-10-18 Initial version of new planet code
12 import PyRSS2Gen
ce807cda »
2008-10-24 Read database from a configfile, so beta can now easily have
13 import ConfigParser
f28b73a2 »
2008-10-18 Initial version of new planet code
14 import datetime
fef87f9f »
2008-10-25 Split template into a base piece shared for more pages, and a main pi…
15 import os.path
f28b73a2 »
2008-10-18 Initial version of new planet code
16 import sys
7a428250 »
2008-10-22 Use HTMLTidy and some attribute trickery to make output XHTML valid.
17 import tidy
18 import urllib
90b5201d »
2008-11-03 Switch to using the Django template engine, since we'll
19 from django.template import Context
20 from django.template.loader import get_template
21 from django.conf import settings
f28b73a2 »
2008-10-18 Initial version of new planet code
22 from HTMLParser import HTMLParser
3899e805 »
2008-10-23 Switch to using templates to generate the HTML instead
23 from planethtml import *
f28b73a2 »
2008-10-18 Initial version of new planet code
24
class Generator:
    """Generate the planet's RSS feeds and HTML pages from the database.

    Generate() reads the most recent posts plus the top posters and top
    teams, writes www/rss20.xml and www/rss20_short.xml, renders
    www/index.html from index.tmpl and refreshes the static pages named
    in self.staticfiles.
    """

    def __init__(self,db):
        """Remember the database connection and configure Tidy and Django.

        db is the connection Generate() works on; it must offer
        set_client_encoding() and cursor().
        """
        self.db = db
        # Keyword options handed to tidy.parseString() for every post body.
        self.tidyopts = dict(   drop_proprietary_attributes=1,
                                alt_text='',
                                hide_comments=1,
                                output_xhtml=1,
                                show_body_only=1,
                                clean=1,
                                char_encoding='utf8',
                                )
        # Filled by Generate() and handed to the templates.
        self.items = []
        self.topposters = []
        self.topteams = []
        # Pages rendered from template/<name>.tmpl to www/<name>.html.
        self.staticfiles = ['policy','add']

        settings.configure(
            TEMPLATE_DIRS=('template',),
        )

    def Generate(self):
        """Query the database and write all feed and HTML output files."""
        # One timestamp, so both feeds report the same build date.
        now = datetime.datetime.utcnow()
        rss = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL',
            generator = 'Planet PostgreSQL',
            lastBuildDate = now)
        rssshort = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL (short)',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL (short)',
            generator = 'Planet PostgreSQL',
            lastBuildDate = now)

        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        self.db.set_client_encoding('UTF8')
        c = self.db.cursor()
        try:
            c.execute("SET TIMEZONE=GMT")
            c.execute("SELECT guid,link,dat,title,txt,planet.feeds.name,blogurl,guidisperma,planet.teams.name,planet.teams.teamurl FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed LEFT JOIN planet.teams ON planet.feeds.team = planet.teams.id WHERE planet.feeds.approved AND NOT planet.posts.hidden ORDER BY dat DESC LIMIT 30")
            # Names follow the column order of the SELECT above.
            for (guid, link, dat, title, txt, feedname, blogurl,
                 guidisperma, teamname, teamurl) in c.fetchall():
                desc = self.TruncateAndCleanDescription(txt)
                itemtitle = feedname + ': ' + title
                # The full feed carries the untouched post text...
                rss.items.append(PyRSS2Gen.RSSItem(
                    title=itemtitle,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid,guidisperma),
                    pubDate=dat,
                    description=txt))
                # ...the short feed and the HTML page get the cleaned,
                # truncated version.
                rssshort.items.append(PyRSS2Gen.RSSItem(
                    title=itemtitle,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid,guidisperma),
                    pubDate=dat,
                    description=desc))
                self.items.append(PlanetPost(guid, link, dat, title, feedname, blogurl, desc, teamname, teamurl))

            c.execute("""
                SELECT name,blogurl,feedurl,count(*),NULL,NULL,NULL FROM planet.feeds
                INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
                WHERE age(dat) < '1 month' AND team IS NULL AND approved AND NOT hidden GROUP BY name,blogurl,feedurl ORDER BY 4 DESC,1 LIMIT 20
                """)

            for feed in c.fetchall():
                self.topposters.append(PlanetFeed(feed))
            # A list with fewer than two entries is not shown at all.
            if len(self.topposters) < 2: self.topposters = []

            c.execute("""
                SELECT feedname,blogurl,feedurl,feedcount,teamname,teamurl,teamcount FROM
                (SELECT team,name AS feedname,blogurl,feedurl,count(*) AS feedcount FROM planet.feeds
                INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
                WHERE age(dat) < '1 month' AND approved AND NOT hidden GROUP BY team,name,blogurl,feedurl
                ) AS q_feeds
                INNER JOIN
                (SELECT teams.id AS team,teams.name AS teamname,teams.teamurl,count(*) AS teamcount FROM planet.teams
                INNER JOIN planet.feeds ON planet.feeds.team=planet.teams.id
                INNER JOIN planet.posts ON planet.feeds.id=planet.posts.feed
                WHERE age(dat) < '1 month' AND approved AND NOT hidden GROUP BY teams.id, teams.name, teams.teamurl ORDER BY 4 DESC,1 LIMIT 10
                ) AS q_teams
                ON q_feeds.team=q_teams.team
                ORDER BY teamcount DESC, teamname, feedcount DESC, feedname;
                """)

            for feed in c.fetchall():
                self.topteams.append(PlanetFeed(feed))
            if len(self.topteams) < 2: self.topteams = []
        finally:
            # Release the cursor even when a query or a post body fails.
            c.close()

        self._write_rss(rss, "www/rss20.xml")
        self._write_rss(rssshort, "www/rss20_short.xml")

        self.WriteFromTemplate('index.tmpl', 'www/index.html')
        for staticfile in self.staticfiles:
            self.UpdateStaticFile(staticfile)

    def _write_rss(self, feed, outputname):
        """Serialize one PyRSS2Gen feed to outputname as UTF-8 XML."""
        f = open(outputname, "w")
        try:
            feed.write_xml(f, encoding='utf-8')
        finally:
            # Close explicitly so the file is flushed before we move on.
            f.close()

    def WriteFromTemplate(self, templatename, outputname):
        """Render templatename with the collected data into outputname.

        The template context holds 'topposters', 'topteams' and 'posts';
        the rendered text is written UTF-8 encoded.
        """
        tmpl = get_template(templatename)
        f = open(outputname, "w")
        try:
            f.write(tmpl.render(Context({
                'topposters': self.topposters,
                'topteams': self.topteams,
                'posts': self.items,
            })).encode('utf-8'))
        finally:
            # Do not leak the handle when rendering raises.
            f.close()

    def UpdateStaticFile(self, filename):
        """Re-render www/<filename>.html when it is missing or stale.

        Stale means the output is older than template/<filename>.tmpl.
        """
        outputname = "www/%s.html" % (filename)
        if not os.path.exists(outputname) or \
           os.path.getmtime(outputname) < os.path.getmtime("template/%s.tmpl" % (filename)):
            print("Updating %s.html" % (filename))
            self.WriteFromTemplate("%s.tmpl" % (filename), outputname)

    def TruncateAndCleanDescription(self, txt, maxlen=4096):
        """Return txt run through Tidy and cut to at most maxlen characters.

        txt is the post body as a unicode string. The result is the
        tidied markup, truncated by HtmlTruncator when its text is longer
        than maxlen, with any leading <br> tags removed.
        """
        # First apply Tidy
        txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **self.tidyopts)),'utf8')

        # Then truncate as necessary
        ht = HtmlTruncator(maxlen)
        ht.feed(txt)

        # Remove initial <br /> tags
        return self._strip_leading_breaks(ht.GetText())

    @staticmethod
    def _strip_leading_breaks(out):
        """Drop every <br...> tag that out starts with and return the rest."""
        while out.startswith('<br'):
            end = out.find('>')
            if end < 0:
                # Unterminated tag: nothing sensible to cut, and slicing
                # from find()'s -1 would never make progress.
                break
            out = out[end+1:]
        return out
148
class HtmlTruncator(HTMLParser):
    """HTML parser that rebuilds its input, cut off after maxlen characters.

    Text counts one per character and every entity or character
    reference counts as one. Once the limit is passed the remaining
    input is ignored, all still-open tags are closed and "[...]" is
    appended. GetText() returns that truncated markup, or the original
    input when the limit was never passed.
    """

    # Characters quoteurl() leaves alone: the RFC 3986 reserved set plus
    # '%' (so existing escapes are not encoded twice) and '~'.
    _URL_SAFE = "/:?#[]@!$&'()*+,;=%~"

    def __init__(self, maxlen):
        """Create a truncator that keeps at most maxlen text characters."""
        HTMLParser.__init__(self)
        # Parsers that have this switch (Python 3) would otherwise turn
        # references into plain data; keep the entity/charref handlers
        # below in charge. Parsers without it ignore the attribute.
        self.convert_charrefs = False
        self.len = 0            # text characters seen so far
        self.maxlen = maxlen
        self.fulltxt = ''       # input exactly as fed
        self.trunctxt = ''      # rebuilt, possibly truncated, markup
        self.tagstack = []      # tags opened and not yet closed
        self.skiprest = False   # True once truncation has happened

    def feed(self, txt):
        """Parse txt, ignoring its leading whitespace."""
        txt = txt.lstrip()
        self.fulltxt += txt
        HTMLParser.feed(self, txt)

    def handle_startendtag(self, tag, attrs):
        """Copy a self-closing tag through verbatim."""
        if self.skiprest: return
        self.trunctxt += self.get_starttag_text()

    def quoteurl(self, str):
        """Return the URL str with unsafe characters percent-encoded.

        Characters that give a URL its structure (scheme and port colons,
        '?', '&', '=', '#', existing '%' escapes, ...) are kept, so
        ports, query strings and relative URLs survive.
        """
        if isinstance(str, type(u'')):
            # Quote the UTF-8 bytes; Python 2's quote() fails on
            # non-ASCII unicode input.
            str = str.encode('utf-8')
        return urllib.quote(str, self._URL_SAFE)

    def cleanhref(self, attrs):
        """Return the (name, value) pair attrs with an href value quoted."""
        if attrs[0] == 'href' and attrs[1] is not None:
            return 'href', self.quoteurl(attrs[1])
        return attrs

    def _escape_attr(self, value):
        """Escape value for use inside a double-quoted attribute."""
        # '&' first, so the entities added afterwards are not re-escaped.
        return (value.replace('&', '&amp;').replace('<', '&lt;')
                .replace('>', '&gt;').replace('"', '&quot;'))

    def handle_starttag(self, tag, attrs):
        """Re-emit an opening tag with cleaned attributes and track it."""
        if self.skiprest: return
        pieces = ["<" + tag]
        for k, v in map(self.cleanhref, attrs):
            if v is None:
                # Attribute without a value: write the XHTML long form.
                v = k
            # The parser hands over unescaped values, so escape again.
            pieces.append(' %s="%s"' % (k, self._escape_attr(v)))
        pieces.append(">")
        self.trunctxt += ''.join(pieces)
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        """Re-emit a closing tag and unwind the open-tag stack to match."""
        if self.skiprest: return
        self.trunctxt += "</" + tag + ">"
        # Pop up to and including the matching open tag. A stray end tag
        # that matches nothing leaves the stack untouched.
        if tag in self.tagstack:
            while self.tagstack.pop() != tag:
                pass

    def handle_entityref(self, ref):
        """Copy a named entity such as &amp; through, counted as one char."""
        self._add_reference("&" + ref + ";")

    def handle_charref(self, ref):
        """Copy a numeric reference such as &#160; through, counted as one."""
        self._add_reference("&#" + ref + ";")

    def _add_reference(self, text):
        """Append one reference, or truncate if it no longer fits."""
        self.len += 1
        if self.skiprest: return
        if self.len > self.maxlen:
            # A reference cannot be split, so it is dropped whole.
            self._finish_truncation()
        else:
            self.trunctxt += text

    def handle_data(self, data):
        """Append text, cutting it where the length limit is reached."""
        self.len += len(data)
        if self.skiprest: return
        if self.len > self.maxlen:
            # Passed max length, so keep only the part of data that fits
            self.trunctxt += data[:len(data)-(self.len-self.maxlen)]
            self._finish_truncation()
        else:
            self.trunctxt += data

    def _finish_truncation(self):
        """Close all open tags, add the marker and ignore further input."""
        # Append any tags that weren't properly closed, innermost first
        for tag in reversed(self.tagstack):
            self.trunctxt += "</" + tag + ">"
        self.skiprest = True

        # Finally, append the continuation chars
        self.trunctxt += "[...]"

    def GetText(self):
        """Return the truncated markup, or the input if it was short enough."""
        if self.len > self.maxlen:
            return self.trunctxt
        else:
            return self.fulltxt
216
if __name__=="__main__":
    # Script entry point: take the connection string from the 'db'
    # option in the [planet] section of planet.ini, connect, and
    # generate all output files.
    config = ConfigParser.ConfigParser()
    config.read('planet.ini')
    connstr = config.get('planet','db')
    generator = Generator(psycopg2.connect(connstr))
    generator.Generate()
Something went wrong with that request. Please try again.