Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 193 lines (162 sloc) 5.711 kb
f28b73a @mhagander Initial version of new planet code
authored
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to generate output RSS and
5 HTML data from what's currently in the database.
6
7 Copyright (C) 2008 PostgreSQL Global Development Group
8 """
9
10 import psycopg2
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
11 import psycopg2.extensions
f28b73a @mhagander Initial version of new planet code
authored
12 import PyRSS2Gen
ce807cd @mhagander Read database from a configfile, so beta can now easily have
authored
13 import ConfigParser
f28b73a @mhagander Initial version of new planet code
authored
14 import datetime
fef87f9 @mhagander Split template into a base piece shared for more pages, and a main pi…
authored
15 import os.path
f28b73a @mhagander Initial version of new planet code
authored
16 import sys
7a42825 @mhagander Use HTMLTidy and some attribute trickery to make output XHTML valid.
authored
17 import tidy
18 import urllib
90b5201 @mhagander Switch to using the Django template engine, since we'll
authored
19 from django.template import Context
20 from django.template.loader import get_template
21 from django.conf import settings
f28b73a @mhagander Initial version of new planet code
authored
22 from HTMLParser import HTMLParser
3899e80 @mhagander Switch to using templates to generate the HTML instead
authored
23 from planethtml import *
f28b73a @mhagander Initial version of new planet code
authored
24
class Generator:
    """Build the Planet PostgreSQL output files from the database.

    Generate() reads the 30 most recent visible posts and the approved
    feeds through the supplied connection, then writes www/rss20.xml,
    www/rss20_short.xml, www/index.html and the static pages named in
    self.staticfiles.
    """

    def __init__(self,db):
        """Store the connection and prepare tidy and template settings.

        db is used as a psycopg2 connection: Generate() calls
        set_client_encoding() and cursor() on it.
        """
        self.db = db
        # Keyword options handed to tidy.parseString() for every post body
        self.tidyopts = dict(   drop_proprietary_attributes=1,
                                alt_text='',
                                hide_comments=1,
                                output_xhtml=1,
                                show_body_only=1,
                                clean=1,
                                char_encoding='utf8',
                                )
        self.items = []
        self.feeds = []
        # Templates rendered to www/<name>.html by UpdateStaticFile()
        self.staticfiles = ['policy','add']

        # settings.configure() may only be called once per process, so
        # creating a second Generator must not configure again.
        if not settings.configured:
            settings.configure(
                TEMPLATE_DIRS=('template',),
            )

    def Generate(self):
        """Fetch posts and feeds, then write the RSS feeds and HTML pages."""
        # Start from empty lists so a repeated call does not duplicate entries
        self.items = []
        self.feeds = []

        rss = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL',
            generator = 'Planet PostgreSQL',
            lastBuildDate = datetime.datetime.utcnow())
        rssshort = PyRSS2Gen.RSS2(
            title = 'Planet PostgreSQL (short)',
            link = 'http://planet.postgresql.org',
            description = 'Planet PostgreSQL (short)',
            generator = 'Planet PostgreSQL',
            lastBuildDate = datetime.datetime.utcnow())

        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        self.db.set_client_encoding('UTF8')
        c = self.db.cursor()
        try:
            c.execute("SET TIMEZONE=GMT")
            c.execute("SELECT guid,link,dat,title,txt,planet.feeds.name,blogurl,guidisperma,planet.teams.name,planet.teams.teamurl FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed LEFT JOIN planet.teams ON planet.feeds.team = planet.teams.id WHERE planet.feeds.approved AND NOT planet.posts.hidden ORDER BY dat DESC LIMIT 30")
            for post in c.fetchall():
                # Column order follows the SELECT list above
                (guid, link, dat, title, txt, feedname, blogurl,
                 guidisperma, teamname, teamurl) = post
                desc = self.TruncateAndCleanDescription(txt)
                # The full feed carries the unmodified text, the short
                # feed and the HTML page the tidied and truncated one.
                rss.items.append(PyRSS2Gen.RSSItem(
                    title=feedname + ': ' + title,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid,guidisperma),
                    pubDate=dat,
                    description=txt))
                rssshort.items.append(PyRSS2Gen.RSSItem(
                    title=feedname + ': ' + title,
                    link=link,
                    guid=PyRSS2Gen.Guid(guid,guidisperma),
                    pubDate=dat,
                    description=desc))
                self.items.append(PlanetPost(guid, link, dat, title, feedname, blogurl, desc, teamname, teamurl))

            c.execute("SELECT name,blogurl,feedurl FROM planet.feeds WHERE approved ORDER BY name")
            for feed in c.fetchall():
                self.feeds.append(PlanetFeed(feed[0], feed[1], feed[2]))
        finally:
            c.close()

        self._WriteRss(rss, "www/rss20.xml")
        self._WriteRss(rssshort, "www/rss20_short.xml")

        self.WriteFromTemplate('index.tmpl', 'www/index.html')
        for staticfile in self.staticfiles:
            self.UpdateStaticFile(staticfile)

    def _WriteRss(self, feed, filename):
        """Write a PyRSS2Gen feed to filename as UTF-8 and close the file."""
        f = open(filename, "w")
        try:
            feed.write_xml(f, encoding='utf-8')
        finally:
            f.close()

    def WriteFromTemplate(self, templatename, outputname):
        """Render templatename with the current feeds and posts into outputname."""
        tmpl = get_template(templatename)
        # Render before opening the output, so a template error does not
        # leave an already-truncated, empty file behind.
        rendered = tmpl.render(Context({
            'feeds': self.feeds,
            'posts': self.items,
        })).encode('utf-8')
        f = open(outputname, "w")
        try:
            f.write(rendered)
        finally:
            f.close()

    def UpdateStaticFile(self, filename):
        """Regenerate www/<filename>.html if it is missing or older than its template."""
        output = "www/%s.html" % (filename)
        template = "template/%s.tmpl" % (filename)
        if not os.path.exists(output) or \
           os.path.getmtime(output) < os.path.getmtime(template):
            print("Updating %s.html" % (filename))
            self.WriteFromTemplate("%s.tmpl" % (filename), output)

    def TruncateAndCleanDescription(self, txt, maxlen=4096):
        """Return txt run through tidy, truncated and without leading <br> tags.

        maxlen is the limit passed to HtmlTruncator; the default matches
        the previously hard-coded value.
        """
        # First apply Tidy
        txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **self.tidyopts)),'utf8')

        # Then truncate as necessary
        ht = HtmlTruncator(maxlen)
        ht.feed(txt)

        # Remove initial <br /> tags
        return self._StripLeadingBreaks(ht.GetText())

    def _StripLeadingBreaks(self, txt):
        """Remove every tag starting with '<br' from the beginning of txt."""
        while txt.startswith('<br'):
            end = txt.find('>')
            if end < 0:
                # Unterminated tag: find() returned -1, so slicing from
                # end+1 would leave txt unchanged and never terminate.
                break
            txt = txt[end+1:]
        return txt
120
class HtmlTruncator(HTMLParser):
    """Parser that re-emits its input, cut off after maxlen text characters.

    Only character data and character/entity references count towards
    the limit; markup does not. When the limit is passed, every element
    still open is closed and "[...]" is appended. GetText() returns the
    input unchanged when the limit was never passed.
    """

    # Characters urllib.quote() must leave alone: the URL delimiters,
    # plus '%' so that existing percent-escapes are not encoded twice.
    URLSAFE = "/:?#[]@!$&'()*+,;=%~"

    def __init__(self, maxlen):
        HTMLParser.__init__(self)
        self.len = 0            # text characters seen so far
        self.maxlen = maxlen
        self.fulltxt = ''       # everything passed to feed()
        self.trunctxt = ''      # rebuilt markup, used once the limit is passed
        self.tagstack = []      # names of the currently open elements
        self.skiprest = False   # set once truncation has happened

    def feed(self, txt):
        """Parse txt, ignoring leading whitespace."""
        txt = txt.lstrip()
        self.fulltxt += txt
        HTMLParser.feed(self, txt)

    def handle_startendtag(self, tag, attrs):
        # Self-closing tags are copied verbatim and never put on the stack
        if self.skiprest: return
        self.trunctxt += self.get_starttag_text()

    def quoteurl(self, str):
        """Percent-encode the characters of a URL that may not appear in one.

        Delimiters such as ':', '/', '?', '&' and '=' and existing
        percent-escapes are kept, so ports, query strings and already
        encoded URLs survive. Relative URLs without a scheme are accepted.
        (The parameter name shadows the builtin; it is kept so that the
        signature stays unchanged.)
        """
        # urllib.quote() on Python 2 raises KeyError for unicode strings
        # with non-ASCII characters, so quote the UTF-8 bytes instead.
        if not isinstance(str, bytes):
            str = str.encode('utf-8')
        return urllib.quote(str, self.URLSAFE)

    def cleanhref(self, attrs):
        """Return the (name, value) pair attrs, with the value quoted for href."""
        if attrs[0] == 'href' and attrs[1] is not None:
            return 'href', self.quoteurl(attrs[1])
        return attrs

    def _escapeattr(self, value):
        """Escape value for use inside a double-quoted attribute."""
        from xml.sax.saxutils import escape
        return escape(value, {'"': '&quot;'})

    def handle_starttag(self, tag, attrs):
        if self.skiprest: return
        parts = ["<" + tag]
        for k, v in map(self.cleanhref, attrs):
            if v is None:
                # XHTML has no valueless attributes; repeat the name
                v = k
            # The parser hands over unescaped values, so escape again
            parts.append(' %s="%s"' % (k, self._escapeattr(v)))
        parts.append(">")
        self.trunctxt += ''.join(parts)
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if self.skiprest: return
        self.trunctxt += "</" + tag + ">"
        # Close the innermost matching element and anything left open
        # inside it; an end tag that matches nothing leaves the stack alone.
        for i in range(len(self.tagstack) - 1, -1, -1):
            if self.tagstack[i] == tag:
                del self.tagstack[i:]
                break

    def handle_entityref(self, ref):
        self._handle_reference("&" + ref + ";")

    def handle_charref(self, ref):
        self._handle_reference("&#" + ref + ";")

    def _handle_reference(self, markup):
        """Copy one character or entity reference, counted as one character."""
        self.len += 1
        if self.skiprest: return
        if self.len > self.maxlen:
            # The reference is the character crossing the limit: leave it
            # out as a whole rather than cutting it in half later.
            self._finish_truncation()
            return
        self.trunctxt += markup

    def handle_data(self, data):
        self.len += len(data)
        if self.skiprest: return
        self.trunctxt += data
        if self.len > self.maxlen:
            # Passed max length, so truncate text as close to the limit as possible
            self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)]
            self._finish_truncation()

    def _finish_truncation(self):
        """Close all open elements, add the continuation marker and stop copying."""
        # Append any tags that weren't properly closed, innermost first
        for tag in reversed(self.tagstack):
            self.trunctxt += "</" + tag + ">"
        self.skiprest = True

        # Finally, append the continuation chars
        self.trunctxt += "[...]"

    def GetText(self):
        """Return the truncated markup, or the original input if it was short enough."""
        if self.len > self.maxlen:
            return self.trunctxt
        else:
            return self.fulltxt
188
if __name__ == "__main__":
    # The connection string is the 'db' option in the [planet] section
    # of planet.ini in the current directory.
    config = ConfigParser.ConfigParser()
    config.read('planet.ini')
    connection = psycopg2.connect(config.get('planet', 'db'))
    generator = Generator(connection)
    generator.Generate()
Something went wrong with that request. Please try again.