From 1aadf830c2ff9e5fe9c935683e8dff188e3dccfb Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Thu, 20 Nov 2008 08:59:27 +0000 Subject: [PATCH] Fixes and improvements to Serendipity uploader. With this commit, the serendipity uploader is now (as far as I know), fully functional. --- dev/scripts/drupal_uploader.py | 80 +++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/dev/scripts/drupal_uploader.py b/dev/scripts/drupal_uploader.py index 26643c7..659b13b 100755 --- a/dev/scripts/drupal_uploader.py +++ b/dev/scripts/drupal_uploader.py @@ -237,6 +237,7 @@ def go(self, num_articles = None): # Get all articles self.redirect = {} # Keys are legacy IDs and maps to permalink + article_count = 0 for article in self.get_articles(): article = self.get_article_tags(article) @@ -253,10 +254,14 @@ def go(self, num_articles = None): comment_posting_url = self.app_url + entry_permalink for comment in self.get_article_comments(article): - print "Posting comment '" + row[0] + "' to", \ - comment_posting_url + print ("Posting comment '%s' to %s" + % (comment['title'], comment_posting_url)) self.webserver.post(comment_posting_url, comment) + article_count += 1 + if num_articles and article_count >= num_articles: + break + # create_python_routing from url_alias table f = open('legacy_aliases.py', 'w') print >>f, "redirects = {" @@ -272,15 +277,15 @@ def get_articles(self): def get_article_tags(self, article): """Annotates an article with tags.""" - return - + return article + def get_article_comments(self, article): """Returns an iterable of comments associated with an article.""" - return + return [] def get_redirects(self): """Returns an iterable of (src, dest) redirect tuples.""" - return + return [] class SerendipityConverter(BlogConverter): @@ -294,7 +299,7 @@ def get_articles(self): article['legacy_id'] = row[0] article['title'] = force_singleline(row[1]) article['format'] = None - article['body'] = row[4] + article['body'] = re.sub('\n', '
', row[4]) article['html'] = article['body'] article['format'] = 'html' published = datetime.datetime.fromtimestamp(row[2]) @@ -303,8 +308,6 @@ def get_articles(self): article['updated'] = str(last_modified) article['post_url'] = '/%s/%s/' % (published.year, published.month) yield article - if num_articles and len(articles) >= num_articles: - break def get_article_tags(self, article): article_tags = set() @@ -317,37 +320,44 @@ def get_article_tags(self, article): while tag: article_tags.add(tag['name']) tag = self.tags.get(tag['parent'], None) - article['tags'] = ','.join(tag_names) + article['tags'] = ','.join(article_tags) + return article def get_article_comments(self, article): self.cursor.execute("SELECT entry_id, id, parent_id, title, body, " "timestamp, author, email, url FROM %scomments " - "WHERE entry_id = %s ORDER BY entry_id, parent_id" + "WHERE entry_id = %s ORDER BY entry_id,parent_id,id" % (self.table_prefix, article['legacy_id'])) rows = self.cursor.fetchall() - current_entry_id = None - comments = {'0': { 'children': []}} + comments = {0: { 'children': []}} + thread_id_ctr = 0 for row in rows: - if current_entry_id != row[0]: - current_entry_id = row[0] - stack = [((x[1],), x) for x in comments['0']['children']] - while stack: - thread, entry = stack.pop() - yield { - 'title': entry[3], - 'body': entry[4], - 'published': str(datetime.datetime.fromtimestamp(row[5])), - 'thread': '.'.join('%03d' % x for x in thread), - 'name': entry[6], - 'email': entry[7], - 'homepage': entry[8], - } - stack.extend((thread + (entry[1],), x) - for x in comments[entry[1]]) - comments[row[1]] = {'data': row, 'children': []} - if row[2] > 0: - comments[row[2]]['children'].append(row) - + comments[row[1]] = { + 'data': row, + 'children': [], + 'thread_id': thread_id_ctr + } + comments[row[2]]['children'].append(row[1]) + thread_id_ctr += 1 + + stack = [] + for i in comments[0]['children']: + stack.append(((comments[i]['thread_id'],), comments[i])) + while stack: + thread, entry = stack.pop() + data = entry['data'] + yield { + 'title': data[3], + 'body': re.sub('\n', '
', data[4]), + 'published': str(datetime.datetime.fromtimestamp(data[5])), + 'thread': '.'.join('%03d' % x for x in thread), + 'name': data[6], + 'email': data[7], + 'homepage': data[8], + } + for i in comments[data[1]]['children']: + stack.append((thread + (comments[i]['thread_id'],), + comments[i])) def go(self, num_articles=None): self.cursor.execute("SELECT categoryid, parentid, category_name" @@ -361,7 +371,7 @@ def go(self, num_articles=None): 'name': row[2], } - super(DrupalConverter, self).go(num_articles) + super(SerendipityConverter, self).go(num_articles) class DrupalConverter(BlogConverter): @@ -438,8 +448,6 @@ def get_articles(self): else: article['post_url'] = '/' yield article - if num_articles and len(articles) >= num_articles: - break else: print "Rejected article with title (", \ article['title'], ") because bad format."