Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Commit

Permalink
PEP8 code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
lmorchard committed Feb 13, 2012
1 parent ef351ef commit cb4613d
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 79 deletions.
88 changes: 49 additions & 39 deletions apps/dekicompat/management/commands/migrate_to_kuma_wiki.py
Expand Up @@ -48,7 +48,7 @@
MT_REDIR_PAT = re.compile(r"""^#REDIRECT ?\[\[([^\]]+)\]\]""")

# See also: https://github.com/mozilla/kuma/blob/mdn/apps/devmo/models.py#L327
# I'd just import from there, but wanted to do this a little differently
# I'd just import from there, but wanted to do this a little differently
MT_NAMESPACES = (
('', 0),
('Talk:', 1),
Expand Down Expand Up @@ -118,7 +118,7 @@ class Command(BaseCommand):

make_option('--wipe', action="store_true", dest="wipe", default=False,
help="Wipe all documents before migration"),

make_option('--all', action="store_true", dest="all", default=False,
help="Migrate all documents"),
make_option('--slug', dest="slug", default=None,
Expand All @@ -142,7 +142,7 @@ class Command(BaseCommand):
make_option('--maxlength', dest="maxlength", type="int",
default=1000000,
help="Maximum character length for page content"),

make_option('--update-revisions', action="store_true",
dest="update_revisions", default=False,
help="Force update to existing revisions"),
Expand All @@ -157,7 +157,7 @@ class Command(BaseCommand):
dest="list_full_template", default=False,
help="Print the full template call, rather than"
" just the method used"),

make_option('--verbose', action='store_true', dest='verbose',
help="Produce verbose output"),)

Expand Down Expand Up @@ -194,7 +194,7 @@ def handle_migration(self, rows):
self.docs_migrated = self.index_migrated_docs()
log.info("Found %s docs already migrated" %
len(self.docs_migrated.values()))

start_ts = ts_now = time.time()

self.rev_ct = 0
Expand All @@ -207,14 +207,14 @@ def handle_migration(self, rows):
if ct < self.options['skip']:
# Skip rows until past the option value
continue

if self.update_document(r):
# Something was actually updated and not skipped
ct += 1
else:
# This was a skip.
skip_ct += 1

# Clear query cache after each document. Lots of queries are
# bound to happen, there.
django.db.reset_queries()
Expand All @@ -235,14 +235,17 @@ def handle_migration(self, rows):
duration = ts_now - start_ts
total_ct = ct + skip_ct + error_ct
if (total_ct % 10) == 0:
log.info("Rate: %s docs/sec, %s secs/doc, %s total in %s seconds" %
((total_ct+1)/(duration+1), (duration+1)/(total_ct+1),
log.info("Rate: %s docs/sec, %s secs/doc, "
"%s total in %s seconds" %
((total_ct + 1) / (duration + 1),
(duration + 1) / (total_ct + 1),
total_ct, duration))
log.info("Rate: %s revs/sec, %s total in %s seconds" %
((self.rev_ct+1)/(duration+1),
((self.rev_ct + 1) / (duration + 1),
self.rev_ct, duration))

log.info("Migration finished: %s seconds, %s migrated, %s skipped, %s errors" %
log.info("Migration finished: %s seconds, %s migrated, "
"%s skipped, %s errors" %
((time.time() - start_ts), ct, skip_ct, error_ct))

if ct == 0:
Expand Down Expand Up @@ -274,12 +277,14 @@ def handle_template_metrics(self, rows):
if src.startswith('wiki.template'):
pat = wt_pat
m = pat.match(src)
if not m: continue
if not m:
continue
print (u"Template:%s" % m.group(1)).encode('utf-8')
else:
pat = fn_pat
m = pat.match(src)
if not m: continue
if not m:
continue
out = m.group(1)
if out.startswith('template.'):
out = out.replace('template.', 'Template:')
Expand Down Expand Up @@ -325,7 +330,7 @@ def gather_pages(self):
ORDER BY page_timestamp DESC
""" % (ns_list)
self.cur.execute("SELECT count(*) FROM pages %s" % where)
log.info("Gathering ALL %s pages from MindTouch..." %
log.info("Gathering ALL %s pages..." %
self.cur.fetchone()[0])
iters.append(self._query("SELECT * FROM pages %s" % where))

Expand All @@ -334,7 +339,7 @@ def gather_pages(self):
# if a colon is present.
ns, slug = 0, self.options['slug']
if ':' in slug:
ns_name, slug = slug.split(':',1)
ns_name, slug = slug.split(':', 1)
ns = MT_NS_NAME_TO_ID.get('%s:' % ns_name, 0)

# Migrating a single page...
Expand All @@ -353,12 +358,12 @@ def gather_pages(self):

if self.options['most_viewed'] > 0:
# Grab the most viewed pages
log.info("Gathering %s most viewed pages from MindTouch..." %
log.info("Gathering %s most viewed pages..." %
self.options['most_viewed'])
iters.append(self._query("""
SELECT p.*, pc.*
FROM pages AS p, page_viewcount AS pc
WHERE
FROM pages AS p, page_viewcount AS pc
WHERE
pc.page_id=p.page_id AND
page_namespace IN %s
ORDER BY pc.page_counter DESC
Expand All @@ -367,7 +372,7 @@ def gather_pages(self):

if self.options['recent'] > 0:
# Grab the most recently modified
log.info("Gathering %s recently modified pages from MindTouch..." %
log.info("Gathering %s recently modified pages..." %
self.options['recent'])
iters.append(self._query("""
SELECT *
Expand All @@ -379,11 +384,11 @@ def gather_pages(self):

if self.options['longest'] > 0:
# Grab the longest pages
log.info("Gathering %s longest pages from MindTouch..." %
log.info("Gathering %s longest pages..." %
self.options['longest'])
iters.append(self._query("""
SELECT *
FROM pages
SELECT *
FROM pages
WHERE page_namespace IN %s
ORDER BY length(page_text) DESC
LIMIT %s
Expand All @@ -398,7 +403,7 @@ def gather_pages(self):
# formatting - once for page namespace list, and once for SQL
# escaping in Django.
iters.append(self._query("""
SELECT * FROM pages
SELECT * FROM pages
WHERE
page_namespace IN %s AND
page_text LIKE '#REDIRECT%%%%'
Expand Down Expand Up @@ -428,7 +433,7 @@ def update_document(self, r):
# existing doc is up to date.
page_ts = self.parse_timestamp(r['page_timestamp'])
last_mod = self.docs_migrated.get(r['page_id'], (None, None))[1]
if (not self.options['update_documents'] and last_mod is not None
if (not self.options['update_documents'] and last_mod is not None
and last_mod >= page_ts):
log.debug("\t%s (%s) up to date" %
(slug, r['page_display_name']))
Expand All @@ -437,7 +442,8 @@ def update_document(self, r):
# Check to see if this doc's content hash falls in the list of User:
# namespace content we want to exclude.
if r['page_namespace'] == MT_NS_NAME_TO_ID['User:']:
content_hash = hashlib.md5(r['page_text'].encode('utf-8')).hexdigest()
content_hash = (hashlib.md5(r['page_text'].encode('utf-8'))
.hexdigest())
if content_hash in USER_NS_EXCLUDED_CONTENT_HASHES:
log.debug("\t%s (%s) matched User: content exclusion list" %
(slug, r['page_display_name']))
Expand Down Expand Up @@ -499,7 +505,7 @@ def update_past_revisions(self, r_page, doc, tags):
# Process all the past revisions...
revs = []
for r in old_rows:

# Check if this already exists.
existing_id = None
if r['old_id'] in existing_old_ids:
Expand Down Expand Up @@ -534,14 +540,15 @@ def update_past_revisions(self, r_page, doc, tags):

# Build SQL placeholders for the revisions
row_placeholders = ",\n".join(
"(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
"(%s, %s, %s, %s, %s, %s, %s, %s, "
"%s, %s, %s, %s, %s, %s, %s, %s)"
for x in revs)

# Flatten list of revisions data in chronological order, so that we
# get roughly time-sequential IDs and a flat list to fill the
# placeholders.
revs_flat = [col
for rev in sorted(revs, key=lambda x: x[11])
for rev in sorted(revs, key=lambda x: x[11])
for col in rev]

# Build and execute a giant query to save all the revisions.
Expand All @@ -555,7 +562,7 @@ def update_past_revisions(self, r_page, doc, tags):
content, comment,
created, creator_id,
reviewed, reviewer_id)
VALUES
VALUES
%s
""" % row_placeholders
kc.execute(sql, revs_flat)
Expand All @@ -565,29 +572,30 @@ def update_past_revisions(self, r_page, doc, tags):
(ct_saved, ct_skipped, ct_error))

def update_current_revision(self, r, doc, tags):
# HACK: Using old_id of None to indicate the current MindTouch revision.
# HACK: Using old_id of None to indicate current MindTouch revision.
# All revisions of a Kuma document have revision records, whereas
# MindTouch only tracks "old" revisions.
p_id = r['page_user_id']
rev, created = Revision.objects.get_or_create(document=doc,
is_mindtouch_migration=True, mindtouch_old_id=None, defaults=dict(
creator_id=self.get_django_user_id_for_deki_id(r['page_user_id']),
creator_id=self.get_django_user_id_for_deki_id(p_id),
is_approved=True,
significance=SIGNIFICANCES[0][0],))

# Check to see if the current revision is up to date, in which case we
# can skip the update and save a little time.
page_ts = self.parse_timestamp(r['page_timestamp'])
if (not self.options['update_documents'] and not created and
if (not self.options['update_documents'] and not created and
page_ts <= rev.created):
log.info("\t\tCurrent revision already up to date. (ID=%s)" % rev.pk)
log.info("\t\tCurrent revision up to date. (ID=%s)" % rev.pk)
return

rev.created = rev.reviewed = page_ts
rev.slug = doc.slug
rev.title = doc.title
rev.tags = tags
rev.content = self.convert_page_text(r['page_text'])

# HACK: Some comments end up being too long, so just truncate to 255 chars.
rev.comment = r['page_comment'][:255]

Expand All @@ -608,7 +616,7 @@ def convert_page_text(self, pt):
pt = self.convert_redirect(pt)

# TODO: bug 710728 - Convert and normalize template calls
# TODO: bug 710726 - Convert intra-wiki links?
# TODO: bug 710726 - Convert intra-wiki links?

return pt

Expand All @@ -622,20 +630,20 @@ def convert_redirect(self, pt):
href = reverse('wiki.document', args=[title])
pt = REDIRECT_CONTENT % dict(href=href, title=title)
return pt

def get_tags_for_page(self, r):
"""For a given page row, get the list of tags from MindTouch and build
a string representation for Kuma revisions."""
wc = self.wikidb.cursor()
wc.execute("""
SELECT t.tag_name
FROM tag_map AS tm, tags AS t, pages AS p
FROM tag_map AS tm, tags AS t, pages AS p
WHERE
t.tag_id=tm.tagmap_tag_id AND
p.page_id=tm.tagmap_page_id AND
p.page_id=%s
""", (r['page_id'],))

# HACK: To prevent MySQL truncation warnings, constrain the imported
# tags to 100 chars. Who wants tags that long, anyway?
mt_tags = [row[0][:100] for row in wc]
Expand Down Expand Up @@ -671,7 +679,8 @@ def get_django_user_id_for_deki_id(self, deki_user_id):

# Build a DekiUser object from the database record
user = r[0]
deki_user = DekiUser(id=user['user_id'], username=user['user_name'],
deki_user = DekiUser(id=user['user_id'],
username=user['user_name'],
fullname=user['user_real_name'],
email=user['user_email'], gravatar='',)

Expand All @@ -692,6 +701,7 @@ def get_django_user_id_for_deki_id(self, deki_user_id):
return self.user_ids[deki_user_id]

SUPERUSER_ID = None

def get_superuser_id(self):
"""Get the first superuser from Django we can find."""
if not self.SUPERUSER_ID:
Expand Down
3 changes: 2 additions & 1 deletion apps/wiki/admin.py
Expand Up @@ -5,7 +5,8 @@

class DocumentAdmin(admin.ModelAdmin):
exclude = ('tags',)
list_display = ('id', 'locale', 'slug', 'title', 'category', 'is_localizable')
list_display = ('id', 'locale', 'slug', 'title', 'category',
'is_localizable')
list_display_links = ('id', 'slug',)
list_filter = ('is_template', 'is_localizable', 'category', 'locale')
raw_id_fields = ('parent',)
Expand Down

0 comments on commit cb4613d

Please sign in to comment.