Skip to content

Commit

Permalink
counttemplates-mp.py: fixing (wasn't working because of sonet.mediawiki changes)
Browse files Browse the repository at this point in the history
  • Loading branch information
vad committed Oct 4, 2010
1 parent 9a871c2 commit 0b85d2e
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 114 deletions.
20 changes: 12 additions & 8 deletions counttemplates-mp.py
Expand Up @@ -71,7 +71,8 @@ def xml_to_queue(src, queue, lu, lut):


### MAIN PROCESS
def process_page(elem, q):
def process_page(elem, queue=None):
q = queue
user = None
global count, templates

Expand Down Expand Up @@ -105,7 +106,9 @@ def process_page(elem, q):


def main():
from functools import partial
import optparse
from operator import itemgetter

p = optparse.OptionParser(usage="usage: %prog [options] file")
opts, files = p.parse_args()
Expand All @@ -120,26 +123,27 @@ def main():

tag = mwlib.getTags(src)

lang_user, lang_user_talk = mwlib.getTranslations(src)
translations = mwlib.getTranslations(src)
lang_user, lang_user_talk = translations['User'], translations['User talk']

assert lang_user, "User namespace not found"
assert lang_user_talk, "User Talk namespace not found"

## XML Reader Process
rp = Process(target=xml_to_queue, args=(src, queue, lang_user, lang_user_talk))
rp.start()

p = Process(target=get_freq_dist, args=(queue, done_queue))
p.start()

rp.join()
## XML Reader Process
partial_process_page = partial(process_page, queue=queue)
mwlib.fast_iter(etree.iterparse(src, tag=tag['page']),
partial_process_page)

print >>sys.stderr, "end of XML processing"

queue.put(None) ## this STOPS the process
templates = done_queue.get()
p.join()

for k, v in sorted(templates.items(),cmp=lambda x,y: cmp(x[1], y[1]),reverse=True):
for k, v in sorted(templates.items(), key=itemgetter(1), reverse=True):
print v, k.encode('utf-8')


Expand Down
106 changes: 0 additions & 106 deletions counttemplates.py

This file was deleted.

1 change: 1 addition & 0 deletions countwords_groups.py
Expand Up @@ -173,6 +173,7 @@ def process_page(elem, send):

for child in elem:
if child.tag == tag['title'] and child.text:
##TODO: fix this for archive (keep) and sandbox (discard)
a_title = child.text.split('/')[0].split(':')

try:
Expand Down

0 comments on commit 0b85d2e

Please sign in to comment.