Skip to content

Commit

Permalink
add example for collecting all metas
Browse files Browse the repository at this point in the history
  • Loading branch information
leVirve committed Jul 28, 2016
1 parent 4a8a249 commit 80c75dc
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions examples/collect_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import time
from multiprocessing import Pool

from pymongo import MongoClient

from dcard import Dcard, logger


# Connect to MongoDB using MongoClient's default arguments.
client = MongoClient()
# Database that store_metas() writes the collected metas into.
db = client['dcard-metas']
# NOTE(review): not referenced anywhere else in the visible code -- confirm
# whether this placeholder is still needed.
collct = None


def store_metas(metas, forum):
    """Upsert a batch of metas into MongoDB and log the bulk result.

    Each meta is matched on its ``id`` field and written with ``$set``
    as an upsert, so an already stored document is updated in place.

    Args:
        metas: Batch of meta dicts, each carrying an ``'id'`` key
            (assumed to be a list -- confirm against the caller).
        forum: Forum name; used only to label the log line.

    Returns:
        None. An empty batch is skipped without touching the database.
    """
    if not metas:
        # Executing a bulk operation with nothing queued raises
        # pymongo.errors.InvalidOperation, so skip empty batches.
        return

    # NOTE(review): the collection name is the literal 'forum', not the
    # `forum` argument -- confirm that one shared collection is intended.
    # NOTE(review): initialize_ordered_bulk_op() is deprecated in newer
    # PyMongo releases; migrate to bulk_write() when upgrading.
    bulk = db['forum'].initialize_ordered_bulk_op()
    for meta in metas:
        bulk.find({'id': meta['id']}).upsert().update({"$set": meta})
    result = bulk.execute()

    # Log only how many documents were upserted, not the full list.
    result['upserted'] = len(result['upserted'])
    logger.info('[database] #Forum {}: {}'.format(forum, result))


def collect_metas(name):
    """Fetch metas of the forum *name* and hand them to ``store_metas``.

    ``store_metas`` is passed to ``get_metas`` as its callback, with the
    forum name bound as a default argument. The elapsed wall-clock time
    is logged once ``get_metas`` returns.
    """
    limit = 999  # let it be infinity later!

    def on_metas(metas, forum=name):
        return store_metas(metas, forum)

    started = time.time()
    Dcard.forums(name).get_metas(num=limit, callback=on_metas)
    elapsed = time.time() - started
    logger.info('Spent {:.05} sec for [{}]'.format(elapsed, name))


def main():
    """Collect metas for every forum using a pool of 8 worker processes.

    The forum list comes from ``Dcard().forums.get(no_school=True)``;
    each forum's ``'alias'`` is passed to ``collect_metas`` in a worker.
    """
    dcard = Dcard()
    forums = dcard.forums.get(no_school=True)
    aliases = [forum['alias'] for forum in forums]

    pool = Pool(processes=8)
    try:
        # Blocks until all forums are done; re-raises a worker's exception.
        pool.map(collect_metas, aliases)
    finally:
        # Always release the worker processes, even when a worker failed.
        pool.close()
        pool.join()


if __name__ == '__main__':
    # Time the whole run and log the total once main() returns.
    started = time.time()
    main()
    elapsed = time.time() - started
    logger.info('Total Work: {:.05} sec'.format(elapsed))

0 comments on commit 80c75dc

Please sign in to comment.