Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 198 lines (172 sloc) 7.349 kB
ac53f33 @samuelclay Further Mongo work. Stories are now saved correctly, with tags + auth…
authored
1 from pprint import pprint
2 from django.conf import settings
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
3 from apps.reader.models import MUserStory, UserStory
e26ee9d @samuelclay Moving feed_pages from PostgreSQL to Mongo. Adding compression.
authored
4 from apps.rss_feeds.models import Feed, Story, MStory, StoryAuthor, Tag, MFeedPage, FeedPage
f90029e @samuelclay Whew. Finished entire mongo conversion. Classifiers, Stories, and Use…
authored
5 from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
6 from apps.analyzer.models import ClassifierTitle, ClassifierAuthor, ClassifierFeed, ClassifierTag
04f5113 @samuelclay Creating ObjectID for mongo stories with the story_guid as the ID.
authored
7 import mongoengine, pymongo
ac53f33 @samuelclay Further Mongo work. Stories are now saved correctly, with tags + auth…
authored
8 import sys
7c994d3 @samuelclay Sanity check on story.id->story_story_guid convert.
authored
9 from mongoengine.queryset import OperationError
ac53f33 @samuelclay Further Mongo work. Stories are now saved correctly, with tags + auth…
authored
10 from utils import json
11
# Connection settings come from the Django settings module; `db` is the
# handle the bootstrap functions below use for raw collection access.
MONGO_DB = settings.MONGO_DB
db = mongoengine.connect(
    MONGO_DB['NAME'],
    host=MONGO_DB['HOST'],
    port=MONGO_DB['PORT'],
)
14
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
15 def bootstrap_stories():
16 print "Mongo DB stories: %s" % MStory.objects().count()
3cd4ab9 @samuelclay Fixing issues with bootstrapping read stories.
authored
17 # db.stories.drop()
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
18 print "Dropped! Mongo DB stories: %s" % MStory.objects().count()
ac53f33 @samuelclay Further Mongo work. Stories are now saved correctly, with tags + auth…
authored
19
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
20 print "Stories: %s" % Story.objects.all().count()
21 pprint(db.stories.index_information())
ac53f33 @samuelclay Further Mongo work. Stories are now saved correctly, with tags + auth…
authored
22
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
23 feeds = Feed.objects.all().order_by('-average_stories_per_month')
e0282ef @samuelclay Rescuing story bootstrapping. Halfway through it crashes, so restarte…
authored
24 feed_count = feeds.count()
25 i = 0
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
26 for feed in feeds:
e0282ef @samuelclay Rescuing story bootstrapping. Halfway through it crashes, so restarte…
authored
27 i += 1
28 print "%s/%s: %s (%s stories)" % (i, feed_count,
29 feed, Story.objects.filter(story_feed=feed).count())
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
30 sys.stdout.flush()
ac53f33 @samuelclay Further Mongo work. Stories are now saved correctly, with tags + auth…
authored
31
e0282ef @samuelclay Rescuing story bootstrapping. Halfway through it crashes, so restarte…
authored
32 stories = Story.objects.filter(story_feed=feed).values()
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
33 for story in stories:
34 # story['story_tags'] = [tag.name for tag in Tag.objects.filter(story=story['id'])]
496c590 @samuelclay Rescuing story bootstrapping. Halfway through it crashes, so restarte…
authored
35 try:
36 story['story_tags'] = json.decode(story['story_tags'])
37 except:
38 continue
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
39 del story['id']
40 del story['story_author_id']
a7d5ff3 @samuelclay Rescuing story bootstrapping.
authored
41 try:
42 MStory(**story).save()
43 except:
44 continue
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
45
f90029e @samuelclay Whew. Finished entire mongo conversion. Classifiers, Stories, and Use…
authored
46 print "\nMongo DB stories: %s" % MStory.objects().count()
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
47
48 def bootstrap_userstories():
49 print "Mongo DB userstories: %s" % MUserStory.objects().count()
547d7ef @samuelclay Deferring the loading of the iframe + feed to decrease latency before…
authored
50 # db.userstories.drop()
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
51 print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count()
52
53 print "UserStories: %s" % UserStory.objects.all().count()
54 pprint(db.userstories.index_information())
55
56 userstories = UserStory.objects.all().values()
57 for userstory in userstories:
58 try:
59 story = Story.objects.get(pk=userstory['story_id'])
60 except Story.DoesNotExist:
61 continue
3cd4ab9 @samuelclay Fixing issues with bootstrapping read stories.
authored
62 try:
63 userstory['story'] = MStory.objects(story_feed_id=story.story_feed.pk, story_guid=story.story_guid)[0]
64 except:
65 print '!',
66 continue
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
67 print '.',
68 del userstory['id']
69 del userstory['opinion']
70 del userstory['story_id']
def2f5a @samuelclay Rescuing story bootstrapping.
authored
71 try:
72 MUserStory(**userstory).save()
73 except:
74 print '\n\n!\n\n'
75 continue
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
76
77 print "\nMongo DB userstories: %s" % MUserStory.objects().count()
78
f90029e @samuelclay Whew. Finished entire mongo conversion. Classifiers, Stories, and Use…
authored
79 def bootstrap_classifiers():
80 for sql_classifier, mongo_classifier in ((ClassifierTitle, MClassifierTitle),
81 (ClassifierAuthor, MClassifierAuthor),
82 (ClassifierFeed, MClassifierFeed),
83 (ClassifierTag, MClassifierTag)):
84 collection = mongo_classifier.meta['collection']
85 print "Mongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count())
547d7ef @samuelclay Deferring the loading of the iframe + feed to decrease latency before…
authored
86 # db[collection].drop()
f90029e @samuelclay Whew. Finished entire mongo conversion. Classifiers, Stories, and Use…
authored
87 print "Dropped! Mongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count())
88
89 print "%s: %s" % (sql_classifier._meta.object_name, sql_classifier.objects.all().count())
90 pprint(db[collection].index_information())
91
92 for userclassifier in sql_classifier.objects.all().values():
93 del userclassifier['id']
94 if sql_classifier._meta.object_name == 'ClassifierAuthor':
95 author = StoryAuthor.objects.get(pk=userclassifier['author_id'])
96 userclassifier['author'] = author.author_name
97 del userclassifier['author_id']
98 if sql_classifier._meta.object_name == 'ClassifierTag':
99 tag = Tag.objects.get(pk=userclassifier['tag_id'])
100 userclassifier['tag'] = tag.name
101 del userclassifier['tag_id']
102 print '.',
def2f5a @samuelclay Rescuing story bootstrapping.
authored
103 try:
104 mongo_classifier(**userclassifier).save()
105 except:
106 print '\n\n!\n\n'
107 continue
f90029e @samuelclay Whew. Finished entire mongo conversion. Classifiers, Stories, and Use…
authored
108
109 print "\nMongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count())
110
e26ee9d @samuelclay Moving feed_pages from PostgreSQL to Mongo. Adding compression.
authored
111 def bootstrap_feedpages():
112 print "Mongo DB feed_pages: %s" % MFeedPage.objects().count()
c9d06f9 @samuelclay Compressing stories. Also fixing compression of feed_pages bootstrap.
authored
113 # db.feed_pages.drop()
e26ee9d @samuelclay Moving feed_pages from PostgreSQL to Mongo. Adding compression.
authored
114 print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()
115
48b1477 @samuelclay Adding a timelimit to feed fetching. 20 seconds, that's all you got.
authored
116 print "FeedPages: %s" % FeedPage.objects.count()
e26ee9d @samuelclay Moving feed_pages from PostgreSQL to Mongo. Adding compression.
authored
117 pprint(db.feed_pages.index_information())
118
4147881 @samuelclay Compressing stories by feed.
authored
119 feeds = Feed.objects.all().order_by('-average_stories_per_month')
e26ee9d @samuelclay Moving feed_pages from PostgreSQL to Mongo. Adding compression.
authored
120 feed_count = feeds.count()
121 i = 0
122 for feed in feeds:
123 i += 1
124 print "%s/%s: %s" % (i, feed_count, feed,)
125 sys.stdout.flush()
4147881 @samuelclay Compressing stories by feed.
authored
126
127 if not MFeedPage.objects(feed_id=feed.pk):
128 feed_page = FeedPage.objects.filter(feed=feed).values()
129 if feed_page:
130 del feed_page[0]['id']
131 feed_page[0]['feed_id'] = feed.pk
132 try:
133 MFeedPage(**feed_page[0]).save()
134 except:
135 print '\n\n!\n\n'
136 continue
e26ee9d @samuelclay Moving feed_pages from PostgreSQL to Mongo. Adding compression.
authored
137
138
139 print "\nMongo DB feed_pages: %s" % MFeedPage.objects().count()
140
c9d06f9 @samuelclay Compressing stories. Also fixing compression of feed_pages bootstrap.
authored
141 def compress_stories():
142 count = MStory.objects().count()
143 print "Mongo DB stories: %s" % count
144 p = 0.0
145 i = 0
4147881 @samuelclay Compressing stories by feed.
authored
146
147 feeds = Feed.objects.all().order_by('-average_stories_per_month')
148 feed_count = feeds.count()
149 f = 0
150 for feed in feeds:
151 f += 1
152 print "%s/%s: %s" % (f, feed_count, feed,)
153 sys.stdout.flush()
154
155 for story in MStory.objects(story_feed_id=feed.pk):
156 i += 1.0
157 if round(i / count * 100) != p:
158 p = round(i / count * 100)
159 print '%s%%' % p
160 story.save()
c9d06f9 @samuelclay Compressing stories. Also fixing compression of feed_pages bootstrap.
authored
161
15a27e8 @samuelclay Bootstrapping mongo to use story_guids as unqiue to feeds.
authored
162 def reindex_stories():
3835eb9 @samuelclay Deleting stories with invalid id's.
authored
163 db = pymongo.Connection().newsblur
15a27e8 @samuelclay Bootstrapping mongo to use story_guids as unqiue to feeds.
authored
164 count = MStory.objects().count()
165 print "Mongo DB stories: %s" % count
166 p = 0.0
167 i = 0
168
57bc5ad @samuelclay Final bootstrap migration.
authored
169 feeds = Feed.objects.all().order_by('-average_stories_per_month')[100000]
15a27e8 @samuelclay Bootstrapping mongo to use story_guids as unqiue to feeds.
authored
170 feed_count = feeds.count()
171 f = 0
172 for feed in feeds:
173 f += 1
174 print "%s/%s: %s" % (f, feed_count, feed,)
175 sys.stdout.flush()
176 for story in MStory.objects(story_feed_id=feed.pk):
177 i += 1.0
178 if round(i / count * 100) != p:
179 p = round(i / count * 100)
180 print '%s%%' % p
974d5b4 @samuelclay Deleting stories with invalid id's.
authored
181 if isinstance(story.id, unicode):
b26dfa8 @samuelclay Handling feed fetch unique errors.
authored
182 story.story_guid = story.id
183 story.id = pymongo.objectid.ObjectId()
7c994d3 @samuelclay Sanity check on story.id->story_story_guid convert.
authored
184 try:
185 story.save()
186 except OperationError, e:
187 print " ***> OperationError: %s" % e
57bc5ad @samuelclay Final bootstrap migration.
authored
188 except e:
189 print ' ***> Unknown Error: %s' % e
b26dfa8 @samuelclay Handling feed fetch unique errors.
authored
190 db.stories.remove({"_id": story.story_guid})
c9d06f9 @samuelclay Compressing stories. Also fixing compression of feed_pages bootstrap.
authored
191
79d1ffd @samuelclay Holy hell, the Mongo transition is nearly complete. All that's left i…
authored
if __name__ == '__main__':
    # Migration steps are toggled by hand: uncomment the step to run.
    # bootstrap_stories()
    # bootstrap_userstories()
    # bootstrap_classifiers()
    # bootstrap_feedpages()
    # compress_stories()
    reindex_stories()
Something went wrong with that request. Please try again.