
First half of DynamoDB trial, converting stories from mongo to dynamodb. Stories still need to be updated/inserted on feed update, and then all MStory uses need to be processed.
commit 0a03154473872f0b5d96e7a3de3d9bddc315c934 (1 parent: 95326cc)
Samuel Clay authored September 04, 2012
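
The second half flagged above (writing stories to DynamoDB as feeds refresh) would presumably hook into Feed.add_update_stories(), which this commit already touches. A minimal sketch, using the MStory.save_to_dynamodb() helper and Feed.backed_by_dynamodb flag introduced below; the wrapper name is hypothetical and none of this is in the commit:

    # Hypothetical follow-up, not in this commit: mirror each saved story to
    # DynamoDB once its feed has been converted.
    def save_story(feed, story):
        story.save()                   # existing MongoDB write
        if feed.backed_by_dynamodb:    # column added by migration 0059 below
            story.save_to_dynamodb()   # helper added to MStory below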
32  apps/reader/models.py
@@ -10,7 +10,7 @@
10 10
 from django.contrib.auth.models import User
11 11
 from mongoengine.queryset import OperationError
12 12
 from apps.reader.managers import UserSubscriptionManager
13  
-from apps.rss_feeds.models import Feed, MStory, DuplicateFeed
  13
+from apps.rss_feeds.models import Feed, MStory, DStory, DuplicateFeed
14 14
 from apps.analyzer.models import MClassifierFeed, MClassifierAuthor, MClassifierTag, MClassifierTitle
15 15
 from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
16 16
 from utils.feed_functions import add_object_to_folder
@@ -137,15 +137,23 @@ def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', with
137 137
         else:
138 138
             byscorefunc = r.zrevrangebyscore
139 139
             min_score = current_time
140  
-            # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
141  
-            max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
  140
+            if read_filter == 'unread':
  141
+                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
  142
+                max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
  143
+            else:
  144
+                max_score = 0
142 145
 
143 146
         if settings.DEBUG:
144  
-            print " ---> Unread all stories: %s" % r.zrevrange(unread_ranked_stories_key, 0, -1)
  147
+            debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
  148
+            print " ---> Unread all stories (%s - %s) %s stories: %s" % (
  149
+                min_score,
  150
+                max_score,
  151
+                len(debug_stories),
  152
+                debug_stories)
145 153
         story_ids = byscorefunc(unread_ranked_stories_key, min_score, 
146 154
                                   max_score, start=offset, num=limit,
147 155
                                   withscores=withscores)
148  
-
  156
+        print story_ids, ignore_user_stories, order, read_filter
149 157
         r.expire(unread_ranked_stories_key, 24*60*60)
150 158
         if not ignore_user_stories:
151 159
             r.delete(unread_stories_key)
@@ -153,7 +161,19 @@ def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', with
153 161
         # XXX TODO: Remove below line after combing redis for these None's.
154 162
         story_ids = [s for s in story_ids if s and s != 'None'] # ugh, hack
155 163
         
156  
-        return story_ids
  164
+        if withscores:
  165
+            return story_ids
  166
+        elif story_ids:
  167
+            if self.feed.backed_by_dynamodb:
  168
+                mstories = DStory.get_batch(story_ids, table=settings.DDB)
  169
+                mstories = sorted(mstories, key=lambda s: s.story_date, reverse=bool(order=='newest'))
  170
+            else:
  171
+                story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
  172
+                mstories = MStory.objects(id__in=story_ids).order_by(story_date_order)
  173
+            stories = Feed.format_stories(mstories)
  174
+            return stories
  175
+        else:
  176
+            return []
157 177
         
158 178
     @classmethod
159 179
     def feed_stories(cls, user_id, feed_ids, offset=0, limit=6, order='newest', read_filter='all'):
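
With this change UserSubscription.get_stories() no longer returns bare story ids (unless withscores is passed); it looks the ids up itself, in DynamoDB via DStory.get_batch() when the feed has been converted and in Mongo via MStory otherwise, and returns Feed.format_stories() dicts. A rough caller-side sketch, with the subscription lookup as a placeholder:

    usersub = UserSubscription.objects.get(user=user, feed=feed)   # placeholder lookup
    stories = usersub.get_stories(order='newest', read_filter='unread', offset=0, limit=6)
    for story in stories:
        # plain dicts from Feed.format_stories(), identical for Mongo- and DynamoDB-backed feeds
        print story['story_title'], story['story_date']

This is what lets load_single_feed in the next file drop its own MStory query.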
7  apps/reader/views.py
@@ -427,11 +427,8 @@ def load_single_feed(request, feed_id):
427 427
     except UserSubscription.DoesNotExist:
428 428
         usersub = None
429 429
     
430  
-    if usersub and (read_filter == 'unread' or order == 'oldest'):
431  
-        story_ids = usersub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
432  
-        story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
433  
-        mstories = MStory.objects(id__in=story_ids).order_by(story_date_order)
434  
-        stories = Feed.format_stories(mstories)
  430
+    if usersub:
  431
+        stories = usersub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
435 432
     else:
436 433
         stories = feed.get_stories(offset, limit)
437 434
     
88  apps/rss_feeds/migrations/0059_feed_backed_ddb.py
... ...
@@ -0,0 +1,88 @@
  1
+# -*- coding: utf-8 -*-
  2
+import datetime
  3
+from south.db import db
  4
+from south.v2 import SchemaMigration
  5
+from django.db import models
  6
+
  7
+
  8
+class Migration(SchemaMigration):
  9
+
  10
+    def forwards(self, orm):
  11
+        # Adding field 'Feed.backed_by_dynamodb'
  12
+        db.add_column('feeds', 'backed_by_dynamodb',
  13
+                      self.gf('django.db.models.fields.BooleanField')(default=False),
  14
+                      keep_default=False)
  15
+
  16
+
  17
+    def backwards(self, orm):
  18
+        # Deleting field 'Feed.backed_by_dynamodb'
  19
+        db.delete_column('feeds', 'backed_by_dynamodb')
  20
+
  21
+
  22
+    models = {
  23
+        'rss_feeds.duplicatefeed': {
  24
+            'Meta': {'object_name': 'DuplicateFeed'},
  25
+            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}),
  26
+            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}),
  27
+            'duplicate_link': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}),
  28
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
  29
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
  30
+        },
  31
+        'rss_feeds.feed': {
  32
+            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
  33
+            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
  34
+            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
  35
+            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
  36
+            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  37
+            'backed_by_dynamodb': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  38
+            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
  39
+            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
  40
+            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
  41
+            'errors_since_good': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  42
+            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
  43
+            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  44
+            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
  45
+            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  46
+            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255', 'db_index': 'True'}),
  47
+            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
  48
+            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
  49
+            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  50
+            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
  51
+            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  52
+            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
  53
+            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
  54
+            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
  55
+            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
  56
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
  57
+            'is_push': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
  58
+            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
  59
+            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  60
+            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
  61
+            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
  62
+            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  63
+            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
  64
+            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
  65
+            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
  66
+            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
  67
+            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
  68
+        },
  69
+        'rss_feeds.feeddata': {
  70
+            'Meta': {'object_name': 'FeedData'},
  71
+            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
  72
+            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
  73
+            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
  74
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
  75
+            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
  76
+            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
  77
+            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
  78
+        },
  79
+        'rss_feeds.feedloadtime': {
  80
+            'Meta': {'object_name': 'FeedLoadtime'},
  81
+            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
  82
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
  83
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
  84
+            'loadtime': ('django.db.models.fields.FloatField', [], {})
  85
+        }
  86
+    }
  87
+
  88
+    complete_apps = ['rss_feeds']
85  apps/rss_feeds/migrations/0060_dstory.py
... ...
@@ -0,0 +1,85 @@
  1
+# -*- coding: utf-8 -*-
  2
+import datetime
  3
+from south.db import db
  4
+from south.v2 import DataMigration
  5
+from django.db import models
  6
+from apps.rss_feeds.models import DStory
  7
+
  8
+class Migration(DataMigration):
  9
+
  10
+    def forwards(self, orm):
  11
+        from dynamodb_mapper.model import ConnectionBorg
  12
+
  13
+        conn = ConnectionBorg()
  14
+        conn.create_table(DStory, 100, 100, wait_for_active=True)
  15
+
  16
+    def backwards(self, orm):
  17
+        "Write your backwards methods here."
  18
+
  19
+    models = {
  20
+        'rss_feeds.duplicatefeed': {
  21
+            'Meta': {'object_name': 'DuplicateFeed'},
  22
+            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}),
  23
+            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}),
  24
+            'duplicate_link': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}),
  25
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
  26
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
  27
+        },
  28
+        'rss_feeds.feed': {
  29
+            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
  30
+            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
  31
+            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
  32
+            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
  33
+            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  34
+            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
  35
+            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
  36
+            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
  37
+            'errors_since_good': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  38
+            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
  39
+            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  40
+            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
  41
+            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  42
+            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255', 'db_index': 'True'}),
  43
+            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
  44
+            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
  45
+            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  46
+            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
  47
+            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
  48
+            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
  49
+            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
  50
+            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
  51
+            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
  52
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
  53
+            'is_push': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
  54
+            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
  55
+            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  56
+            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
  57
+            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
  58
+            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
  59
+            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
  60
+            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
  61
+            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
  62
+            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
  63
+            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
  64
+        },
  65
+        'rss_feeds.feeddata': {
  66
+            'Meta': {'object_name': 'FeedData'},
  67
+            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
  68
+            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
  69
+            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
  70
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
  71
+            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
  72
+            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
  73
+            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
  74
+        },
  75
+        'rss_feeds.feedloadtime': {
  76
+            'Meta': {'object_name': 'FeedLoadtime'},
  77
+            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
  78
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
  79
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
  80
+            'loadtime': ('django.db.models.fields.FloatField', [], {})
  81
+        }
  82
+    }
  83
+
  84
+    complete_apps = ['rss_feeds']
  85
+    symmetrical = True
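
Migration 0060 only provisions the DynamoDB table; the equivalent one-off from a Django shell, assuming AWS credentials are already configured (see the settings.py change below), would look like this:

    from dynamodb_mapper.model import ConnectionBorg
    from apps.rss_feeds.models import DStory

    conn = ConnectionBorg()
    # 100 read / 100 write provisioned units; block until the table is ACTIVE
    conn.create_table(DStory, 100, 100, wait_for_active=True)

Both migrations would then be applied with the usual ./manage.py migrate rss_feeds.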
97  apps/rss_feeds/models.py
@@ -8,8 +8,11 @@
8 8
 import zlib
9 9
 import hashlib
10 10
 import redis
  11
+import bson
  12
+import pytz
11 13
 from collections import defaultdict
12 14
 from operator import itemgetter
  15
+from vendor.dynamodb_mapper.model import DynamoDBModel, ConnectionBorg
13 16
 # from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures
14 17
 from django.db import models
15 18
 from django.db import IntegrityError
@@ -77,6 +80,7 @@ class Feed(models.Model):
77 80
     last_load_time = models.IntegerField(default=0)
78 81
     favicon_color = models.CharField(max_length=6, null=True, blank=True)
79 82
     favicon_not_found = models.BooleanField(default=False)
  83
+    backed_by_dynamodb = models.BooleanField(default=False)
80 84
 
81 85
     class Meta:
82 86
         db_table="feeds"
@@ -295,6 +299,21 @@ def update_all_statistics(self, full=True, force=False):
295 299
     def setup_feed_for_premium_subscribers(self):
296 300
         self.count_subscribers()
297 301
         self.set_next_scheduled_update()
  302
+    
  303
+    def convert_to_dynamodb(self):
  304
+        stories = MStory.objects.filter(story_feed_id=self.pk)
  305
+        batch_stories = []
  306
+        logging.debug('   ---> [%-30s] Converting %s stories to DynamoDB...' % (unicode(self)[:30],
  307
+                                                                                stories.count()))
  308
+        for story in stories:
  309
+            item = story.save_to_dynamodb(batch=True)
  310
+            batch_stories.append(item._to_db_dict())
  311
+            # story.delete()
  312
+
  313
+        DStory.batch_write(batch_stories)
  314
+
  315
+        self.backed_by_dynamodb = True
  316
+        self.save()
298 317
         
299 318
     def check_feed_link_for_feed_address(self):
300 319
         @timelimit(10)
@@ -731,12 +750,7 @@ def get_by_id(cls, feed_id, feed_address=None):
731 750
                     return duplicate_feeds[0].feed
732 751
                 
733 752
     def add_update_stories(self, stories, existing_stories, verbose=False):
734  
-        ret_values = {
735  
-            ENTRY_NEW:0,
736  
-            ENTRY_UPDATED:0,
737  
-            ENTRY_SAME:0,
738  
-            ENTRY_ERR:0
739  
-        }
  753
+        ret_values = dict(new=0, updated=0, same=0, error=0)
740 754
 
741 755
         for story in stories:
742 756
             if not story.get('title'):
@@ -759,9 +773,9 @@ def add_update_stories(self, stories, existing_stories, verbose=False):
759 773
                 )
760 774
                 try:
761 775
                     s.save()
762  
-                    ret_values[ENTRY_NEW] += 1
  776
+                    ret_values['new'] += 1
763 777
                 except (IntegrityError, OperationError):
764  
-                    ret_values[ENTRY_ERR] += 1
  778
+                    ret_values['error'] += 1
765 779
                     if verbose:
766 780
                         logging.info('   ---> [%-30s] ~SN~FRIntegrityError on new story: %s' % (self.feed_title[:30], story.get('title')[:30]))
767 781
             elif existing_story and story_has_changed:
@@ -782,7 +796,7 @@ def add_update_stories(self, stories, existing_stories, verbose=False):
782 796
                     else:
783 797
                         raise MStory.DoesNotExist
784 798
                 except (MStory.DoesNotExist, OperationError):
785  
-                    ret_values[ENTRY_ERR] += 1
  799
+                    ret_values['error'] += 1
786 800
                     if verbose:
787 801
                         logging.info('   ---> [%-30s] ~SN~FROperation on existing story: %s' % (self.feed_title[:30], story.get('title')[:30]))
788 802
                     continue
@@ -817,17 +831,17 @@ def add_update_stories(self, stories, existing_stories, verbose=False):
817 831
                 existing_story.story_tags = story_tags
818 832
                 try:
819 833
                     existing_story.save()
820  
-                    ret_values[ENTRY_UPDATED] += 1
  834
+                    ret_values['updated'] += 1
821 835
                 except (IntegrityError, OperationError):
822  
-                    ret_values[ENTRY_ERR] += 1
  836
+                    ret_values['error'] += 1
823 837
                     if verbose:
824 838
                         logging.info('   ---> [%-30s] ~SN~FRIntegrityError on updated story: %s' % (self.feed_title[:30], story.get('title')[:30]))
825 839
                 except ValidationError:
826  
-                    ret_values[ENTRY_ERR] += 1
  840
+                    ret_values['error'] += 1
827 841
                     if verbose:
828 842
                         logging.info('   ---> [%-30s] ~SN~FRValidationError on updated story: %s' % (self.feed_title[:30], story.get('title')[:30]))
829 843
             else:
830  
-                ret_values[ENTRY_SAME] += 1
  844
+                ret_values['same'] += 1
831 845
                 # logging.debug("Unchanged story: %s " % story.get('title'))
832 846
         
833 847
         return ret_values
@@ -947,10 +961,13 @@ def format_stories(cls, stories_db, feed_id=None):
947 961
     
948 962
     @classmethod
949 963
     def format_story(cls, story_db, feed_id=None, text=False):
  964
+        if isinstance(story_db.story_content_z, unicode):
  965
+            story_db.story_content_z = story_db.story_content_z.decode('base64')
  966
+            
950 967
         story_content = story_db.story_content_z and zlib.decompress(story_db.story_content_z) or ''
951 968
         story                     = {}
952 969
         story['story_tags']       = story_db.story_tags or []
953  
-        story['story_date']       = story_db.story_date
  970
+        story['story_date']       = story_db.story_date.replace(tzinfo=None)
954 971
         story['story_authors']    = story_db.story_author_name
955 972
         story['story_title']      = story_db.story_title
956 973
         story['story_content']    = story_content
@@ -1414,7 +1431,59 @@ def count_comments(self):
1414 1431
         self.share_count = shares.count()
1415 1432
         self.share_user_ids = [s['user_id'] for s in shares]
1416 1433
         self.save()
  1434
+    
  1435
+    def save_to_dynamodb(self, batch=False):
  1436
+        mongo_dict = self._data
  1437
+        ddb_dict = dict(mongo_id=unicode(self.id))
  1438
+        allowed_keys = DStory.__schema__.keys()
  1439
+
  1440
+        for story_key, story_value in mongo_dict.items():
  1441
+            if story_key not in allowed_keys:
  1442
+                continue
  1443
+            elif isinstance(story_value, bson.binary.Binary):
  1444
+                ddb_dict[story_key] = unicode(story_value.encode('base64'))
  1445
+            elif isinstance(story_value, list):
  1446
+                ddb_dict[story_key] = set(story_value)
  1447
+            elif isinstance(story_value, str):
  1448
+                ddb_dict[story_key] = unicode(story_value)
  1449
+            elif isinstance(story_value, datetime.datetime):
  1450
+                ddb_dict[story_key] = story_value.replace(tzinfo=pytz.UTC)
  1451
+            else:
  1452
+                ddb_dict[story_key] = story_value
1417 1453
         
  1454
+        dstory = DStory(**ddb_dict)
  1455
+        if batch:
  1456
+            return dstory
  1457
+        else:
  1458
+            dstory.save()
  1459
+        
  1460
+class DStory(DynamoDBModel):
  1461
+    '''Story backed by Amazon's DynamoDB'''
  1462
+    __table__ = "stories"
  1463
+    __hash_key__ = "mongo_id"
  1464
+    __schema__ = {
  1465
+        "mongo_id": unicode,
  1466
+        "story_feed_id": int,
  1467
+        "story_date": datetime.datetime,
  1468
+        "story_title": unicode,
  1469
+        "story_content_z": unicode,
  1470
+        "story_original_content_z": unicode,
  1471
+        "story_latest_content_z": unicode,
  1472
+        "story_content_type": unicode,
  1473
+        "story_author_name": unicode,
  1474
+        "story_permalink": unicode,
  1475
+        "story_guid": unicode,
  1476
+        "story_tags": set,
  1477
+        "comment_count": int,
  1478
+        "comment_user_ids": set,
  1479
+        "share_count": int,
  1480
+        "share_user_ids": set,
  1481
+    }
  1482
+    
  1483
+    @classmethod
  1484
+    def create_table(cls):
  1485
+        conn = ConnectionBorg()
  1486
+        conn.create_table(cls, 1000, 1000, wait_for_active=True)
1418 1487
 
1419 1488
 class MStarredStory(mongo.Document):
1420 1489
     """Like MStory, but not inherited due to large overhead of _cls and _type in
10  settings.py
@@ -3,7 +3,9 @@
3 3
 import os
4 4
 import datetime
5 5
 from mongoengine import connect
  6
+from vendor.dynamodb_mapper.model import ConnectionBorg
6 7
 import redis
  8
+import boto
7 9
 from utils import jammit
8 10
 
9 11
 # ===================
@@ -411,6 +413,13 @@ def allow_syncdb(self, db, model):
411 413
 AWS_ACCESS_KEY_ID = S3_ACCESS_KEY
412 414
 AWS_SECRET_ACCESS_KEY = S3_SECRET
413 415
 
  416
+os.environ["AWS_ACCESS_KEY_ID"] = AWS_ACCESS_KEY_ID
  417
+os.environ["AWS_SECRET_ACCESS_KEY"] = AWS_SECRET_ACCESS_KEY
  418
+try:
  419
+    DDB = ConnectionBorg().get_table('stories')
  420
+except boto.exception.DynamoDBResponseError:
  421
+    DDB = None
  422
+
414 423
 def custom_show_toolbar(request):
415 424
     return DEBUG
416 425
 
@@ -446,4 +455,3 @@ def custom_show_toolbar(request):
446 455
     MIDDLEWARE_CLASSES += ('utils.redis_raw_log_middleware.SqldumpMiddleware',)
447 456
     MIDDLEWARE_CLASSES += ('utils.request_introspection_middleware.DumpRequestMiddleware',)
448 457
     MIDDLEWARE_CLASSES += ('utils.exception_middleware.ConsoleExceptionMiddleware',)
449  
-
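
settings.py now exports the boto table handle as settings.DDB (None if the 'stories' table can't be reached), which is what DStory.get_batch() is handed in get_stories() above. Putting the pieces together, a sketch of converting one feed from a Django shell; the feed id is a placeholder and the None guard is an assumption rather than anything this commit enforces:

    from django.conf import settings
    from apps.rss_feeds.models import Feed

    feed = Feed.objects.get(pk=42)       # placeholder feed id
    if settings.DDB is not None:         # table handle resolved at startup, None when unreachable
        feed.convert_to_dynamodb()       # batch-writes every MStory via DStory, then sets backed_by_dynamodb=True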
58  utils/feed_fetcher.py
@@ -23,7 +23,6 @@
23 23
 # Refresh feed code adapted from Feedjack.
24 24
 # http://feedjack.googlecode.com
25 25
 
26  
-ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
27 26
 FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
28 27
 
29 28
 def mtime(ttime):
@@ -95,12 +94,6 @@ def __init__(self, feed_id, fpf, options):
95 94
         self.feed_id = feed_id
96 95
         self.options = options
97 96
         self.fpf = fpf
98  
-        self.entry_trans = {
99  
-            ENTRY_NEW:'new',
100  
-            ENTRY_UPDATED:'updated',
101  
-            ENTRY_SAME:'same',
102  
-            ENTRY_ERR:'error'}
103  
-        self.entry_keys = sorted(self.entry_trans.keys())
104 97
     
105 98
     def refresh_feed(self):
106 99
         self.feed = Feed.get_by_id(self.feed_id)
@@ -111,11 +104,7 @@ def process(self):
111 104
         start = time.time()
112 105
         self.refresh_feed()
113 106
         
114  
-        ret_values = {
115  
-            ENTRY_NEW:0,
116  
-            ENTRY_UPDATED:0,
117  
-            ENTRY_SAME:0,
118  
-            ENTRY_ERR:0}
  107
+        ret_values = dict(new=0, updated=0, same=0, error=0)
119 108
 
120 109
         # logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))
121 110
 
@@ -198,27 +187,16 @@ def process(self):
198 187
         if not self.feed.feed_link_locked:
199 188
             self.feed.feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
200 189
         
201  
-        guids = []
202  
-        for entry in self.fpf.entries:
203  
-            if entry.get('id', ''):
204  
-                guids.append(entry.get('id', ''))
205  
-            elif entry.get('link'):
206  
-                guids.append(entry.link)
207  
-            elif entry.get('title'):
208  
-                guids.append(entry.title)
209 190
         self.feed = self.feed.save()
210 191
 
211 192
         # Compare new stories to existing stories, adding and updating
212 193
         start_date = datetime.datetime.utcnow()
213  
-        # end_date = datetime.datetime.utcnow()
214 194
         story_guids = []
215 195
         stories = []
216 196
         for entry in self.fpf.entries:
217 197
             story = pre_process_story(entry)
218 198
             if story.get('published') < start_date:
219 199
                 start_date = story.get('published')
220  
-            # if story.get('published') > end_date:
221  
-            #     end_date = story.get('published')
222 200
             stories.append(story)
223 201
             story_guids.append(story.get('guid') or story.get('link'))
224 202
 
@@ -228,11 +206,6 @@ def process(self):
228 206
             story_feed_id=self.feed_id
229 207
         ).limit(len(story_guids)))
230 208
         
231  
-        # MStory.objects(
232  
-        #     (Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
233  
-        #     | (Q(story_guid__in=story_guids)),
234  
-        #     story_feed=self.feed
235  
-        # ).order_by('-story_date')
236 209
         ret_values = self.feed.add_update_stories(stories, existing_stories,
237 210
                                                   verbose=self.options['verbose'])
238 211
 
@@ -253,12 +226,12 @@ def process(self):
253 226
         
254 227
         logging.debug(u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
255 228
                       self.feed.title[:30], 
256  
-                      '~FG~SB' if ret_values[ENTRY_NEW] else '', ret_values[ENTRY_NEW],
257  
-                      '~FY~SB' if ret_values[ENTRY_UPDATED] else '', ret_values[ENTRY_UPDATED],
258  
-                      '~SB' if ret_values[ENTRY_SAME] else '', ret_values[ENTRY_SAME],
259  
-                      '~FR~SB' if ret_values[ENTRY_ERR] else '', ret_values[ENTRY_ERR],
  229
+                      '~FG~SB' if ret_values['new'] else '', ret_values['new'],
  230
+                      '~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
  231
+                      '~SB' if ret_values['same'] else '', ret_values['same'],
  232
+                      '~FR~SB' if ret_values['error'] else '', ret_values['error'],
260 233
                       len(self.fpf.entries)))
261  
-        self.feed.update_all_statistics(full=bool(ret_values[ENTRY_NEW]), force=self.options['force'])
  234
+        self.feed.update_all_statistics(full=bool(ret_values['new']), force=self.options['force'])
262 235
         self.feed.trim_feed()
263 236
         self.feed.save_feed_history(200, "OK")
264 237
         
@@ -272,11 +245,6 @@ def process(self):
272 245
 class Dispatcher:
273 246
     def __init__(self, options, num_threads):
274 247
         self.options = options
275  
-        self.entry_stats = {
276  
-            ENTRY_NEW:0,
277  
-            ENTRY_UPDATED:0,
278  
-            ENTRY_SAME:0,
279  
-            ENTRY_ERR:0}
280 248
         self.feed_stats = {
281 249
             FEED_OK:0,
282 250
             FEED_SAME:0,
@@ -307,12 +275,6 @@ def process_feed_wrapper(self, feed_queue):
307 275
             identity = current_process._identity[0]
308 276
             
309 277
         for feed_id in feed_queue:
310  
-            ret_entries = {
311  
-                ENTRY_NEW: 0,
312  
-                ENTRY_UPDATED: 0,
313  
-                ENTRY_SAME: 0,
314  
-                ENTRY_ERR: 0
315  
-            }
316 278
             start_time = time.time()
317 279
             ret_feed = FEED_ERREXC
318 280
             try:
@@ -348,7 +310,7 @@ def process_feed_wrapper(self, feed_queue):
348 310
                     ret_feed, ret_entries = pfeed.process()
349 311
                     feed = pfeed.feed
350 312
                     
351  
-                    if ret_entries.get(ENTRY_NEW) or self.options['force']:
  313
+                    if ret_entries['new'] or self.options['force']:
352 314
                         start = time.time()
353 315
                         if not feed.known_good or not feed.fetched_once:
354 316
                             feed.known_good = True
@@ -363,7 +325,7 @@ def process_feed_wrapper(self, feed_queue):
363 325
                             logging.debug(u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
364 326
                                           feed.title[:30], time.time() - start))
365 327
                     cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
366  
-                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
  328
+                    # if ret_entries['new'] or ret_entries['updated'] or self.options['force']:
367 329
                     #     feed.get_stories(force=True)
368 330
             except KeyboardInterrupt:
369 331
                 break
@@ -444,7 +406,7 @@ def process_feed_wrapper(self, feed_queue):
444 406
             except IntegrityError:
445 407
                 logging.debug("   ---> [%-30s] ~FRIntegrityError on feed: %s" % (feed.title[:30], feed.feed_address,))
446 408
             
447  
-            if ret_entries[ENTRY_NEW]:
  409
+            if ret_entries['new']:
448 410
                 self.publish_to_subscribers(feed)
449 411
                 
450 412
             done_msg = (u'%2s ---> [%-30s] ~FYProcessed in ~FM~SB%.4ss~FY~SN (~FB%s~FY) [%s]' % (
@@ -453,8 +415,6 @@ def process_feed_wrapper(self, feed_queue):
453 415
             logging.debug(done_msg)
454 416
             
455 417
             self.feed_stats[ret_feed] += 1
456  
-            for key, val in ret_entries.items():
457  
-                self.entry_stats[key] += val
458 418
                 
459 419
         if len(feed_queue) == 1:
460 420
             return feed
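
The ENTRY_* integer constants and the per-run entry_stats bookkeeping are gone; fetch results are plain string-keyed dicts end to end. Inside Dispatcher.process_feed_wrapper(), the consuming side now reads roughly:

    # process() hands back string-keyed counts instead of ENTRY_*-indexed ones
    ret_feed, ret_entries = pfeed.process()
    # ret_entries looks like {'new': 3, 'updated': 1, 'same': 20, 'error': 0}
    if ret_entries['new']:
        self.publish_to_subscribers(feed)   # same check the dispatcher makes above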
3  utils/feed_functions.py
@@ -41,7 +41,8 @@ def run(self):
41 41
             #     return function(*args, **kw)
42 42
         return _2
43 43
     return _1
44  
-    
  44
+
  45
+         
45 46
 def utf8encode(tstr):
46 47
     """ Encodes a unicode string in utf-8
47 48
     """
2  utils/json_functions.py
@@ -43,6 +43,8 @@ def _any(data):
43 43
             ret = data.canonical()
44 44
         elif isinstance(data, list):
45 45
             ret = _list(data)
  46
+        elif isinstance(data, set):
  47
+            ret = _list(list(data))
46 48
         # Same as for lists above.
47 49
         elif isinstance(data, dict):
48 50
             ret = _dict(data)
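
The encoder tweak exists because DStory stores list-like fields (story_tags, share_user_ids, comment_user_ids) as Python sets, which the stdlib json module refuses to serialize; they are coerced to lists first. Roughly:

    import json

    story_tags = set(['python', 'dynamodb'])
    # json.dumps(story_tags) would raise a TypeError: sets are not JSON serializable
    print json.dumps(list(story_tags))   # what the json encoder now does for sets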
0  vendor/dynamodb_mapper/__init__.py
No changes.
946  vendor/dynamodb_mapper/model.py
... ...
@@ -0,0 +1,946 @@
  1
+"""Object mapper for Amazon DynamoDB.
  2
+
  3
+Based in part on mongokit's Document interface.
  4
+
  5
+Released under the GNU LGPL, version 3 or later (see COPYING).
  6
+"""
  7
+from __future__ import absolute_import
  8
+
  9
+import json
  10
+import logging
  11
+import threading
  12
+from datetime import datetime, timedelta, tzinfo
  13
+
  14
+import boto
  15
+from boto.dynamodb.item import Item
  16
+from boto.exception import DynamoDBResponseError
  17
+from boto.dynamodb.exceptions import DynamoDBConditionalCheckFailedError
  18
+
  19
+
  20
+log = logging.getLogger(__name__)
  21
+dblog = logging.getLogger(__name__+".database-access")
  22
+
  23
+
  24
+MAX_RETRIES = 100
  25
+# primary key of the magic item used for autoinc
  26
+MAGIC_KEY = -1
  27
+
  28
+class SchemaError(Exception):
  29
+    """SchemaError exception is raised when a schema consistency check fails.
  30
+    Most of the checks are performed in :py:meth:`~.ConnectionBorg.create_table`.
  31
+
  32
+    Common consistency failures include a missing ``__table__``, ``__hash_key__``,
  33
+    or ``__schema__`` definition, or an :py:class:`~.autoincrement_int` ``hash_key``
  34
+    used with a ``range_key``.
  35
+    """
  36
+
  37
+
  38
+class MaxRetriesExceededError(Exception):
  39
+    """Raised when a failed operation couldn't be completed after retrying
  40
+    ``MAX_RETRIES`` times (e.g. saving an autoincrementing hash_key).
  41
+    """
  42
+
  43
+
  44
+class OverwriteError(Exception):
  45
+    """Raised when saving a DynamoDBModel instance would overwrite something
  46
+    in the database and we've forbidden that because we believe we're creating
  47
+    a new one (see :meth:`DynamoDBModel.save`).
  48
+    """
  49
+
  50
+
  51
+class ConflictError(Exception):
  52
+    """Atomic edition failure.
  53
+    Raised when an Item has been changed between the read and the write operation
  54
+    and this has been forbidden by the ``raise_on_conflict`` argument of
  55
+    :meth:`DynamoDBModel.save` (i.e. when somebody changed the DB's version of
  56
+    your object behind your back).
  57
+    """
  58
+
  59
+
  60
+class InvalidRegionError(Exception):
  61
+    """Raised when ``set_region()`` is called with an invalid region name.
  62
+    """
  63
+
  64
+
  65
+class autoincrement_int(int):
  66
+    """Dummy int subclass for use in your schemas.
  67
+
  68
+    If you're using this class as the type for your key in a hash_key-only
  69
+    table, new objects in your table will have an auto-incrementing primary
  70
+    key.
  71
+
  72
+    Note that you can still insert items with explicit values for your primary
  73
+    key -- the autoincrementing scheme is only used for objects with unset
  74
+    hash_keys (or to be more precise, left set to the default value of 0).
  75
+
  76
+    Auto-incrementing int keys are implemented by storing a special "magic"
  77
+    item in the table with the following properties:
  78
+
  79
+        - ``hash_key_value = -1``
  80
+        - ``__max_hash_key__ = N``
  81
+
  82
+    where N is the maximum used hash_key value.
  83
+
  84
+    Inserting a new item issues an atomic add on the '__max_hash_key__' value.
  85
+    Its new value is returned and used as the primary key for the new elem.
  86
+
  87
+    Note that hash_key_value is set to '-1' while ``__max_hash_key__`` initial
  88
+    value is 0. This will leave the element at key '0' unused. It's actually a garbage item
  89
+    for cases where a value is manually added to an uninitialized index.
  90
+    """
  91
+
  92
+_JSON_TYPES = frozenset([list, dict])
  93
+
  94
+
  95
+class UTC(tzinfo):
  96
+    """UTC timezone"""
  97
+    def utcoffset(self, dt):
  98
+        return timedelta(0)
  99
+
  100
+    def tzname(self, dt):
  101
+        return "UTC"
  102
+
  103
+    def dst(self, dt):
  104
+        return timedelta(0)
  105
+
  106
+
  107
+utc_tz = UTC()
  108
+        
  109
+def _get_proto_value(schema_type):
  110
+    """Return a prototype value matching what schema_type will be serialized
  111
+    as in DynamoDB:
  112
+
  113
+      - For strings and numbers, an instance of schema_type.
  114
+      - For "special" types implemented at the mapper level (list, dict,
  115
+        datetime), an empty string (this is what they're stored as in the DB).
  116
+    """
  117
+    # Those types must be serialized as strings
  118
+    if schema_type in _JSON_TYPES:
  119
+        return u""
  120
+
  121
+    if schema_type is datetime:
  122
+        return u""
  123
+
  124
+    # Regular string/number
  125
+    return schema_type()
  126
+
  127
+
  128
+def _get_default_value(schema_type, default=None):
  129
+    """Return a default value matching schema_type or default if provided:
  130
+
  131
+      - For datetime.datetime, it's NOW.
  132
+      - For container types, it's an empty container.
  133
+      - For strings, it's an empty string.
  134
+      - For numbers, it's zero.
  135
+
  136
+    This function may raise TypeError exception if:
  137
+
  138
+       - default was callable and required arguments
  139
+       - default or its return value is not an instance of schema_type
  140
+
  141
+    :param schema_type: class object to instantiate
  142
+    :param default: default value. May be a value or a callable (function, class, ...). It must *NOT* require any arguments and its type must match schema_type
  143
+
  144
+    """
  145
+    if default is not None:
  146
+        # If default is callable(function, constructor, ...), try to dereference it
  147
+        if hasattr(default, '__call__'):
  148
+            # Might raise a "TypeError" if arguments were needed
  149
+            default = default()
  150
+        # Check default value consistency
  151
+        if not isinstance(default, schema_type):
  152
+            raise TypeError("Expected default value of type {}, got: {}".format(schema_type, type(default)))
  153
+        else:
  154
+            return default
  155
+
  156
+    if schema_type is datetime:
  157
+        # Current Timestamp
  158
+        return datetime.now(tz=utc_tz)
  159
+
  160
+    return schema_type()
  161
+
  162
+
  163
+def _python_to_dynamodb(value):
  164
+    """Convert a Python object to a representation suitable to direct storage
  165
+    in DynamoDB, according to a type from a DynamoDBModel schema.
  166
+
  167
+    If value should be represented as a missing value in DynamoDB
  168
+    (empty string or set), None is returned.
  169
+
  170
+    ``_dynamodb_to_python(t, _python_to_dynamodb(v)) == v`` for any v.
  171
+
  172
+    :param value: The Python object to convert.
  173
+
  174
+    :return: ``value``, serialized to DynamoDB, or ``None`` if ``value`` must
  175
+        be represented as a missing attribute.
  176
+    """
  177
+    if isinstance(value, tuple(_JSON_TYPES)):
  178
+        # json serialization hooks for json_* data types.
  179
+        return json.dumps(value, sort_keys=True)
  180
+
  181
+    if isinstance(value, datetime):
  182
+        # datetime instances are stored as UTC in the DB itself.
  183
+        # (that way, they become sortable)
  184
+        # datetime objects without tzinfo are not supported.
  185
+        s = value.astimezone(utc_tz).strftime("%Y-%m-%dT%H:%M:%S.%f%z")
  186
+        # there is no strftime code to output the timezone with the ':' that
  187
+        # is mandated by the W3CDTF format, so here's an ugly hack
  188
+        s = s[:-2] + ':' + s[-2:]
  189
+        return s
  190
+
  191
+    # This case prevents `'fields': False` from being added when generating expected
  192
+    # values dict in save as this would mean 'field does not exist' instead of
  193
+    # 'field exists and is False'.
  194
+    if isinstance(value, bool):
  195
+        return int(value)
  196
+
  197
+    if value or value == 0:
  198
+        return value
  199
+
  200
+    # Yes, that part is horrible. DynamoDB can't store empty
  201
+    # sets/strings, so we're representing them as missing
  202
+    # attributes on the DB side.
  203
+    return None
  204
+
  205
+
  206
+def _dynamodb_to_python(schema_type, value):
  207
+    """Convert a DynamoDB attribute value to a Python object, according to a
  208
+    type from a DynamoDBModel schema.
  209
+
  210
+    If value is None (usually because the attribute was missing in the first
  211
+    place), a default value is returned (empty string or set, or 0, depending
  212
+    on what the type is).
  213
+
  214
+    ``_dynamodb_to_python(t, _python_to_dynamodb(v)) == v`` for any v.
  215
+
  216
+    :param schema_type: A type supported by the mapper
  217
+
  218
+    .. (TODO Clearly list those).
  219
+
  220
+    :param value: The DynamoDB attribute to convert to a Python object.
  221
+        May be ``None``.
  222
+
  223
+    :return: An instance of ``schema_type``.
  224
+    """
  225
+    # This means a missing attribute for a _JSON_TYPES object is valid,
  226
+    # and results in an empty sequence. Is that a bad thing?
  227
+    if value is None:
  228
+        return None
  229
+
  230
+    if schema_type in _JSON_TYPES:
  231
+        return schema_type(json.loads(value))
  232
+
  233
+    if schema_type is datetime:
  234
+        # Parse TZ-aware isoformat
  235
+
  236
+        # strptime doesn't support timezone parsing (%z flag), so we're forcing
  237
+        # the strings in the database to be UTC (+00:00) for now.
  238
+        # TODO Handle arbitrary timezones (with manual parsing).
  239
+        if value.endswith('Z'):
  240
+            value = value[:-2] + '+00:00'
  241
+        return datetime.strptime(
  242
+            value, "%Y-%m-%dT%H:%M:%S.%f+00:00").replace(tzinfo=utc_tz)
  243
+
  244
+    return schema_type(value)
  245
+
  246
+
  247
+class ConnectionBorg(object):
  248
+    """Borg that handles access to DynamoDB.
  249
+
  250
+    You should never make any explicit/direct ``boto.dynamodb`` calls by yourself
  251
+    except for table maintenance operations:
  252
+
  253
+        - ``boto.dynamodb.table.update_throughput()``
  254
+        - ``boto.dynamodb.table.delete()``
  255
+
  256
+    Remember to call :meth:`set_credentials`, or to set the
  257
+    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment variables
  258
+    before making any calls.
  259
+    """
  260
+    _shared_state = {
  261
+        "_aws_access_key_id": None,
  262
+        "_aws_secret_access_key": None,
  263
+        "_region": None,
  264
+        # {thread_id: connection} mapping
  265
+        "_connections": {},
  266
+    }
  267
+
  268
+    def __init__(self):
  269
+        self.__dict__ = self._shared_state
  270
+
  271
+    def _get_connection(self):
  272
+        """Return the DynamoDB connection for the current thread, establishing
  273
+        it if required.
  274
+        """
  275
+        current_thread = threading.current_thread()
  276
+        thread_id = current_thread.ident
  277
+        try:
  278
+            return self._connections[thread_id]
  279
+        except KeyError:
  280
+            log.debug("Creating DynamoDB connection for thread %s.", current_thread)
  281
+            self._connections[thread_id] = boto.connect_dynamodb(
  282
+                aws_access_key_id=self._aws_access_key_id,
  283
+                aws_secret_access_key=self._aws_secret_access_key,
  284
+                region=self._region,
  285
+            )
  286
+            return self._connections[thread_id]
  287
+
  288
+    def _create_autoincrement_magic_item(self, table):
  289
+        item = table.new_item(hash_key=MAGIC_KEY, attrs={
  290
+            "__max_hash_key__": 0
  291
+        })
  292
+        # Conditional write: don't risk overwriting the DB.
  293
+        item.put({item.hash_key_name: False})
  294
+
  295
+    def set_credentials(self, aws_access_key_id, aws_secret_access_key):
  296
+        """Set the DynamoDB credentials. If boto is already configured on this
  297
+        machine, this step is optional.
  298
+        Access keys can be found in `Amazon's console.
  299
+        <https://aws-portal.amazon.com/gp/aws/developer/account/index.html?action=access-key>`_
  300
+
  301
+        :param aws_access_key_id: AWS api access key ID
  302
+
  303
+        :param aws_secret_access_key: AWS api access key
  304
+
  305
+        """
  306
+        self._aws_access_key_id = aws_access_key_id
  307
+        self._aws_secret_access_key = aws_secret_access_key
  308
+
  309
+    def set_region(self, region_name):
  310
+        """Set the DynamoDB region. If this is not set AWS defaults to 'us-east-1'.
  311
+
  312
+        :param region_name: The name of the region to use
  313
+        """
  314
+        for region in boto.dynamodb.regions():
  315
+            if region.name == region_name:
  316
+                self._region = region
  317
+                return
  318
+
  319
+        raise InvalidRegionError("Region name %s is invalid" % region_name)
  320
+
  321
+    def create_table(self, cls, read_units, write_units, wait_for_active=False):
  322
+        """Create a table that'll be used to store instances of cls.
  323
+
  324
+        See `Amazon's developer guide <http://docs.amazonwebservices.com/amazondynamodb/latest/developerguide/ProvisionedThroughputIntro.html>`_
  325
+        for more information about provisioned throughput.
  326
+
  327
+        :param cls: The class whose instances will be stored in the table.
  328
+
  329
+        :param read_units: The number of read units to provision for this table
  330
+            (minimum 5)
  331
+
  332
+        :param write_units: The number of write units to provision for this
  333
+            table (minimum 5).
  334
+
  335
+        :param wait_for_active: If True, create_table will wait for the table
  336
+            to become ACTIVE before returning (otherwise, it'll be CREATING).
  337
+            Note that this can take up to a minute.
  338
+            Defaults to False.
  339
+        """
  340
+        table_name = cls.__table__
  341
+        hash_key_name = cls.__hash_key__
  342
+        range_key_name = cls.__range_key__
  343
+
  344
+        if not table_name:
  345
+            raise SchemaError("Class does not define __table__", cls)
  346
+
  347
+        # FIXME: check key is defined in schema
  348
+        if not hash_key_name:
  349
+            raise SchemaError("Class does not define __hash_key__", cls)
  350
+
  351
+        if not cls.__schema__:
  352
+            raise SchemaError("Class does not define __schema__", cls)
  353
+
  354
+        hash_key_type = cls.__schema__[hash_key_name]
  355
+
  356
+        if hash_key_type is autoincrement_int:
  357
+            if range_key_name:
  358
+                raise SchemaError(
  359
+                    "Class defines both a range key and an autoincrement_int hash key",
  360
+                    cls)
  361
+            if not wait_for_active:
  362
+                # Maybe we should raise ValueError instead?
  363
+                log.info(
  364
+                    "Class %s has autoincrement_int hash key -- forcing wait_for_active",
  365
+                    cls)
  366
+                wait_for_active = True
  367
+
  368
+        conn = self._get_connection()
  369
+        # It's a prototype/an instance, not a type.
  370
+        hash_key_proto_value = _get_proto_value(hash_key_type)
  371
+        # None in the case of a hash-only table.
  372
+        if range_key_name:
  373
+            # We have a range key, its type must be specified.
  374
+            range_key_proto_value = _get_proto_value(
  375
+                cls.__schema__[range_key_name])
  376
+        else:
  377
+            range_key_proto_value = None
  378
+
  379
+        schema = conn.create_schema(
  380
+            hash_key_name=hash_key_name,
  381
+            hash_key_proto_value=hash_key_proto_value,
  382
+            range_key_name=range_key_name,
  383
+            range_key_proto_value=range_key_proto_value
  384
+        )
  385
+        table = conn.create_table(cls.__table__, schema, read_units, write_units)
  386
+        table.refresh(wait_for_active=wait_for_active)
  387
+
  388
+        if hash_key_type is autoincrement_int:
  389
+            self._create_autoincrement_magic_item(table)
  390
+
  391
+        dblog.debug("Created table %s(%s, %s)", cls.__table__, hash_key_name, range_key_name)
  392
+
  393
+        return table
  394
+
  395
+    def get_table(self, name):
  396
+        """Return the table with the requested name."""
  397
+        return self._get_connection().get_table(name)
  398
+
  399
+    def new_batch_list(self):
  400
+        """Create a new batch list."""
  401
+        return self._get_connection().new_batch_list()
  402
+
  403
+    def new_batch_write_list(self):
  404
+        """Create a new batch list."""
  405
+        return self._get_connection().new_batch_write_list()
  406
+
  407
+
  408
+class DynamoDBModel(object):
  409
+    """Abstract base class for all models that use DynamoDB as their storage
  410
+    backend.
  411
+
  412
+    Each subclass must define the following attributes:
  413
+
  414
+      - ``__table__``: the name of the table used for storage.
  415
+      - ``__hash_key__``: the name of the primary hash key.
  416
+      - ``__range_key__``: (optional) if you're using a composite primary key,
  417
+          the name of the range key.
  418
+      - ``__schema__``: ``{attribute_name: attribute_type}`` mapping.
  419
+          Supported attribute_types are: int, long, float, str, unicode, set.
  420
+          Default values are obtained by calling the type with no args
  421
+          (so 0 for numbers, "" for strings and empty sets).
  422
+      - ``__defaults__``: (optional) ``{attribute_name: defaulter}`` mapping.
  423
+          This dict allows to provide a default value for each attribute_name at
  424
+          object creation time. It will *never* be used when loading from the DB.
  425
+          It is fully optional. If no value is supplied the empty value
  426
+          corresponding to the type will be used.
  427
+          "defaulter" may either be a scalar value or a callable with no
  428
+          arguments.
  429
+
  430
+    To redefine serialization/deserialization semantics (e.g. to have more
  431
+    complex schemas, like auto-serialized JSON data structures), override the
  432
+    _from_dict (deserialization) and _to_db_dict (serialization) methods.
  433
+
  434
+    *Important implementation note regarding sets:* DynamoDB can't store empty
  435
+    sets/strings. Therefore, since we have schema information available to us,
  436
+    we're storing empty sets/strings as missing attributes in DynamoDB, and
  437
+    converting back and forth based on the schema.
  438
+
  439
+    So if your schema looks like the following::
  440
+
  441
+        {
  442
+            "id": unicode,
  443
+            "name": str,
  444
+            "cheats": set
  445
+        }
  446
+
  447
+    then::
  448
+
  449
+        {
  450
+            "id": "e1m1",
  451
+            "name": "Hangar",
  452
+            "cheats": set([
  453
+                "idkfa",
  454
+                "iddqd"
  455
+            ])
  456
+        }
  457
+
  458
+    will be stored exactly as is, but::
  459
+
  460
+        {
  461
+            "id": "e1m2",
  462
+            "name": "",
  463
+            "cheats": set()
  464
+        }
  465
+
  466
+    will be stored as simply::
  467
+
  468
+        {
  469
+            "id": "e1m2"
  470
+        }
  471
+
  472
+
  473
+    .. TODO Add checks for common error cases:
  474
+        - Wrong datatypes in the schema
  475
+        - hash_key/range_key incorrectly defined
  476
+    """
  477
+
  478
+    # TODO Add checks to the various methods so that meaningful error messages
  479
+    # are raised when they're incorrectly overridden.
  480