From 02cc2543ae34f2efc91b8fd6f398c53697eb5049 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <vomjom@vomjom.net>
Date: Sat, 3 Apr 2010 21:15:03 -0500
Subject: [PATCH] SuperColumn support in ColumnFamilyMap

---
 README.mkd                    |  16 +++++
 pycassa/columnfamilymap.py    | 121 ++++++++++++++++++++--------------
 tests/test_columnfamilymap.py |  53 +++++++++++++++
 3 files changed, 139 insertions(+), 51 deletions(-)

diff --git a/README.mkd b/README.mkd
index afeffa8b..06d58fe2 100644
--- a/README.mkd
+++ b/README.mkd
@@ -212,6 +212,22 @@ To use SuperColumns, pass super=True to the ColumnFamily constructor.
     >>> list(cf.get_range(super_column='2'))
     [('key1', {'sub3': 'val3', 'sub4': 'val4'})]
 
+You may also use a ColumnFamilyMap with SuperColumns:
+
+    >>> Test.objects = pycassa.ColumnFamilyMap(Test, cf)
+    >>> t = Test()
+    >>> t.key = 'key1'
+    >>> t.super_column = 'super1'
+    >>> t.string_column = 'foobar'
+    >>> t.int_str_column = 5
+    >>> t.float_column = t.float_str_column = 35.8
+    >>> t.datetime_str_column = datetime.now()
+    >>> Test.objects.insert(t)
+    >>> Test.objects.get(t.key)
+    {'super1': <__main__.Test object at 0x20ab350>}
+    >>> Test.objects.multiget([t.key])
+    {'key1': {'super1': <__main__.Test object at 0x20ab550>}}
+
 These output values retain the same format as given by the Cassandra thrift interface.
 
 Advanced
diff --git a/pycassa/columnfamilymap.py b/pycassa/columnfamilymap.py
index e9be9b43..518f5249 100644
--- a/pycassa/columnfamilymap.py
+++ b/pycassa/columnfamilymap.py
@@ -12,7 +12,9 @@ def combine_columns(column_dict, columns):
     for column, type in column_dict.iteritems():
         combined_columns[column] = type.default
     for column, value in columns.iteritems():
-        combined_columns[column] = column_dict[column].unpack(value)
+        col_cls = column_dict.get(column, None)
+        if col_cls is not None:
+            combined_columns[column] = col_cls.unpack(value)
     return combined_columns
 
 class ColumnFamilyMap(object):
@@ -45,29 +47,34 @@ def get(self, key, *args, **kwargs):
         ----------
         key : str
             The key to fetch
-        columns : [str]
-            Limit the columns fetched to the specified list
-        column_start = str
-            Only fetch when a column is >= column_start
-        column_finish = str
-            Only fetch when a column is <= column_finish
-        column_reversed = bool
-            Fetch the columns in reverse order. Currently this does nothing
-            because columns are converted into a dict.
-        column_count = int
-            Limit the number of columns fetched per key
-        include_timestamp = bool
-            If true, return a (value, timestamp) tuple for each column
+        super_column : str
+            Fetch only this super_column
+        read_consistency_level : ConsistencyLevel
+            Affects the guaranteed replication factor before returning from
+            any read operation
 
         Returns
         -------
         Class instance
         """
-        if 'columns' not in kwargs:
+        if 'columns' not in kwargs and not self.column_family.super:
             kwargs['columns'] = self.columns.keys()
+
         columns = self.column_family.get(key, *args, **kwargs)
-        columns = combine_columns(self.columns, columns)
-        return create_instance(self.cls, key=key, **columns)
+
+        if self.column_family.super:
+            if 'super_column' not in kwargs:
+                vals = {}
+                for super_column, subcols in columns.iteritems():
+                    combined = combine_columns(self.columns, subcols)
+                    vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined)
+                return vals
+
+            combined = combine_columns(self.columns, columns)
+            return create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined)
+
+        combined = combine_columns(self.columns, columns)
+        return create_instance(self.cls, key=key, **combined)
 
     def multiget(self, *args, **kwargs):
         """
@@ -77,31 +84,34 @@ def multiget(self, *args, **kwargs):
         ----------
         keys : [str]
             A list of keys to fetch
-        columns : [str]
-            Limit the columns fetched to the specified list
-        column_start = str
-            Only fetch when a column is >= column_start
-        column_finish = str
-            Only fetch when a column is <= column_finish
-        column_reversed = bool
-            Fetch the columns in reverse order. Currently this does nothing
-            because columns are converted into a dict.
-        column_count = int
-            Limit the number of columns fetched per key
-        include_timestamp = bool
-            If true, return a (value, timestamp) tuple for each column
+        super_column : str
+            Fetch only this super_column
+        read_consistency_level : ConsistencyLevel
+            Affects the guaranteed replication factor before returning from
+            any read operation
 
         Returns
         -------
         {'key': Class instance} 
         """
-        if 'columns' not in kwargs:
+        if 'columns' not in kwargs and not self.column_family.super:
             kwargs['columns'] = self.columns.keys()
         kcmap = self.column_family.multiget(*args, **kwargs)
         ret = {}
         for key, columns in kcmap.iteritems():
-            columns = combine_columns(self.columns, columns)
-            ret[key] = create_instance(self.cls, key=key, **columns)
+            if self.column_family.super:
+                if 'super_column' not in kwargs:
+                    vals = {}
+                    for super_column, subcols in columns.iteritems():
+                        combined = combine_columns(self.columns, subcols)
+                        vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined)
+                    ret[key] = vals
+                else:
+                    combined = combine_columns(self.columns, columns)
+                    ret[key] = create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined)
+            else:
+                combined = combine_columns(self.columns, columns)
+                ret[key] = create_instance(self.cls, key=key, **combined)
         return ret
 
     def get_count(self, *args, **kwargs):
@@ -129,31 +139,34 @@ def get_range(self, *args, **kwargs):
             Start from this key (inclusive)
         finish : str
             End at this key (inclusive)
-        columns : [str]
-            Limit the columns fetched to the specified list
-        column_start = str
-            Only fetch when a column is >= column_start
-        column_finish = str
-            Only fetch when a column is <= column_finish
-        column_reversed = bool
-            Fetch the columns in reverse order. Currently this does nothing
-            because columns are converted into a dict.
-        column_count = int
-            Limit the number of columns fetched per key
-        row_count = int
+        row_count : int
             Limit the number of rows fetched
-        include_timestamp = bool
-            If true, return a (value, timestamp) tuple for each column
+        super_column : str
+            Fetch only this super_column
+        read_consistency_level : ConsistencyLevel
+            Affects the guaranteed replication factor before returning from
+            any read operation
 
         Returns
         -------
         iterator over Class instance
         """
-        if 'columns' not in kwargs:
+        if 'columns' not in kwargs and not self.column_family.super:
             kwargs['columns'] = self.columns.keys()
         for key, columns in self.column_family.get_range(*args, **kwargs):
-            columns = combine_columns(self.columns, columns)
-            yield create_instance(self.cls, key=key, **columns)
+            if self.column_family.super:
+                if 'super_column' not in kwargs:
+                    vals = {}
+                    for super_column, subcols in columns.iteritems():
+                        combined = combine_columns(self.columns, subcols)
+                        vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined)
+                    yield vals
+                else:
+                    combined = combine_columns(self.columns, columns)
+                    yield create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined)
+            else:
+                combined = combine_columns(self.columns, columns)
+                yield create_instance(self.cls, key=key, **combined)
 
     def insert(self, instance, columns=None):
         """
@@ -177,6 +190,9 @@ def insert(self, instance, columns=None):
         for column in columns:
             insert_dict[column] = self.columns[column].pack(instance.__dict__[column])
 
+        if self.column_family.super:
+            insert_dict = {instance.super_column: insert_dict}
+
         return self.column_family.insert(instance.key, insert_dict)
 
     def remove(self, instance, column=None):
@@ -188,7 +204,7 @@ def remove(self, instance, column=None):
         instance : Class instance
             Remove the instance where the key is instance.key
         column : str
-            If set, remove only this column
+            If set, remove only this column. Ignored for super column families, where instance.super_column is removed instead
 
         Returns
         -------
@@ -196,4 +212,7 @@ def remove(self, instance, column=None):
         """
         # Hmm, should we only remove the columns specified on construction?
         # It's slower, so we'll leave it out.
+
+        if self.column_family.super:
+            return self.column_family.remove(instance.key, column=instance.super_column)
         return self.column_family.remove(instance.key, column)
diff --git a/tests/test_columnfamilymap.py b/tests/test_columnfamilymap.py
index e1e49b15..4e8b460a 100644
--- a/tests/test_columnfamilymap.py
+++ b/tests/test_columnfamilymap.py
@@ -142,3 +142,56 @@ def test_has_defaults(self):
         assert instance.intstrcol == TestUTF8.intstrcol.default
         assert instance.floatstrcol == TestUTF8.floatstrcol.default
         assert instance.datetimestrcol == TestUTF8.datetimestrcol.default
+
+class TestSuperColumnFamilyMap:
+    def setUp(self):
+        self.client = connect()
+        self.cf = ColumnFamily(self.client, 'Test Keyspace', 'Test Super',
+                               write_consistency_level=ConsistencyLevel.ONE,
+                               timestamp=self.timestamp,
+                               super=True)
+        self.map = ColumnFamilyMap(TestUTF8, self.cf)
+        try:
+            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
+        except NotFoundException:
+            self.timestamp_n = 0
+        self.clear()
+
+    def tearDown(self):
+        self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})
+
+    # With the default timestamp function, a remove and an insert issued in
+    # the same second get the same timestamp, so Cassandra cannot tell which
+    # one is later. We supply our own strictly increasing counter instead.
+    def timestamp(self):
+        self.timestamp_n += 1
+        return self.timestamp_n
+
+    def clear(self):
+        for key, columns in self.cf.get_range(include_timestamp=True):
+            for subcolumns in columns.itervalues():
+                for value, timestamp in subcolumns.itervalues():
+                    self.timestamp_n = max(self.timestamp_n, timestamp)
+            self.cf.remove(key)
+
+    def instance(self, key, super_column):
+        instance = TestUTF8()
+        instance.key = key
+        instance.super_column = super_column
+        instance.strcol = '1'
+        instance.intcol = 2
+        instance.floatcol = 3.5
+        instance.datetimecol = datetime.now().replace(microsecond=0)
+        instance.intstrcol = 8
+        instance.floatstrcol = 4.6
+        instance.datetimestrcol = datetime.now().replace(microsecond=0)
+
+        return instance
+
+    def test_super(self):
+        instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
+        assert_raises(NotFoundException, self.map.get, instance.key)
+        self.map.insert(instance)
+        assert self.map.get(instance.key)[instance.super_column] == instance
+        assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance
+        assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]