From 02cc2543ae34f2efc91b8fd6f398c53697eb5049 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sat, 3 Apr 2010 21:15:03 -0500 Subject: [PATCH] SuperColumn support in ColumnFamilyMap --- README.mkd | 16 +++++ pycassa/columnfamilymap.py | 121 ++++++++++++++++++++-------------- tests/test_columnfamilymap.py | 53 +++++++++++++++ 3 files changed, 139 insertions(+), 51 deletions(-) diff --git a/README.mkd b/README.mkd index afeffa8b..06d58fe2 100644 --- a/README.mkd +++ b/README.mkd @@ -212,6 +212,22 @@ To use SuperColumns, pass super=True to the ColumnFamily constructor. >>> list(cf.get_range(super_column='2')) [('key1', {'sub3': 'val3', 'sub4': 'val4'})] +You may also use a ColumnFamilyMap with SuperColumns: + + >>> Test.objects = pycassa.ColumnFamilyMap(Test, cf) + >>> t = Test() + >>> t.key = 'key1' + >>> t.super_column = 'super1' + >>> t.string_column = 'foobar' + >>> t.int_str_column = 5 + >>> t.float_column = t.float_str_column = 35.8 + >>> t.datetime_str_column = datetime.now() + >>> Test.objects.insert(t) + >>> Test.objects.get(t.key) + {'super1': <__main__.Test object at 0x20ab350>} + >>> Test.objects.multiget([t.key]) + {'key1': {'super1': <__main__.Test object at 0x20ab550>}} + These output values retain the same format as given by the Cassandra thrift interface. Advanced diff --git a/pycassa/columnfamilymap.py b/pycassa/columnfamilymap.py index e9be9b43..518f5249 100644 --- a/pycassa/columnfamilymap.py +++ b/pycassa/columnfamilymap.py @@ -12,7 +12,9 @@ def combine_columns(column_dict, columns): for column, type in column_dict.iteritems(): combined_columns[column] = type.default for column, value in columns.iteritems(): - combined_columns[column] = column_dict[column].unpack(value) + col_cls = column_dict.get(column, None) + if col_cls is not None: + combined_columns[column] = col_cls.unpack(value) return combined_columns class ColumnFamilyMap(object): @@ -45,29 +47,34 @@ def get(self, key, *args, **kwargs): ---------- key : str The key to fetch - columns : [str] - Limit the columns fetched to the specified list - column_start = str - Only fetch when a column is >= column_start - column_finish = str - Only fetch when a column is <= column_finish - column_reversed = bool - Fetch the columns in reverse order. Currently this does nothing - because columns are converted into a dict. - column_count = int - Limit the number of columns fetched per key - include_timestamp = bool - If true, return a (value, timestamp) tuple for each column + super_column : str + Fetch only this super_column + read_consistency_level : ConsistencyLevel + Affects the guaranteed replication factor before returning from + any read operation Returns ------- Class instance """ - if 'columns' not in kwargs: + if 'columns' not in kwargs and not self.column_family.super: kwargs['columns'] = self.columns.keys() + columns = self.column_family.get(key, *args, **kwargs) - columns = combine_columns(self.columns, columns) - return create_instance(self.cls, key=key, **columns) + + if self.column_family.super: + if 'super_column' not in kwargs: + vals = {} + for super_column, subcols in columns.iteritems(): + combined = combine_columns(self.columns, subcols) + vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined) + return vals + + combined = combine_columns(self.columns, columns) + return create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined) + + combined = combine_columns(self.columns, columns) + return create_instance(self.cls, key=key, **combined) def multiget(self, *args, **kwargs): """ @@ -77,31 +84,34 @@ def multiget(self, *args, **kwargs): ---------- keys : [str] A list of keys to fetch - columns : [str] - Limit the columns fetched to the specified list - column_start = str - Only fetch when a column is >= column_start - column_finish = str - Only fetch when a column is <= column_finish - column_reversed = bool - Fetch the columns in reverse order. Currently this does nothing - because columns are converted into a dict. - column_count = int - Limit the number of columns fetched per key - include_timestamp = bool - If true, return a (value, timestamp) tuple for each column + super_column : str + Fetch only this super_column + read_consistency_level : ConsistencyLevel + Affects the guaranteed replication factor before returning from + any read operation Returns ------- {'key': Class instance} """ - if 'columns' not in kwargs: + if 'columns' not in kwargs and not self.column_family.super: kwargs['columns'] = self.columns.keys() kcmap = self.column_family.multiget(*args, **kwargs) ret = {} for key, columns in kcmap.iteritems(): - columns = combine_columns(self.columns, columns) - ret[key] = create_instance(self.cls, key=key, **columns) + if self.column_family.super: + if 'super_column' not in kwargs: + vals = {} + for super_column, subcols in columns.iteritems(): + combined = combine_columns(self.columns, subcols) + vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined) + ret[key] = vals + else: + combined = combine_columns(self.columns, columns) + ret[key] = create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined) + else: + combined = combine_columns(self.columns, columns) + ret[key] = create_instance(self.cls, key=key, **combined) return ret def get_count(self, *args, **kwargs): @@ -129,31 +139,34 @@ def get_range(self, *args, **kwargs): Start from this key (inclusive) finish : str End at this key (inclusive) - columns : [str] - Limit the columns fetched to the specified list - column_start = str - Only fetch when a column is >= column_start - column_finish = str - Only fetch when a column is <= column_finish - column_reversed = bool - Fetch the columns in reverse order. Currently this does nothing - because columns are converted into a dict. - column_count = int - Limit the number of columns fetched per key - row_count = int + row_count : int Limit the number of rows fetched - include_timestamp = bool - If true, return a (value, timestamp) tuple for each column + super_column : str + Fetch only this super_column + read_consistency_level : ConsistencyLevel + Affects the guaranteed replication factor before returning from + any read operation Returns ------- iterator over Class instance """ - if 'columns' not in kwargs: + if 'columns' not in kwargs and not self.column_family.super: kwargs['columns'] = self.columns.keys() for key, columns in self.column_family.get_range(*args, **kwargs): - columns = combine_columns(self.columns, columns) - yield create_instance(self.cls, key=key, **columns) + if self.column_family.super: + if 'super_column' not in kwargs: + vals = {} + for super_column, subcols in columns.iteritems(): + combined = combine_columns(self.columns, subcols) + vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined) + yield vals + else: + combined = combine_columns(self.columns, columns) + yield create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined) + else: + combined = combine_columns(self.columns, columns) + yield create_instance(self.cls, key=key, **combined) def insert(self, instance, columns=None): """ @@ -177,6 +190,9 @@ def insert(self, instance, columns=None): for column in columns: insert_dict[column] = self.columns[column].pack(instance.__dict__[column]) + if self.column_family.super: + insert_dict = {instance.super_column: insert_dict} + return self.column_family.insert(instance.key, insert_dict) def remove(self, instance, column=None): @@ -188,7 +204,7 @@ def remove(self, instance, column=None): instance : Class instance Remove the instance where the key is instance.key column : str - If set, remove only this column + If set, remove only this Column. Doesn't do anything for SuperColumns Returns ------- @@ -196,4 +212,7 @@ def remove(self, instance, column=None): """ # Hmm, should we only remove the columns specified on construction? # It's slower, so we'll leave it out. + + if self.column_family.super: + return self.column_family.remove(instance.key, column=instance.super_column) return self.column_family.remove(instance.key, column) diff --git a/tests/test_columnfamilymap.py b/tests/test_columnfamilymap.py index e1e49b15..4e8b460a 100644 --- a/tests/test_columnfamilymap.py +++ b/tests/test_columnfamilymap.py @@ -142,3 +142,56 @@ def test_has_defaults(self): assert instance.intstrcol == TestUTF8.intstrcol.default assert instance.floatstrcol == TestUTF8.floatstrcol.default assert instance.datetimestrcol == TestUTF8.datetimestrcol.default + +class TestSuperColumnFamilyMap: + def setUp(self): + self.client = connect() + self.cf = ColumnFamily(self.client, 'Test Keyspace', 'Test Super', + write_consistency_level=ConsistencyLevel.ONE, + timestamp=self.timestamp, + super=True) + self.map = ColumnFamilyMap(TestUTF8, self.cf) + try: + self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp']) + except NotFoundException: + self.timestamp_n = 0 + self.clear() + + def tearDown(self): + self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}}) + + # Since the timestamp passed to Cassandra will be in the same second + # with the default timestamp function, causing problems with removing + # and inserting (Cassandra doesn't know which is later), we supply our own + def timestamp(self): + self.timestamp_n += 1 + return self.timestamp_n + + def clear(self): + for key, columns in self.cf.get_range(include_timestamp=True): + for subcolumns in columns.itervalues(): + for value, timestamp in subcolumns.itervalues(): + self.timestamp_n = max(self.timestamp_n, timestamp) + self.cf.remove(key) + + def instance(self, key, super_column): + instance = TestUTF8() + instance.key = key + instance.super_column = super_column + instance.strcol = '1' + instance.intcol = 2 + instance.floatcol = 3.5 + instance.datetimecol = datetime.now().replace(microsecond=0) + instance.intstrcol = 8 + instance.floatstrcol = 4.6 + instance.datetimestrcol = datetime.now().replace(microsecond=0) + + return instance + + def test_super(self): + instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1') + assert_raises(NotFoundException, self.map.get, instance.key) + self.map.insert(instance) + assert self.map.get(instance.key)[instance.super_column] == instance + assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance + assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]