Skip to content
This repository has been archived by the owner on Aug 4, 2020. It is now read-only.

Commit

Permalink
SuperColumn support in ColumnFamilyMap
Browse files Browse the repository at this point in the history
  • Loading branch information
jhseu committed Apr 4, 2010
1 parent abdfbc1 commit 02cc254
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 51 deletions.
16 changes: 16 additions & 0 deletions README.mkd
Expand Up @@ -212,6 +212,22 @@ To use SuperColumns, pass super=True to the ColumnFamily constructor.
>>> list(cf.get_range(super_column='2'))
[('key1', {'sub3': 'val3', 'sub4': 'val4'})]

You may also use a ColumnFamilyMap with SuperColumns:

>>> Test.objects = pycassa.ColumnFamilyMap(Test, cf)
>>> t = Test()
>>> t.key = 'key1'
>>> t.super_column = 'super1'
>>> t.string_column = 'foobar'
>>> t.int_str_column = 5
>>> t.float_column = t.float_str_column = 35.8
>>> t.datetime_str_column = datetime.now()
>>> Test.objects.insert(t)
>>> Test.objects.get(t.key)
{'super1': <__main__.Test object at 0x20ab350>}
>>> Test.objects.multiget([t.key])
{'key1': {'super1': <__main__.Test object at 0x20ab550>}}

These output values retain the same format as given by the Cassandra thrift interface.

Advanced
Expand Down
121 changes: 70 additions & 51 deletions pycassa/columnfamilymap.py
Expand Up @@ -12,7 +12,9 @@ def combine_columns(column_dict, columns):
for column, type in column_dict.iteritems():
combined_columns[column] = type.default
for column, value in columns.iteritems():
combined_columns[column] = column_dict[column].unpack(value)
col_cls = column_dict.get(column, None)
if col_cls is not None:
combined_columns[column] = col_cls.unpack(value)
return combined_columns

class ColumnFamilyMap(object):
Expand Down Expand Up @@ -45,29 +47,34 @@ def get(self, key, *args, **kwargs):
----------
key : str
The key to fetch
columns : [str]
Limit the columns fetched to the specified list
column_start = str
Only fetch when a column is >= column_start
column_finish = str
Only fetch when a column is <= column_finish
column_reversed = bool
Fetch the columns in reverse order. Currently this does nothing
because columns are converted into a dict.
column_count = int
Limit the number of columns fetched per key
include_timestamp = bool
If true, return a (value, timestamp) tuple for each column
super_column : str
Fetch only this super_column
read_consistency_level : ConsistencyLevel
Affects the guaranteed replication factor before returning from
any read operation
Returns
-------
Class instance
"""
if 'columns' not in kwargs:
if 'columns' not in kwargs and not self.column_family.super:
kwargs['columns'] = self.columns.keys()

columns = self.column_family.get(key, *args, **kwargs)
columns = combine_columns(self.columns, columns)
return create_instance(self.cls, key=key, **columns)

if self.column_family.super:
if 'super_column' not in kwargs:
vals = {}
for super_column, subcols in columns.iteritems():
combined = combine_columns(self.columns, subcols)
vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined)
return vals

combined = combine_columns(self.columns, columns)
return create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined)

combined = combine_columns(self.columns, columns)
return create_instance(self.cls, key=key, **combined)

def multiget(self, *args, **kwargs):
"""
Expand All @@ -77,31 +84,34 @@ def multiget(self, *args, **kwargs):
----------
keys : [str]
A list of keys to fetch
columns : [str]
Limit the columns fetched to the specified list
column_start = str
Only fetch when a column is >= column_start
column_finish = str
Only fetch when a column is <= column_finish
column_reversed = bool
Fetch the columns in reverse order. Currently this does nothing
because columns are converted into a dict.
column_count = int
Limit the number of columns fetched per key
include_timestamp = bool
If true, return a (value, timestamp) tuple for each column
super_column : str
Fetch only this super_column
read_consistency_level : ConsistencyLevel
Affects the guaranteed replication factor before returning from
any read operation
Returns
-------
{'key': Class instance}
"""
if 'columns' not in kwargs:
if 'columns' not in kwargs and not self.column_family.super:
kwargs['columns'] = self.columns.keys()
kcmap = self.column_family.multiget(*args, **kwargs)
ret = {}
for key, columns in kcmap.iteritems():
columns = combine_columns(self.columns, columns)
ret[key] = create_instance(self.cls, key=key, **columns)
if self.column_family.super:
if 'super_column' not in kwargs:
vals = {}
for super_column, subcols in columns.iteritems():
combined = combine_columns(self.columns, subcols)
vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined)
ret[key] = vals
else:
combined = combine_columns(self.columns, columns)
ret[key] = create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined)
else:
combined = combine_columns(self.columns, columns)
ret[key] = create_instance(self.cls, key=key, **combined)
return ret

def get_count(self, *args, **kwargs):
Expand Down Expand Up @@ -129,31 +139,34 @@ def get_range(self, *args, **kwargs):
Start from this key (inclusive)
finish : str
End at this key (inclusive)
columns : [str]
Limit the columns fetched to the specified list
column_start = str
Only fetch when a column is >= column_start
column_finish = str
Only fetch when a column is <= column_finish
column_reversed = bool
Fetch the columns in reverse order. Currently this does nothing
because columns are converted into a dict.
column_count = int
Limit the number of columns fetched per key
row_count = int
row_count : int
Limit the number of rows fetched
include_timestamp = bool
If true, return a (value, timestamp) tuple for each column
super_column : str
Fetch only this super_column
read_consistency_level : ConsistencyLevel
Affects the guaranteed replication factor before returning from
any read operation
Returns
-------
iterator over Class instance
"""
if 'columns' not in kwargs:
if 'columns' not in kwargs and not self.column_family.super:
kwargs['columns'] = self.columns.keys()
for key, columns in self.column_family.get_range(*args, **kwargs):
columns = combine_columns(self.columns, columns)
yield create_instance(self.cls, key=key, **columns)
if self.column_family.super:
if 'super_column' not in kwargs:
vals = {}
for super_column, subcols in columns.iteritems():
combined = combine_columns(self.columns, subcols)
vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined)
yield vals
else:
combined = combine_columns(self.columns, columns)
yield create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined)
else:
combined = combine_columns(self.columns, columns)
yield create_instance(self.cls, key=key, **combined)

def insert(self, instance, columns=None):
"""
Expand All @@ -177,6 +190,9 @@ def insert(self, instance, columns=None):
for column in columns:
insert_dict[column] = self.columns[column].pack(instance.__dict__[column])

if self.column_family.super:
insert_dict = {instance.super_column: insert_dict}

return self.column_family.insert(instance.key, insert_dict)

def remove(self, instance, column=None):
Expand All @@ -188,12 +204,15 @@ def remove(self, instance, column=None):
instance : Class instance
Remove the instance where the key is instance.key
column : str
If set, remove only this column
If set, remove only this Column. Ignored for SuperColumn families, where instance.super_column is removed instead.
Returns
-------
int timestamp
"""
# Hmm, should we only remove the columns specified on construction?
# It's slower, so we'll leave it out.

if self.column_family.super:
return self.column_family.remove(instance.key, column=instance.super_column)
return self.column_family.remove(instance.key, column)
53 changes: 53 additions & 0 deletions tests/test_columnfamilymap.py
Expand Up @@ -142,3 +142,56 @@ def test_has_defaults(self):
assert instance.intstrcol == TestUTF8.intstrcol.default
assert instance.floatstrcol == TestUTF8.floatstrcol.default
assert instance.datetimestrcol == TestUTF8.datetimestrcol.default

class TestSuperColumnFamilyMap:
def setUp(self):
self.client = connect()
self.cf = ColumnFamily(self.client, 'Test Keyspace', 'Test Super',
write_consistency_level=ConsistencyLevel.ONE,
timestamp=self.timestamp,
super=True)
self.map = ColumnFamilyMap(TestUTF8, self.cf)
try:
self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
except NotFoundException:
self.timestamp_n = 0
self.clear()

def tearDown(self):
self.cf.insert('meta', {'meta': {'timestamp': str(self.timestamp_n)}})

# Since the timestamp passed to Cassandra will be in the same second
# with the default timestamp function, causing problems with removing
# and inserting (Cassandra doesn't know which is later), we supply our own
def timestamp(self):
self.timestamp_n += 1
return self.timestamp_n

def clear(self):
for key, columns in self.cf.get_range(include_timestamp=True):
for subcolumns in columns.itervalues():
for value, timestamp in subcolumns.itervalues():
self.timestamp_n = max(self.timestamp_n, timestamp)
self.cf.remove(key)

def instance(self, key, super_column):
instance = TestUTF8()
instance.key = key
instance.super_column = super_column
instance.strcol = '1'
instance.intcol = 2
instance.floatcol = 3.5
instance.datetimecol = datetime.now().replace(microsecond=0)
instance.intstrcol = 8
instance.floatstrcol = 4.6
instance.datetimestrcol = datetime.now().replace(microsecond=0)

return instance

def test_super(self):
instance = self.instance('TestSuperColumnFamilyMap.test_super', 'super1')
assert_raises(NotFoundException, self.map.get, instance.key)
self.map.insert(instance)
assert self.map.get(instance.key)[instance.super_column] == instance
assert self.map.multiget([instance.key])[instance.key][instance.super_column] == instance
assert list(self.map.get_range(start=instance.key, finish=instance.key)) == [{instance.super_column: instance}]

0 comments on commit 02cc254

Please sign in to comment.