From 9d3363d512c9d0b150f8569e7be0909a9f1942ec Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 23 Dec 2009 19:01:27 -0600 Subject: [PATCH] add string column variants --- README.mkd | 21 +++++----- pycasso/columntypes.py | 27 ++++++++++++- tests/test_columnfamilymap.py | 75 ++++++++++++++--------------------- 3 files changed, 66 insertions(+), 57 deletions(-) diff --git a/README.mkd b/README.mkd index 9d40256f..cbb6dee5 100644 --- a/README.mkd +++ b/README.mkd @@ -124,13 +124,16 @@ You can also map existing classes using ColumnFamilyMap. >>> class Test(object): ... string_column = pycasso.StringColumn(default='Your Default') + ... int_str_column = pycasso.IntStringColumn(default=5) ... int_column = pycasso.Int64Column(default=0) + ... float_str_column = pycasso.FloatStringColumn(default=8.0) ... float_column = pycasso.Float64Column(default=0.0) - ... datetime_column = pycasso.DateTimeColumn() # default=None + ... datetime_str_column = pycasso.DateTimeStringColumn() # default=None + ... datetime_column = pycasso.DateTimeColumn() The defaults will be filled in whenever you retrieve instances from the Cassandra server and the column doesn't exist. If, for example, you add columns in the future, you simply add the relevant column and the default will be there when you get old instances. -Int64Column and Float64Column are named as such because they are converted to and from a 64-bit format when communicating with the Cassandra server. DateTimeColumn is stored in the same format as the time() system call, or the number of seconds since Epoch (00:00:00, January 1, 1970). +The difference between IntStringColumn and Int64Column is how it's stored in Cassandra. If you want maximum compatibility with other languages, use IntStringColumn, FloatStringColumn, and DateTimeStringColumn. Int64Column is stored as an int64_t, Float64Column is stored as a double, and DateTimeColumn is stored in the same format as the time() system call (seconds since 1970-01-01 00:00:00). These may end up being more compact and faster than the string representations. >>> Test.objects = pycasso.ColumnFamilyMap(Test, cf) @@ -139,20 +142,20 @@ All the functions are exactly the same, except that they return instances of the >>> t = Test() >>> t.key = 'maptest' >>> t.string_column = 'string test' - >>> t.int_column = 18 - >>> t.float_column = 35.8 + >>> t.int_column = t.int_str_column = 18 + >>> t.float_column = t.float_str_column = 35.8 >>> from datetime import datetime - >>> t.datetime_column = datetime.now() + >>> t.datetime_column = t.datetime_str_column = datetime.now() >>> Test.objects.insert(t) 1261395560 >>> Test.objects.get(t.key).string_column 'string test' - >>> Test.objects.get(t.key).int_column + >>> Test.objects.get(t.key).int_str_column 18 >>> Test.objects.get(t.key).float_column 35.799999999999997 - >>> Test.objects.get(t.key).datetime_column + >>> Test.objects.get(t.key).datetime_str_column datetime.datetime(2009, 12, 23, 17, 6, 3) >>> Test.objects.multiget([t.key]) @@ -160,7 +163,7 @@ All the functions are exactly the same, except that they return instances of the >>> list(Test.objects.get_range()) [<__main__.Test object at 0x7f8ddde0b710>] >>> Test.objects.get_count(t.key) - 4 + 7 >>> Test.objects.remove(t) 1261395603 @@ -209,7 +212,7 @@ pycasso currently returns Cassandra Columns and SubColumns as python dictionarie >>> cf = pycasso.ColumnFamily(client, 'Test Keyspace', 'Test ColumnFamily', dict_class=collections.OrderedDict) -You may also define your own Column types for the mapper. If, for example, you need absolute compatibility with other systems, you could make an integer column that is stored as a string: +You may also define your own Column types for the mapper. For example, the IntStringColumn is defined as: >>> class IntStringColumn(pycasso.Column): ... def pack(self, val): diff --git a/pycasso/columntypes.py b/pycasso/columntypes.py index 67b8aeb6..c1367413 100644 --- a/pycasso/columntypes.py +++ b/pycasso/columntypes.py @@ -2,8 +2,9 @@ import struct import time -__all__ = ['Column', 'BytesColumn', 'DateTimeColumn', 'Float64Column', - 'Int64Column', 'StringColumn'] +__all__ = ['Column', 'BytesColumn', 'DateTimeColumn', 'DateTimeStringColumn', + 'Float64Column', 'FloatStringColumn', 'Int64Column', + 'IntStringColumn', 'StringColumn'] class Column(object): def __init__(self, default=None): @@ -27,6 +28,14 @@ def pack(self, val): def unpack(self, val): return datetime.fromtimestamp(self.struct.unpack(val)[0]) +class DateTimeStringColumn(Column): + format = '%Y-%m-%d %H:%M:%S' + def pack(self, val): + return val.strftime(self.format) + + def unpack(self, val): + return datetime.strptime(val, self.format) + class Float64Column(Column): def __init__(self, *args, **kwargs): Column.__init__(self, *args, **kwargs) @@ -38,6 +47,13 @@ def pack(self, val): def unpack(self, val): return self.struct.unpack(val)[0] +class FloatStringColumn(Column): + def pack(self, val): + return str(val) + + def unpack(self, val): + return float(val) + class Int64Column(Column): def __init__(self, *args, **kwargs): Column.__init__(self, *args, **kwargs) @@ -49,5 +65,12 @@ def pack(self, val): def unpack(self, val): return self.struct.unpack(val)[0] +class IntStringColumn(Column): + def pack(self, val): + return str(val) + + def unpack(self, val): + return int(val) + class StringColumn(BytesColumn): pass diff --git a/tests/test_columnfamilymap.py b/tests/test_columnfamilymap.py index 51ecaa09..6cf28386 100644 --- a/tests/test_columnfamilymap.py +++ b/tests/test_columnfamilymap.py @@ -2,7 +2,8 @@ from pycasso import connect, gm_timestamp, ColumnFamily, ColumnFamilyMap, \ ConsistencyLevel, NotFoundException, StringColumn, Int64Column, \ - Float64Column, DateTimeColumn + Float64Column, DateTimeColumn, IntStringColumn, FloatStringColumn, \ + DateTimeStringColumn from nose.tools import assert_raises class TestUTF8(object): @@ -10,6 +11,9 @@ class TestUTF8(object): intcol = Int64Column(default=0) floatcol = Float64Column(default=0.0) datetimecol = DateTimeColumn(default=None) + intstrcol = IntStringColumn() + floatstrcol = FloatStringColumn() + datetimestrcol = DateTimeStringColumn() def __eq__(self, other): return self.__dict__ == other.__dict__ @@ -46,37 +50,33 @@ def clear(self): self.timestamp_n = max(self.timestamp_n, timestamp) self.cf.remove(key) + def instance(self, key): + instance = TestUTF8() + instance.key = key + instance.strcol = '1' + instance.intcol = 2 + instance.floatcol = 3.5 + instance.datetimecol = datetime.now().replace(microsecond=0) + instance.intstrcol = 8 + instance.floatstrcol = 4.6 + instance.datetimestrcol = datetime.now().replace(microsecond=0) + + return instance + def test_empty(self): key = 'TestColumnFamilyMap.test_empty' assert_raises(NotFoundException, self.map.get, key) assert len(self.map.multiget([key])) == 0 def test_insert_get(self): - instance = TestUTF8() - instance.key = 'TestColumnFamilyMap.test_insert_get' - instance.strcol = '1' - instance.intcol = 2 - instance.floatcol = 3.5 - instance.datetimecol = datetime.now().replace(microsecond=0) + instance = self.instance('TestColumnFamilyMap.test_insert_get') assert_raises(NotFoundException, self.map.get, instance.key) self.map.insert(instance) assert self.map.get(instance.key) == instance def test_insert_multiget(self): - instance1 = TestUTF8() - instance1.key = 'TestColumnFamilyMap.test_insert_multiget1' - instance1.strcol = '1' - instance1.intcol = 2 - instance1.floatcol = 3.5 - instance1.datetimecol = datetime.now().replace(microsecond=0) - - instance2 = TestUTF8() - instance2.key = 'TestColumnFamilyMap.test_insert_multiget2' - instance2.strcol = '1' - instance2.intcol = 2 - instance2.floatcol = 3.5 - instance2.datetimecol = datetime.now().replace(microsecond=0) - + instance1 = self.instance('TestColumnFamilyMap.test_insert_multiget1') + instance2 = self.instance('TestColumnFamilyMap.test_insert_multiget2') missing_key = 'TestColumnFamilyMap.test_insert_multiget3' self.map.insert(instance1) @@ -88,24 +88,14 @@ def test_insert_multiget(self): assert missing_key not in rows def test_insert_get_count(self): - instance = TestUTF8() - instance.key = 'TestColumnFamilyMap.test_insert_get_count' - instance.strcol = '1' - instance.intcol = 2 - instance.floatcol = 3.5 - instance.datetimecol = datetime.now().replace(microsecond=0) + instance = self.instance('TestColumnFamilyMap.test_insert_get_count') self.map.insert(instance) - assert self.map.get_count(instance.key) == 4 + assert self.map.get_count(instance.key) == 7 def test_insert_get_range(self): instances = [] for i in xrange(5): - instance = TestUTF8() - instance.key = 'TestColumnFamilyMap.test_insert_get_range%s' % i - instance.strcol = '1' - instance.intcol = 2 - instance.floatcol = 3.5 - instance.datetimecol = datetime.now().replace(microsecond=0) + instance = self.instance('TestColumnFamilyMap.test_insert_get_range%s' % i) instances.append(instance) for instance in instances: @@ -116,24 +106,14 @@ def test_insert_get_range(self): assert rows == instances def test_remove(self): - instance = TestUTF8() - instance.key = 'TestColumnFamilyMap.test_remove' - instance.strcol = '1' - instance.intcol = 2 - instance.floatcol = 3.5 - instance.datetimecol = datetime.now().replace(microsecond=0) + instance = self.instance('TestColumnFamilyMap.test_remove') self.map.insert(instance) self.map.remove(instance) assert_raises(NotFoundException, self.map.get, instance.key) def test_does_not_insert_extra_column(self): - instance = TestUTF8() - instance.key = 'TestColumnFamilyMap.test_does_not_insert_extra_column' - instance.strcol = '1' - instance.intcol = 2 - instance.floatcol = 3.5 - instance.datetimecol = datetime.now().replace(microsecond=0) + instance = self.instance('TestColumnFamilyMap.test_does_not_insert_extra_column') instance.othercol = 'Test' self.map.insert(instance) @@ -153,3 +133,6 @@ def test_has_defaults(self): assert instance.intcol == TestUTF8.intcol.default assert instance.floatcol == TestUTF8.floatcol.default assert instance.datetimecol == TestUTF8.datetimecol.default + assert instance.intstrcol == TestUTF8.intstrcol.default + assert instance.floatstrcol == TestUTF8.floatstrcol.default + assert instance.datetimestrcol == TestUTF8.datetimestrcol.default