Skip to content

Commit

Permalink
Support Nested with flatten_nested=0 (#285)
Browse files Browse the repository at this point in the history
Support Nested with flatten_nested=0
  • Loading branch information
spff committed Jan 19, 2022
1 parent e66fe4a commit d95f14b
Show file tree
Hide file tree
Showing 5 changed files with 290 additions and 5 deletions.
36 changes: 33 additions & 3 deletions clickhouse_driver/block.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .reader import read_varint, read_binary_uint8, read_binary_int32
from .varint import write_varint
from .writer import write_binary_uint8, write_binary_int32
from .columns import nestedcolumn


class BlockInfo(object):
Expand Down Expand Up @@ -151,17 +152,46 @@ def get_column_by_index(self, index):
return [row[index] for row in self.data]

def _mutate_dicts_to_rows(self, data):
    """Convert the dict rows in ``data`` into positional rows.

    Delegates to ``_pure_mutate_dicts_to_rows`` with
    ``self.columns_with_types`` as the column description.

    :param data: rows to convert; forwarded unchanged to the helper.
    :return: whatever ``_pure_mutate_dicts_to_rows`` returns.
    """
    # The per-row checker is only passed when type checking is enabled;
    # otherwise ``False`` is passed, which the helper treats as "no check".
    check_row_type = False
    if self.types_check:
        check_row_type = self._check_dict_row_type

    return self._pure_mutate_dicts_to_rows(
        data,
        self.columns_with_types,
        check_row_type,
    )

def _pure_mutate_dicts_to_rows(
self,
data,
columns_with_types,
check_row_type,
):
columns_with_cwt = []
for x in columns_with_types:
cwt = None
if x[1].startswith('Nested'):
cwt = nestedcolumn.get_columns_with_types(x[1])
columns_with_cwt.append((x[0], cwt))

for i, row in enumerate(data):
if check_row_type:
check_row_type(row)

data[i] = [row[name] for name in column_names]
new_data = []
for name, cwt in columns_with_cwt:
if cwt is None:
new_data.append(row[name])
else:
new_data.append(self._pure_mutate_dicts_to_rows(
row[name],
cwt,
check_row_type
))
data[i] = new_data
# return for recursion
return data

def _check_rows(self, data):
expected_row_len = len(self.columns_with_types)
Expand Down
73 changes: 73 additions & 0 deletions clickhouse_driver/columns/nestedcolumn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@

from .arraycolumn import create_array_column


def create_nested_column(spec, column_by_spec_getter):
    """Create the column object for a ``Nested(...)`` type spec.

    The sub-column types returned by ``get_nested_columns`` are joined into
    an ``Array(Tuple(...))`` spec, which is handed to
    ``create_array_column`` together with ``column_by_spec_getter``.
    """
    tuple_items = ','.join(get_nested_columns(spec))
    array_spec = 'Array(Tuple({}))'.format(tuple_items)
    return create_array_column(
        array_spec,
        column_by_spec_getter=column_by_spec_getter
    )


def get_nested_columns(spec):
    """Return the type specs of the sub-columns of a ``Nested(...)`` spec.

    The inner spec is split on commas that are outside any parentheses. In
    each item, everything up to and including the last space outside
    parentheses (i.e. the sub-column name) is dropped, so only the type
    remains.
    """
    inner_spec = get_inner_spec(spec)

    depth = 0
    type_start = 0
    types = []
    # The appended comma acts as a sentinel that flushes the last item.
    for pos, ch in enumerate(inner_spec + ','):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif depth != 0:
            # Separators inside parentheses belong to a parametrized type.
            continue
        elif ch == ',':
            types.append(inner_spec[type_start:pos])
            type_start = pos + 1
        elif ch == ' ':
            type_start = pos + 1
    return types


def get_columns_with_types(spec):
    """Return ``(name, type_spec)`` pairs for a ``Nested(...)`` spec.

    The inner spec is split on commas that are outside any parentheses. In
    each item, the last space outside parentheses separates the name (which
    is stripped of surrounding whitespace) from the type spec.
    """
    inner_spec = get_inner_spec(spec)

    depth = 0
    item_start = 0
    type_start = 0
    pairs = []
    # The appended comma acts as a sentinel that flushes the last item.
    for pos, ch in enumerate(inner_spec + ','):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif depth != 0:
            # Separators inside parentheses belong to a parametrized type.
            continue
        elif ch == ',':
            column_name = inner_spec[item_start:type_start].strip()
            column_type = inner_spec[type_start:pos]
            pairs.append((column_name, column_type))
            item_start = pos + 1
        elif ch == ' ':
            type_start = pos + 1
    return pairs


def get_inner_spec(spec):
    """Return the text between the parentheses of a ``Nested(...)`` spec.

    ``spec`` is expected to start with ``'Nested('``. The result runs from
    just after that prefix up to, but not including, the parenthesis that
    closes it. Nested parentheses are balanced, and anything following the
    closing parenthesis is ignored.

    If no matching closing parenthesis is found, everything after the
    prefix is returned.
    """
    offset = len('Nested(')
    # The opening parenthesis of the prefix is already consumed.
    depth = 1
    for i, ch in enumerate(spec[offset:], offset):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            # Stop at the closing parenthesis itself so that it is never
            # part of the result, even when more text follows it.
            if depth == 0:
                return spec[offset:i]

    return spec[offset:]
4 changes: 4 additions & 0 deletions clickhouse_driver/columns/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
)
from .stringcolumn import create_string_column
from .tuplecolumn import create_tuple_column
from .nestedcolumn import create_nested_column
from .uuidcolumn import UUIDColumn
from .intervalcolumn import (
IntervalYearColumn, IntervalMonthColumn, IntervalWeekColumn,
Expand Down Expand Up @@ -84,6 +85,9 @@ def create_column_with_options(x):
elif spec.startswith('Tuple'):
return create_tuple_column(spec, create_column_with_options)

elif spec.startswith('Nested'):
return create_nested_column(spec, create_column_with_options)

elif spec.startswith('Nullable'):
return create_nullable_column(spec, create_column_with_options)

Expand Down
66 changes: 64 additions & 2 deletions docs/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,10 @@ INSERT types: :class:`list`, :class:`tuple`.
SELECT type: :class:`tuple`.


Nested
Nested(flatten_nested=1, default)
---------------------------------

Nested type is represented by sequence of arrays. In example below actual
Nested type is represented by a sequence of arrays when ``flatten_nested=1``. In the example below the actual
columns are ``col.name`` and ``col.version``.

.. code-block:: sql
Expand Down Expand Up @@ -335,6 +335,68 @@ Inserting data into nested column with ``clickhouse-driver``:
(['a', 'b', 'c'], [100, 200, 300]),
])
Nested(flatten_nested=0)
------------------------

Nested type is represented by an array of named tuples when ``flatten_nested=0``.

.. code-block:: sql
:) SET flatten_nested = 0;
SET flatten_nested = 0
Ok.
0 rows in set. Elapsed: 0.006 sec.
:) CREATE TABLE test_nested (col Nested(name String, version UInt16)) Engine = Memory;
CREATE TABLE test_nested
(
`col` Nested(name String, version UInt16)
)
ENGINE = Memory
Ok.
0 rows in set. Elapsed: 0.005 sec.
:) DESCRIBE TABLE test_nested FORMAT TSV;
DESCRIBE TABLE test_nested
FORMAT TSV
col Nested(name String, version UInt16)
1 rows in set. Elapsed: 0.004 sec.
Inserting data into nested column in ``clickhouse-client``:

.. code-block:: sql
:) INSERT INTO test_nested VALUES ([('a', 100), ('b', 200), ('c', 300)]);
INSERT INTO test_nested VALUES
Ok.
1 rows in set. Elapsed: 0.003 sec.
Inserting data into nested column with ``clickhouse-driver``:

.. code-block:: python
client.execute(
'INSERT INTO test_nested VALUES',
[([('a', 100), ('b', 200), ('c', 300)],)]
)
# or
client.execute(
'INSERT INTO test_nested VALUES',
[{'col': [{'name': 'a', 'version': 100}, {'name': 'b', 'version': 200}, {'name': 'c', 'version': 300}]}]
)
Map(key, value)
------------------

Expand Down
116 changes: 116 additions & 0 deletions tests/columns/test_nested.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from tests.testcase import BaseTestCase
from tests.util import require_server_version
from clickhouse_driver.columns import nestedcolumn


class NestedTestCase(BaseTestCase):
    """Tests for ``Nested`` columns created with ``flatten_nested=0`` and
    for the spec-parsing helpers in ``nestedcolumn``."""

    def setUp(self):
        # No extra fixtures here; forward whatever the base setUp returns.
        return super(NestedTestCase, self).setUp()

    def entuple(self, lst):
        """Return ``lst`` as a tuple, converting inner lists recursively."""
        converted = []
        for item in lst:
            if isinstance(item, list):
                item = self.entuple(item)
            converted.append(item)
        return tuple(converted)

    @require_server_version(21, 3, 13)
    def test_simple(self):
        """Insert one row of tuples, read it back whole and per
        sub-column."""
        schema = 'n Nested(i Int32, s String)'

        # INSERT INTO test_nested VALUES ([(0, 'a'), (1, 'b')]);
        rows = [([(0, 'a'), (1, 'b')],)]

        with self.create_table(schema, flatten_nested=0):
            self.client.execute('INSERT INTO test (n) VALUES', rows)

            select_all = 'SELECT * FROM test'
            cli_output = self.emit_cli(select_all)
            self.assertEqual(cli_output, "[(0,'a'),(1,'b')]\n")

            selected = self.client.execute(select_all)
            self.assertEqual(selected, rows)

            only_i = self.client.execute('SELECT n.i FROM test')
            self.assertEqual(only_i, [([0, 1],)])

            only_s = self.client.execute('SELECT n.s FROM test')
            self.assertEqual(only_s, [(['a', 'b'],)])

    @require_server_version(21, 3, 13)
    def test_multiple_rows(self):
        """Insert two rows of tuples and read them back."""
        schema = 'n Nested(i Int32, s String)'

        rows = [
            ([(0, 'a'), (1, 'b')],),
            ([(3, 'd'), (4, 'e')],),
        ]

        with self.create_table(schema, flatten_nested=0):
            self.client.execute('INSERT INTO test (n) VALUES', rows)

            select_all = 'SELECT * FROM test'
            cli_output = self.emit_cli(select_all)
            expected_cli = "[(0,'a'),(1,'b')]\n[(3,'d'),(4,'e')]\n"
            self.assertEqual(cli_output, expected_cli)

            selected = self.client.execute(select_all)
            self.assertEqual(selected, rows)

    @require_server_version(21, 3, 13)
    def test_dict(self):
        """Insert rows given as dicts (nested values as lists of dicts) and
        read them back as tuples."""
        schema = 'n Nested(i Int32, s String)'

        dict_rows = [
            {'n': [{'i': 0, 's': 'a'}, {'i': 1, 's': 'b'}]},
            {'n': [{'i': 3, 's': 'd'}, {'i': 4, 's': 'e'}]},
        ]

        with self.create_table(schema, flatten_nested=0):
            self.client.execute('INSERT INTO test (n) VALUES', dict_rows)

            select_all = 'SELECT * FROM test'
            cli_output = self.emit_cli(select_all)
            expected_cli = "[(0,'a'),(1,'b')]\n[(3,'d'),(4,'e')]\n"
            self.assertEqual(cli_output, expected_cli)

            selected = self.client.execute(select_all)
            expected_rows = [
                ([(0, 'a'), (1, 'b')],),
                ([(3, 'd'), (4, 'e')],),
            ]
            self.assertEqual(selected, expected_rows)

    def test_get_nested_columns(self):
        """Only the sub-column types are returned, names are dropped."""
        spec = 'Nested(a Tuple(Array(Int8)),\n b Nullable(String))'
        parsed = nestedcolumn.get_nested_columns(spec)
        self.assertEqual(
            parsed,
            ['Tuple(Array(Int8))', 'Nullable(String)']
        )

    def test_get_columns_with_types(self):
        """Sub-columns are returned as ``(name, type)`` pairs."""
        spec = 'Nested(a Tuple(Array(Int8)),\n b Nullable(String))'
        parsed = nestedcolumn.get_columns_with_types(spec)
        self.assertEqual(
            parsed,
            [('a', 'Tuple(Array(Int8))'), ('b', 'Nullable(String)')]
        )

    def test_get_inner_spec(self):
        """Text after the closing parenthesis must not leak into the
        result."""
        inner = 'a Tuple(Array(Int8), Array(Int64)), b Nullable(String)'
        spec = 'Nested({}) dummy '.format(inner)
        self.assertEqual(nestedcolumn.get_inner_spec(spec), inner)

0 comments on commit d95f14b

Please sign in to comment.