Skip to content

Commit

Permalink
Add input_format_null_as_default option. Fix #312
Browse files Browse the repository at this point in the history
  • Loading branch information
xzkostyan committed Jun 2, 2022
1 parent d10048d commit ae8aa4a
Show file tree
Hide file tree
Showing 13 changed files with 99 additions and 30 deletions.
12 changes: 10 additions & 2 deletions clickhouse_driver/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Client(object):
Defaults to ``1048576``.
* ``strings_as_bytes`` -- turns off string column encoding/decoding.
* ``strings_encoding`` -- specifies string encoding. UTF-8 by default.
* ``use_numpy`` -- Use numpy for columns reading. New in version
* ``use_numpy`` -- Use NumPy for columns reading. New in version
*0.2.0*.
* ``opentelemetry_traceparent`` -- OpenTelemetry traceparent header as
described by W3C Trace Context recommendation.
Expand All @@ -49,6 +49,10 @@ class Client(object):
* ``quota_key`` -- A string to differentiate quotas when the user has
keyed quotas configured on the server.
New in version *0.2.3*.
* ``input_format_null_as_default`` -- Initialize null fields with
default values if the data type of the field is not
nullable. Does not work with NumPy. Default: False.
New in version *0.2.4*.
"""

available_client_settings = (
Expand All @@ -58,7 +62,8 @@ class Client(object):
'use_numpy',
'opentelemetry_traceparent',
'opentelemetry_tracestate',
'quota_key'
'quota_key',
'input_format_null_as_default'
)

def __init__(self, *args, **kwargs):
Expand All @@ -85,6 +90,9 @@ def __init__(self, *args, **kwargs):
),
'quota_key': self.settings.pop(
'quota_key', ''
),
'input_format_null_as_default': self.settings.pop(
'input_format_null_as_default', False
)
}

Expand Down
6 changes: 4 additions & 2 deletions clickhouse_driver/columns/arraycolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(self, nested_column, **kwargs):
self.nested_column = nested_column
self._write_depth_0_size = True
super(ArrayColumn, self).__init__(**kwargs)
self.null_value = [nested_column.null_value]

def write_data(self, data, buf):
# Column of Array(T) is stored in "compact" format and passed to server
Expand Down Expand Up @@ -99,6 +100,7 @@ def _write_nulls_data(self, value, buf):
self.nested_column._write_nulls_map(value, buf)

def _write(self, value, buf):
value = self.prepare_items(value)
self._write_sizes(value, buf)
self._write_nulls_data(value, buf)
self._write_data(value, buf)
Expand Down Expand Up @@ -145,6 +147,6 @@ def _read(self, size, buf):
return tuple(data)


def create_array_column(spec, column_by_spec_getter):
def create_array_column(spec, column_by_spec_getter, column_options):
inner = spec[6:-1]
return ArrayColumn(column_by_spec_getter(inner))
return ArrayColumn(column_by_spec_getter(inner), **column_options)
18 changes: 14 additions & 4 deletions clickhouse_driver/columns/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ class Column(object):
def __init__(self, types_check=False, **kwargs):
self.nullable = False
self.types_check_enabled = types_check
self.input_null_as_default = False
if 'context' in kwargs:
settings = kwargs['context'].client_settings
self.input_null_as_default = settings \
.get('input_format_null_as_default', False)

super(Column, self).__init__()

def make_null_struct(self, n_items):
Expand All @@ -39,22 +45,26 @@ def check_item_type(self, value):
def prepare_items(self, items):
nullable = self.nullable
null_value = self.null_value
null_as_default = self.input_null_as_default

check_item = self.check_item
if self.types_check_enabled:
check_item_type = self.check_item_type
else:
check_item_type = False

if (not self.nullable and not check_item_type and
if (not (self.nullable or null_as_default) and not check_item_type and
not check_item and not self.before_write_items):
return items

nulls_map = [False] * len(items) if self.nullable else None
for i, x in enumerate(items):
if x is None and nullable:
nulls_map[i] = True
x = null_value
if x is None:
if nullable:
nulls_map[i] = True
x = null_value
elif null_as_default:
x = null_value

else:
if check_item_type:
Expand Down
4 changes: 2 additions & 2 deletions clickhouse_driver/columns/lowcardinalitycolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from .intcolumn import UInt8Column, UInt16Column, UInt32Column, UInt64Column


def create_low_cardinality_column(spec, column_by_spec_getter):
def create_low_cardinality_column(spec, column_by_spec_getter, column_options):
inner = spec[15:-1]
nested = column_by_spec_getter(inner)
return LowCardinalityColumn(nested)
return LowCardinalityColumn(nested, **column_options)


class LowCardinalityColumn(Column):
Expand Down
6 changes: 4 additions & 2 deletions clickhouse_driver/columns/mapcolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
class MapColumn(Column):
py_types = (dict, )

null_value = {}

def __init__(self, key_column, value_column, **kwargs):
self.offset_column = UInt64Column()
self.key_column = key_column
Expand Down Expand Up @@ -50,9 +52,9 @@ def write_items(self, items, buf):
self.value_column.write_data(values, buf)


def create_map_column(spec, column_by_spec_getter):
def create_map_column(spec, column_by_spec_getter, column_options):
key, value = spec[4:-1].split(',')
key_column = column_by_spec_getter(key.strip())
value_column = column_by_spec_getter(value.strip())

return MapColumn(key_column, value_column)
return MapColumn(key_column, value_column, **column_options)
4 changes: 2 additions & 2 deletions clickhouse_driver/columns/nestedcolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
get_inner_columns_with_types


def create_nested_column(spec, column_by_spec_getter):
def create_nested_column(spec, column_by_spec_getter, column_options):
return create_array_column(
'Array(Tuple({}))'.format(','.join(get_nested_columns(spec))),
column_by_spec_getter=column_by_spec_getter
column_by_spec_getter, column_options
)


Expand Down
5 changes: 3 additions & 2 deletions clickhouse_driver/columns/numpy/lowcardinalitycolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def _read_data(self, n_items, buf, nulls_map=None):
return pd.Categorical.from_codes(keys, index)


def create_numpy_low_cardinality_column(spec, column_by_spec_getter):
def create_numpy_low_cardinality_column(spec, column_by_spec_getter,
column_options):
inner = spec[15:-1]
nested = column_by_spec_getter(inner)
return NumpyLowCardinalityColumn(nested)
return NumpyLowCardinalityColumn(nested, **column_options)
9 changes: 6 additions & 3 deletions clickhouse_driver/columns/numpy/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,17 @@ def create_column_with_options(x):
return create_numpy_datetime_column(spec, column_options)

elif spec.startswith('Tuple'):
return create_tuple_column(spec, create_column_with_options)
return create_tuple_column(
spec, create_column_with_options, column_options
)

elif spec.startswith('Nullable'):
return create_nullable_column(spec, create_column_with_options)

elif spec.startswith('LowCardinality'):
return create_numpy_low_cardinality_column(spec,
create_column_with_options)
return create_numpy_low_cardinality_column(
spec, create_column_with_options, column_options
)
else:
for alias, primitive in aliases:
if spec.startswith(alias):
Expand Down
5 changes: 3 additions & 2 deletions clickhouse_driver/columns/numpy/tuplecolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ def read_items(self, n_items, buf):
return self.read_data(n_items, buf)


def create_tuple_column(spec, column_by_spec_getter):
def create_tuple_column(spec, column_by_spec_getter, column_options):
inner_spec = get_inner_spec('Tuple', spec)
columns = get_inner_columns(inner_spec)

return TupleColumn([column_by_spec_getter(x) for x in columns])
return TupleColumn([column_by_spec_getter(x) for x in columns],
**column_options)
23 changes: 17 additions & 6 deletions clickhouse_driver/columns/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,26 +90,37 @@ def create_column_with_options(x):
return create_decimal_column(spec, column_options)

elif spec.startswith('Array'):
return create_array_column(spec, create_column_with_options)
return create_array_column(
spec, create_column_with_options, column_options
)

elif spec.startswith('Tuple'):
return create_tuple_column(spec, create_column_with_options)
return create_tuple_column(
spec, create_column_with_options, column_options
)

elif spec.startswith('Nested'):
return create_nested_column(spec, create_column_with_options)
return create_nested_column(
spec, create_column_with_options, column_options
)

elif spec.startswith('Nullable'):
return create_nullable_column(spec, create_column_with_options)

elif spec.startswith('LowCardinality'):
return create_low_cardinality_column(spec, create_column_with_options)
return create_low_cardinality_column(
spec, create_column_with_options, column_options
)

elif spec.startswith('SimpleAggregateFunction'):
return create_simple_aggregate_function_column(
spec, create_column_with_options)
spec, create_column_with_options
)

elif spec.startswith('Map'):
return create_map_column(spec, create_column_with_options)
return create_map_column(
spec, create_column_with_options, column_options
)

else:
for alias, primitive in aliases:
Expand Down
7 changes: 5 additions & 2 deletions clickhouse_driver/columns/tuplecolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ class TupleColumn(Column):
def __init__(self, nested_columns, **kwargs):
self.nested_columns = nested_columns
super(TupleColumn, self).__init__(**kwargs)
self.null_value = tuple(x.null_value for x in nested_columns)

def write_data(self, items, buf):
items = self.prepare_items(items)
items = list(zip(*items))

for i, x in enumerate(self.nested_columns):
Expand All @@ -27,8 +29,9 @@ def read_items(self, n_items, buf):
return self.read_data(n_items, buf)


def create_tuple_column(spec, column_by_spec_getter):
def create_tuple_column(spec, column_by_spec_getter, column_options):
inner_spec = get_inner_spec('Tuple', spec)
columns = get_inner_columns(inner_spec)

return TupleColumn([column_by_spec_getter(x) for x in columns])
return TupleColumn([column_by_spec_getter(x) for x in columns],
**column_options)
28 changes: 28 additions & 0 deletions tests/test_settings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from parameterized import parameterized

from clickhouse_driver.errors import ServerException, ErrorCodes
from tests.testcase import BaseTestCase
from tests.util import require_server_version
Expand Down Expand Up @@ -116,6 +118,32 @@ def test_query_settings_override_client_settings(self):
self.assertEqual(rv, [('max_query_size', '242', 1)])


class InputFormatNullTestCase(BaseTestCase):
# Minimum server version with a stable Map type.
required_server_version = (21, 8, 1)

@parameterized.expand([
('a Int8, b String', [(None, None)], [(0, '')], '0\t\n'),
('a LowCardinality(String)', [(None, )], [('', )], '\n'),
('a Tuple(Int32, Int32)', [(None,)], [((0, 0), )], '(0,0)\n'),
('a Array(Array(Int32))', [(None,)], [([[0]],)], '[[0]]\n'),
('a Map(String, UInt64)', [(None,)], [({},)], '{}\n'),
('a Nested(i Int32)', [(None, )], [([0], )], '[0]\n')
])
def test_input_format_null_as_default(self, spec, data, res, cli_res):
client_settings = {'input_format_null_as_default': True}

with self.created_client(settings=client_settings) as client:
with self.create_table(spec):
client.execute('INSERT INTO test VALUES', data)

query = 'SELECT * FROM test'
inserted = self.emit_cli(query)
self.assertEqual(inserted, cli_res)
inserted = client.execute(query)
self.assertEqual(inserted, res)


class LimitsTestCase(BaseTestCase):
def test_max_result_rows_apply(self):
query = 'SELECT number FROM system.numbers LIMIT 10'
Expand Down
2 changes: 1 addition & 1 deletion tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

def skip_by_server_version(testcase, version_required):
testcase.skipTest(
'Mininum revision required: {}'.format(
'Minimum revision required: {}'.format(
'.'.join(str(x) for x in version_required)
)
)
Expand Down

0 comments on commit ae8aa4a

Please sign in to comment.