Skip to content

Commit

Permalink
Merge pull request #365 from joelgibson/datetime64-speedup
Browse files Browse the repository at this point in the history
Speed up reading Datetime64 in numpy mode.
  • Loading branch information
xzkostyan committed Mar 26, 2023
2 parents 2942ae3 + 5849ce5 commit 94f2585
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 7 deletions.
14 changes: 7 additions & 7 deletions clickhouse_driver/columns/numpy/datetimecolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def read_items(self, n_items, buf):


class NumpyDateTime64Column(NumpyDateTimeColumnBase):
dtype = np.dtype(np.uint64)
dtype = np.dtype(np.int64)
datetime_dtype = 'datetime64[ns]'

max_scale = 9
Expand All @@ -75,15 +75,15 @@ def __init__(self, scale=0, **kwargs):
super(NumpyDateTime64Column, self).__init__(**kwargs)

def read_items(self, n_items, buf):
    """Read ``n_items`` DateTime64 values from ``buf`` as ``datetime64[ns]``.

    The raw integers come from the parent class's ``read_items`` and are
    rescaled to nanoseconds with a single integer multiplication, then
    reinterpreted in place as ``datetime64[ns]``. This avoids splitting
    each value into whole seconds and a fractional remainder.

    :param n_items: number of values to read.
    :param buf: source buffer, passed through to the parent reader.
    :return: result of ``apply_timezones_after_read`` applied to the
             ``datetime64[ns]`` array.
    """
    # ClickHouse: t seconds is represented as t * 10^scale.
    # datetime64[ns]: t seconds is represented as t * 10^max_scale.
    # Since 0 <= scale <= max_scale, multiply by the integer
    # 10^(max_scale - scale).
    ticks = super(NumpyDateTime64Column, self).read_items(n_items, buf)

    # The multiplication allocates a fresh array, so the array returned
    # by the parent reader is never mutated and no explicit copy is
    # needed.
    nanoseconds = ticks * 10 ** (self.max_scale - self.scale)

    # Both dtypes are 8 bytes wide, so this is a zero-copy reinterpretation.
    dt = nanoseconds.view(dtype='datetime64[ns]')

    return self.apply_timezones_after_read(dt)

def write_items(self, items, buf):
Expand Down
13 changes: 13 additions & 0 deletions tests/numpy/columns/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,19 @@ def test_insert_integer_bounds(self):
inserted = self.emit_cli(query)
self.assertEqual(inserted, '0\n1\n1500000000\n4294967295\n')

@require_server_version(20, 1, 2)
def test_negative_timestamps(self):
    """Round-trip a pre-1970 value through a DateTime64(3, 'UTC') column."""
    # 1900-01-01 lies before the Unix epoch, so its datetime64 integer
    # representation is negative.
    expected = np.array(['1900-01-01 00:00'], dtype='datetime64[ns]')

    with self.create_table("a DateTime64(3, 'UTC')"):
        self.client.execute(
            'INSERT INTO test(a) VALUES', [expected], columnar=True
        )

        fetched = self.client.execute('SELECT * FROM test', columnar=True)
        self.assertArraysEqual(fetched[0], expected)


class DateTimeTimezonesTestCase(BaseDateTimeTestCase):
dt_type = 'DateTime'
Expand Down

0 comments on commit 94f2585

Please sign in to comment.