Skip to content

Commit

Permalink
Remove boxing issue on dataframes INSERT
Browse files Browse the repository at this point in the history
The transpose parameter causes boxing when it is set to true.
This leads to memory overhead and a speed penalty.
  • Loading branch information
xzkostyan committed Aug 1, 2021
1 parent a486eb3 commit f1b56bb
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 24 deletions.
12 changes: 3 additions & 9 deletions clickhouse_driver/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,20 +395,15 @@ def query_dataframe(
)

def insert_dataframe(
self, query, dataframe, transpose=True, external_tables=None,
query_id=None, settings=None):
self, query, dataframe, external_tables=None, query_id=None,
settings=None):
"""
*New in version 0.2.0.*
Inserts pandas DataFrame with specified query.
:param query: query that will be sent to the server.
:param dataframe: pandas DataFrame.
:param transpose: whether or not transpose DataFrame before sending.
This is necessary action as DataFrame can be sent in
columnar form. If DataFrame is already in columnar
form set this parameter to ``False``.
Defaults to ``True``.
:param external_tables: external tables to send.
Defaults to ``None`` (no external tables).
:param query_id: the query identifier. If no query id specified
Expand All @@ -423,8 +418,7 @@ def insert_dataframe(
except ImportError:
raise RuntimeError('Extras for NumPy must be installed')

frame = dataframe.transpose() if transpose else dataframe
columns = columns = [frame[col].values for col in frame]
columns = [dataframe[col].values for col in dataframe]

return self.execute(
query, columns, columnar=True, external_tables=external_tables,
Expand Down
15 changes: 0 additions & 15 deletions tests/numpy/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,21 +120,6 @@ def test_insert_simple(self):
df2 = self.client.query_dataframe('SELECT * FROM test ORDER BY a')
self.assertTrue(df.equals(df2))

def test_insert_transposed(self):
n = 10
df = pd.DataFrame({
'a': range(n),
'b': [float(x) for x in range(n)]
})

with self.create_table('a Int64, b Float64'):
rv = self.client.insert_dataframe(
'INSERT INTO test VALUES', df.transpose(), transpose=False
)
self.assertEqual(rv, n)
df2 = self.client.query_dataframe('SELECT * FROM test ORDER BY a')
self.assertTrue(df.equals(df2))


class NoNumPyTestCase(BaseTestCase):
def setUp(self):
Expand Down

0 comments on commit f1b56bb

Please sign in to comment.