# Standalone function clickhouse_multiquery

In [None]:
from clickhttp import clickhouse_multiquery


# clickhouse_multiquery is intended for Apache Airflow and does not return a DataFrame.
multiquery: str = """{Clickhouse_multiquery_string}"""
connection_id: str = "{airflow_connection_id}"
clickhouse_multiquery(
    multiquery=multiquery,
    connection=connection_id,
)

# Class ClickHttpSession

## Getting Started

### Enable logging

In [1]:
import logging


# Send log events to the console at INFO level; the session status messages
# shown in the cell outputs below are emitted as INFO records
logging.basicConfig(level=logging.INFO)

### Creating UserConn object to provide connection parameters

In [2]:
from clickhttp import UserConn


# Build the connection object by hand, which is convenient for local tests
conn = UserConn(
    'user',       # Login
    'password',   # Password
    'localhost',  # Address
    8123,         # Port
    'default',    # Schema
)

In [None]:
from clickhttp import get_conn, UserConn


# Build the connection object from an Airflow Connection ID instead of
# listing the parameters by hand
conn: UserConn = get_conn('connection_id')

### ClickHttpSession class initializing

In [4]:
from clickhttp import ClickHttpSession, FrameType


sess = ClickHttpSession(
    connection=conn,              # UserConn object; the only mandatory argument
    frame_type=FrameType.pandas,  # Preferred DataFrame type (default: pandas)
    chunk_size=52_428_800,        # Maximum size in bytes of a packet sent when inserting a DataFrame (default: 50 MB)
    is_compressed=True,           # Work with the server in compression mode (default: True)
    proxy=None,                   # Proxy server address (default: no proxy)
    timeout=None,                 # Time to wait for a server response (default: disabled)
)

print(sess)  # Show the session status

INFO:root:
---------------------------------------------------------------------------------------------
| Clickhouse Multi-Query session started. Session ID: 4a9adf30-3a26-48fd-8b36-8d955c0f3c95. |
---------------------------------------------------------------------------------------------


ClickHttpSession object.
Status:      Open
Session ID:  4a9adf30-3a26-48fd-8b36-8d955c0f3c95
Server Mode: Compressed


### If necessary, you can enable or disable packet compression after the class has been initialized

In [8]:
sess.change_mode  # Accessed without parentheses; switches the session between compressed and uncompressed mode

INFO:root:
--------------------------------------------------------------
| Clickhouse Multi-Query session mode changed to Compressed. |
--------------------------------------------------------------


### The output_format attribute shows which DataFrame type is currently selected for reading data from the server

In [6]:
sess.output_format  # Currently selected DataFrame type; here it is pandas

'pandas'

In [5]:

sess.frame_type = FrameType.polars  # Switch the preferred type to polars.DataFrame
sess.output_format  # The reported type is now polars

'polars'

### Opening and Closing a Session

#### When the `with` context manager is used, the session is opened automatically and closed upon completion of the block.

#### Closing the session manually is done through the close() method, while opening a new session is accomplished using the reopen() method

In [8]:
sess.close()   # Close the current session

sess.reopen()  # Open a new session (it is started under a new Session ID)

INFO:root:
------------------------------------------
| Clickhouse Multi-Query session closed. |
------------------------------------------
INFO:root:
-------------------------------------------------------------------------------------------------
| New Clickhouse Multi-Query session started. Session ID: 774a247c-493d-4a42-bdba-84ade0df27a9. |
-------------------------------------------------------------------------------------------------


### Setting a Proxy Server After Class Initialization

In [9]:
# To connect through a proxy, pass the proxy server address as a string
# to the set_proxy method in the form <proxy-server address>:<port>.
# The proxy is applied to both the HTTP and HTTPS protocols.
sess.set_proxy('http://localhost:431')

# To stop using the proxy, call set_proxy without any arguments
sess.set_proxy()

INFO:root:
-----------------------------------------------------------------------------
| ClickHttpSession change proxy settings with proxy 'http://localhost:431'. |
-----------------------------------------------------------------------------
INFO:root:
------------------------------------------
| ClickHttpSession proxy settings clear. |
------------------------------------------


### Sending a Command to Server

In [None]:
# Use the execute method to send the server a command that is not expected to return a DataFrame
command: str = "TRUNCATE TABLE {table_name}"  # Provided as an example
sess.execute(command)  # This method does not return anything

### Reading DataFrame

In [6]:
# As an example, fetch simple data from the Clickhouse database with a query
# that does not touch any tables; the result comes back as a polars.DataFrame
query: str = """select
    today()                     as test_date
  , now()                       as test_datetime
  , toDateTime(now()
             , 'Europe/Moscow') as test_timestamp
  , toDateTime(now()
             , 'UTC')           as test_tsutc
  , 1                           as test_int
  , 1.0                         as test_float
  , false                       as test_bool
  , array(1
        , 2
        , 3)                    as test_arrayint
  , array('one'
        , 'two'
        , 'three')              as test_arraystr
"""

# read_frame returns a Frame object; the DataFrame itself is in its data attribute
df = sess.read_frame(query)

print(df.data)

shape: (1, 9)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ test_date ┆ test_date ┆ test_time ┆ test_tsut ┆ … ┆ test_floa ┆ test_bool ┆ test_arra ┆ test_arr │
│ ---       ┆ time      ┆ stamp     ┆ c         ┆   ┆ t         ┆ ---       ┆ yint      ┆ aystr    │
│ date      ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ bool      ┆ ---       ┆ ---      │
│           ┆ datetime[ ┆ datetime[ ┆ datetime[ ┆   ┆ i64       ┆           ┆ list[i64] ┆ list[str │
│           ┆ μs]       ┆ μs]       ┆ μs]       ┆   ┆           ┆           ┆           ┆ ]        │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2024-08-2 ┆ 2024-08-2 ┆ 2024-08-2 ┆ 2024-08-2 ┆ … ┆ 1         ┆ false     ┆ [1, 2, 3] ┆ ["one",  │
│ 6         ┆ 6         ┆ 6         ┆ 6         ┆   ┆           ┆           ┆           ┆ "two",   │
│           ┆ 06:44:58  ┆ 06:44:58  ┆ 06:44:58  ┆   ┆           ┆           ┆

### Frame Object structure

In [7]:
# Besides the DataFrame itself, the previously obtained df object carries additional useful information
print(df.columns)     # List of column names
print(df.types)       # List of original data types
print(df.time_read)   # Time taken by the server to send the data
print(df.bytes_read)  # Number of bytes transmitted, as reported by the server (it is hard to tell how exactly the server calculates it)
df.data               # The DataFrame itself

['test_date', 'test_datetime', 'test_timestamp', 'test_tsutc', 'test_int', 'test_float', 'test_bool', 'test_arrayint', 'test_arraystr']
['Date', 'DateTime', "DateTime('Europe/Moscow')", "DateTime('UTC')", 'UInt8', 'Float64', 'Bool', 'Array(UInt8)', 'Array(String)']
0.001652217
1


test_date,test_datetime,test_timestamp,test_tsutc,test_int,test_float,test_bool,test_arrayint,test_arraystr
date,datetime[μs],datetime[μs],datetime[μs],i64,i64,bool,list[i64],list[str]
2024-08-26,2024-08-26 06:44:58,2024-08-26 06:44:58,2024-08-26 06:44:58,1,1,False,"[1, 2, 3]","[""one"", ""two"", ""three""]"


### The temp_query Method: Automatically Creating a Temporary Table from a Query

In [8]:
# As an example, reuse the previous query

temp_table: str = sess.temp_query(query)  # Creates a temporary table filled with the query result and returns its name

temp_data = sess.read_frame(f"select * from {temp_table}").data  # Read the temporary table created above into a DataFrame

print(temp_data)

INFO:root:
---------------------------------------
| Get names and data types from query |
---------------------------------------
INFO:root:
----------------------------------------------------------
| Generate DDL for temporary table temp_141ac3cf966d8116 |
----------------------------------------------------------
INFO:root:
--------------------------------------------------
| Temporary table temp_141ac3cf966d8116 created. |
--------------------------------------------------


shape: (1, 9)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ test_date ┆ test_date ┆ test_time ┆ test_tsut ┆ … ┆ test_floa ┆ test_bool ┆ test_arra ┆ test_arr │
│ ---       ┆ time      ┆ stamp     ┆ c         ┆   ┆ t         ┆ ---       ┆ yint      ┆ aystr    │
│ date      ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ bool      ┆ ---       ┆ ---      │
│           ┆ datetime[ ┆ datetime[ ┆ datetime[ ┆   ┆ i64       ┆           ┆ list[i64] ┆ list[str │
│           ┆ μs]       ┆ μs]       ┆ μs]       ┆   ┆           ┆           ┆           ┆ ]        │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2024-08-2 ┆ 2024-08-2 ┆ 2024-08-2 ┆ 2024-08-2 ┆ … ┆ 1         ┆ false     ┆ [1, 2, 3] ┆ ["one",  │
│ 6         ┆ 6         ┆ 6         ┆ 6         ┆   ┆           ┆           ┆           ┆ "two",   │
│           ┆ 06:45:15  ┆ 06:45:15  ┆ 06:45:15  ┆   ┆           ┆           ┆

### The send_multiquery Method: Executing a Multiquery Against the Database

#### This method is the primary reason for the creation of this class. A string containing multiple queries is passed as a parameter, and after executing all steps, the method will return the result of the last query as a DataFrame

In [9]:
# As an example, build a multiquery that creates a temporary table from the
# earlier query and then returns the resulting DataFrame
multiquery: str = f"""
select 1; -- this to verify that query will not return a result from this action
CREATE TEMPORARY TABLE IF NOT EXISTS test_temp_table
(
test_date Date,
test_datetime DateTime,
test_timestamp DateTime('Europe/Moscow'),
test_tsutc DateTime('UTC'),
test_int UInt8,
test_float Float64,
test_bool Bool,
test_arrayint Array(UInt8),
test_arraystr Array(String)
)
ENGINE = MergeTree
ORDER BY test_date
AS
{query};
select * from test_temp_table;
"""
# Debug aid (disabled): prints the statements obtained by splitting the multiquery on ';'
# print(multiquery.rstrip().rstrip(";").split(";"))
multiframe = sess.send_multiquery(multiquery).data  # Only the result of the last query is returned

multiframe

INFO:root:
-------------------
| Part 1 started. |
-------------------
INFO:root:
-------------------
| Part 1 success. |
-------------------
INFO:root:
-------------------
| Part 2 started. |
-------------------
INFO:root:
-------------------
| Part 2 success. |
-------------------
INFO:root:
-------------------
| Part 3 started. |
-------------------
INFO:root:
-----------------------------
| Part 3 success. All done. |
-----------------------------


test_date,test_datetime,test_timestamp,test_tsutc,test_int,test_float,test_bool,test_arrayint,test_arraystr
date,datetime[μs],datetime[μs],datetime[μs],i64,i64,bool,list[i64],list[str]
2024-08-26,2024-08-26 06:45:19,2024-08-26 06:45:19,2024-08-26 06:45:19,1,1,False,"[1, 2, 3]","[""one"", ""two"", ""three""]"


### The insert_table Method: Writing Data from a DataFrame to a Table

In [10]:
# insert_table works with any supported type of DataFrame.
# It takes two mandatory arguments: table - the name of the target table, and data_frame - a DataFrame holding the data in one of the supported formats.
# There is also one optional boolean argument, use_columns (default is True).
# When it is True, the order and names of the DataFrame columns are passed to the table. Note that frames in plain Python (nested list) and numpy formats do not contain column names.
# While writing, rows are grouped into packets whose size does not exceed sess.chunk_size.
# When compression is enabled, each packet is gzip-compressed after it is formed and is sent to the server in a more compact form.

# As an example, create a table in the database and write the data from multiframe into it
table: str = "default.test_table"
ddl: str = f"""CREATE TABLE IF NOT EXISTS {table}
(
test_date Date,
test_datetime DateTime,
test_timestamp DateTime('Europe/Moscow'),
test_tsutc DateTime('UTC'),
test_int UInt8,
test_float Float64,
test_bool Bool,
test_arrayint Array(UInt8),
test_arraystr Array(String)
)
ENGINE = MergeTree
ORDER BY test_date"""

# A DDL statement returns no DataFrame, so it is sent with execute,
# the method intended for commands that do not produce a result set
sess.execute(ddl)  # create table

sess.insert_table(table=table, data_frame=multiframe,)  # insert data to table

print(sess.read_frame(f"select * from {table}").data)  # read table data to DataFrame

sess.execute(f"drop table {table}")  # drop table from server

INFO:root:
--------------------------------------------
| Sending chunk with 1—1 rows from 1 rows. |
--------------------------------------------
INFO:root:
-------------------------
| Insert chunk success. |
-------------------------
INFO:root:
-----------------------------
| Insert operation success. |
-----------------------------
INFO:root:
-----------------
| Command send. |
-----------------


shape: (1, 9)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ test_date ┆ test_date ┆ test_time ┆ test_tsut ┆ … ┆ test_floa ┆ test_bool ┆ test_arra ┆ test_arr │
│ ---       ┆ time      ┆ stamp     ┆ c         ┆   ┆ t         ┆ ---       ┆ yint      ┆ aystr    │
│ date      ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ bool      ┆ ---       ┆ ---      │
│           ┆ datetime[ ┆ datetime[ ┆ datetime[ ┆   ┆ i64       ┆           ┆ list[i64] ┆ list[str │
│           ┆ μs]       ┆ μs]       ┆ μs]       ┆   ┆           ┆           ┆           ┆ ]        │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2024-08-2 ┆ 2024-08-2 ┆ 2024-08-2 ┆ 2024-08-2 ┆ … ┆ 1         ┆ false     ┆ [1, 2, 3] ┆ ["one",  │
│ 6         ┆ 5         ┆ 5         ┆ 5         ┆   ┆           ┆           ┆           ┆ "two",   │
│           ┆ 20:45:19  ┆ 17:45:19  ┆ 20:45:19  ┆   ┆           ┆           ┆