# Native DuckDB vs JupySQL

In [1]:
# Install the DataFrame libraries exercised by the benchmarks in this notebook.
# NOTE(review): duckdb, numpy and the `sql` extension are also used further down
# but are not installed here — presumably already present in the environment; confirm.
%pip install pandas polars --quiet

Note: you may need to restart the kernel to use updated packages.


## Pandas

### Native DuckDB

In [1]:
import duckdb
from pandas import DataFrame
import numpy as np

# Benchmark input: a num_rows x 20 frame of standard-normal floats.
# A fixed seed makes the data identical after every kernel restart, so timings
# taken in different sessions are measured against the same input.
SEED = 42
num_rows = 1_000_000

df = DataFrame(np.random.default_rng(SEED).standard_normal((num_rows, 20)))

In [2]:
# Baseline: time DuckDB's own Python API running `select * from df` on a
# throwaway connection and converting the result with .df().
with duckdb.connect() as con:
    %timeit con.sql("select * from df").df()

86.5 ms ± 928 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### JupySQL

In [3]:
# Load the JupySQL `sql` extension and register a fresh DuckDB connection
# with it under the alias "duckdb".
%load_ext sql
conn = duckdb.connect()
%sql conn --alias duckdb
# SqlMagic options for the runs below.
# NOTE(review): autopandas = True is expected to make `%sql` return pandas
# DataFrames instead of result sets — confirm against the installed JupySQL version.
%config SqlMagic.displaycon = False
%config SqlMagic.autopandas = True
%config SqlMagic.displaylimit = 1

In [4]:
# %timeit %sql select * from df

In [5]:
from sql.run import run
from sql.connection import Connection
from unittest.mock import Mock

# Stand-in configuration object: a Mock with the two attributes given
# explicit values up front; any other attribute access yields a child Mock.
config = Mock(autocommit=True, autolimit=0)

In [14]:
from sql.magic import SqlMagic
from IPython import start_ipython, get_ipython
import duckdb
from pandas import DataFrame
import numpy as np

# Benchmark input: a num_rows x 20 frame of standard-normal floats.
# Seeded so that re-running this cell always produces the same data and the
# timings below are measured against a reproducible input.
SEED = 42
num_rows = 1_000_000

df = DataFrame(np.random.default_rng(SEED).standard_normal((num_rows, 20)))

# Build a SqlMagic instance by hand so its execute() method can be called
# (and timed) as a plain Python call.
magic = SqlMagic(get_ipython())

# NOTE(review): `conn` is never handed to `magic` — the line that would
# register it is commented out, and the magic is pointed at the "duckdb://"
# URL instead. Confirm whether this connection is still needed.
conn = duckdb.connect()
# magic.execute(line="conn --alias duckdb")
magic.execute(line="duckdb:// --alias duckdb")
# Options are set directly on this instance.
magic.autopandas = True
magic.displaycon = False


def run_magic():
    """Run the benchmark query through the module-level ``magic`` and return its result."""
    query = "SELECT * FROM df"
    outcome = magic.execute(query)
    return outcome

In [15]:
%%timeit
# Time one complete call to run_magic().
run_magic()

3.61 s ± 18.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit
# Time sql.run.run() called directly with the current connection and the
# `config` object, without going through the magic.
run(Connection.current, "SELECT * FROM df", config)

82.5 ms ± 1.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
%%timeit
df_ = %sql select * from df
df_.DataFrame()

3.45 s ± 21.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# Turn autopandas off and keep the `%sql` result in `df_`, so the cells that
# follow can time its conversion steps one at a time.
%config SqlMagic.autopandas = False
df_ = %sql select * from df

In [8]:
%%timeit
# Time only the conversion of the stored result `df_` via .DataFrame().
df_.DataFrame()

86.5 ms ± 1.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%%timeit
# Re-run the stored statement through the result's `sqlaproxy`, then call
# .df() on that same proxy as a separate step.
# NOTE(review): presumably `sqlaproxy` is the underlying DuckDB connection/cursor — confirm.
df_.sqlaproxy.execute(df_.statement)
df_.sqlaproxy.df()

85.7 ms ± 659 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
%%timeit
# Same statement as the two-step variant, but .df() is called on the object
# returned by execute() instead of on the proxy itself.
df_.sqlaproxy.execute(df_.statement).df()

87.1 ms ± 651 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Polars

### Native DuckDB

In [11]:
# import polars as pl

# df_pl = pl.DataFrame(np.random.randn(num_rows, 20))

In [12]:
# with duckdb.connect() as con:
#     %timeit con.sql("select * from df_pl").pl()

### JupySQL

In [13]:
# %config SqlMagic.autopolars = True

In [14]:
# %%timeit
# df_ = %sql select * from df_pl