<a href="https://colab.research.google.com/github/singhmanas1/showcase/blob/polars_demo/polars_gpu_engine_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Print the first lines of `nvidia-smi` to confirm a GPU is visible to this runtime.
!nvidia-smi | head

Tue Sep  3 22:39:54 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P0              28W /  70W |   7641MiB / 15360MiB |      0%      Default |


In [10]:
# Install Polars and the cudf-polars (GPU engine) wheel.

In [9]:
!pip install polars==1.5
!pip install "cudf_polars_cu12-24.8.0a433-py3-none-any.whl" --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple

Looking in indexes: https://pypi.org/simple, https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
Processing ./cudf_polars_cu12-24.8.0a433-py3-none-any.whl
cudf-polars-cu12 is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.


If you aren't running in Google Colab, please install by following the instructions in the README.

In [1]:
import polars as pl
from polars.testing import assert_frame_equal

In [2]:
# The version shown should be `1.5.x`; if it is not, restart the session.
pl.__version__

'1.5.0'

In [3]:
# Open the Parquet file lazily; the queries below execute when `.collect()` is called.
transactions = pl.scan_parquet("100M_transactions.parquet")

In [8]:
%%time
# Collect only the first 100 rows with the GPU engine and print them as a text table.
print(transactions.head(100).collect(engine="gpu"))

shape: (100, 10)
┌────────────┬────────────┬──────────┬─────────────────┬───┬───────┬─────┬───────────────┬────────┐
│ CUST_ID    ┆ START_DATE ┆ END_DATE ┆ TRANS_ID        ┆ … ┆ MONTH ┆ DAY ┆ EXP_TYPE      ┆ AMOUNT │
│ ---        ┆ ---        ┆ ---      ┆ ---             ┆   ┆ ---   ┆ --- ┆ ---           ┆ ---    │
│ str        ┆ date       ┆ date     ┆ str             ┆   ┆ i64   ┆ i64 ┆ str           ┆ f64    │
╞════════════╪════════════╪══════════╪═════════════════╪═══╪═══════╪═════╪═══════════════╪════════╡
│ CI6XLYUMQK ┆ 2015-05-01 ┆ null     ┆ T8I9ZB5A6X90UG8 ┆ … ┆ 9     ┆ 11  ┆ Motor/Travel  ┆ 20.27  │
│ CI6XLYUMQK ┆ 2015-05-01 ┆ null     ┆ TZ4JSLS7SC7FO9H ┆ … ┆ 2     ┆ 8   ┆ Motor/Travel  ┆ 12.85  │
│ CI6XLYUMQK ┆ 2015-05-01 ┆ null     ┆ TTUKRDDJ6B6F42H ┆ … ┆ 8     ┆ 1   ┆ Housing       ┆ 383.8  │
│ CI6XLYUMQK ┆ 2015-05-01 ┆ null     ┆ TDUHFRUKGPPI6HD ┆ … ┆ 3     ┆ 16  ┆ Entertainment ┆ 5.72   │
│ CI6XLYUMQK ┆ 2015-05-01 ┆ null     ┆ T0JBZHBMSVRFMMD ┆ … ┆ 5     ┆ 15  ┆ Entertai

In [5]:
%%time

result = (
    transactions
    .group_by("CUST_ID")
    .agg(pl.col("AMOUNT").sum())
    .sort(by="AMOUNT", descending=True)
    .head()
    .collect(engine="gpu")
)

print(result)

shape: (5, 2)
┌────────────┬──────────┐
│ CUST_ID    ┆ AMOUNT   │
│ ---        ┆ ---      │
│ str        ┆ f64      │
╞════════════╪══════════╡
│ CP2KXQSX9I ┆ 2.3108e6 │
│ CGOKEO2EH4 ┆ 2.2721e6 │
│ CXYJF3GWQU ┆ 2.2389e6 │
│ C6JC5K02HA ┆ 2.1787e6 │
│ CVH8KQGTUE ┆ 2.1193e6 │
└────────────┴──────────┘
CPU times: user 574 ms, sys: 93.3 ms, total: 667 ms
Wall time: 683 ms


In [6]:
%%time

result = (
    transactions
    .group_by("CUST_ID")
    .agg(pl.col("AMOUNT").max().alias("max_amount"))
    .sort(by="max_amount", descending=True)
    .head(10)
    .collect(engine="gpu")
)

# Pretty print the dataframe
print(result)

shape: (10, 2)
┌────────────┬────────────┐
│ CUST_ID    ┆ max_amount │
│ ---        ┆ ---        │
│ str        ┆ f64        │
╞════════════╪════════════╡
│ CHF93DNS7X ┆ 6334.35    │
│ CZWNES9XE7 ┆ 6333.97    │
│ C90Z7WVK7W ┆ 6333.4     │
│ CDX7EY0YZZ ┆ 6321.0     │
│ CTLQHZ43M3 ┆ 6317.29    │
│ CNDMIE41TL ┆ 6314.18    │
│ C01218TOCY ┆ 6312.37    │
│ CXDPMWDPIZ ┆ 6303.4     │
│ C9USG5C24Y ┆ 6300.8     │
│ CKD303LHXE ┆ 6300.67    │
└────────────┴────────────┘
CPU times: user 385 ms, sys: 72.9 ms, total: 458 ms
Wall time: 461 ms


In [7]:
%%time

result = (
    transactions
    .group_by(["EXP_TYPE", "YEAR", "MONTH"])
    .agg(pl.mean("AMOUNT"))
    .sort(["EXP_TYPE", "YEAR", "MONTH"])
    .head(10)
    .collect(engine="gpu")
)

# Pretty print the dataframe
print(result)

shape: (10, 4)
┌─────────────────────┬──────┬───────┬────────────┐
│ EXP_TYPE            ┆ YEAR ┆ MONTH ┆ AMOUNT     │
│ ---                 ┆ ---  ┆ ---   ┆ ---        │
│ str                 ┆ i64  ┆ i64   ┆ f64        │
╞═════════════════════╪══════╪═══════╪════════════╡
│ Bills and Utilities ┆ 2010 ┆ 1     ┆ 204.85838  │
│ Bills and Utilities ┆ 2010 ┆ 2     ┆ 206.592982 │
│ Bills and Utilities ┆ 2010 ┆ 3     ┆ 212.972266 │
│ Bills and Utilities ┆ 2010 ┆ 4     ┆ 211.853896 │
│ Bills and Utilities ┆ 2010 ┆ 5     ┆ 208.788923 │
│ Bills and Utilities ┆ 2010 ┆ 6     ┆ 210.744436 │
│ Bills and Utilities ┆ 2010 ┆ 7     ┆ 209.58854  │
│ Bills and Utilities ┆ 2010 ┆ 8     ┆ 209.404764 │
│ Bills and Utilities ┆ 2010 ┆ 9     ┆ 210.791806 │
│ Bills and Utilities ┆ 2010 ┆ 10    ┆ 213.101376 │
└─────────────────────┴──────┴───────┴────────────┘
CPU times: user 505 ms, sys: 78 ms, total: 583 ms
Wall time: 565 ms
