In [2]:
import os

from snowflake.snowpark import Session
from snowflake.snowpark.functions import udf, sproc, servicesproc
from snowflake.snowpark import functions as F
from snowflake.snowpark.types import *

import pandas as pd

In [3]:
# Connection settings for the Snowflake account used throughout this notebook.
# The password is read from the environment so it never appears in the notebook.
connection_parameters = {
    "account": "xe85544.east-us-2.azure",
    "user": "kx",
    "password": os.environ['SNOWFLAKE_TEMP_PASSWORD'],
    "role": "SYSADMIN",  # optional
    "warehouse": "KX",  # medium snowpark-optimized
    "database": "KX",
    "schema": "BRUNO",
}

# Every run of this cell opens a new session. If the previous one is left open,
# Snowpark sees several active sessions and later decorator-based registrations
# fail with error 1409 ("More than one active session is detected").
# Close the session created by an earlier run before opening a new one.
_stale_session = globals().get("session")
if isinstance(_stale_session, Session):
    _stale_session.close()

session = Session.builder.configs(connection_parameters).create()

In [3]:
%%time
trades_sdf = session.table('TRADES')

select_columns = ['"TTime"', '"Symbol"', '"Trade Volume"', '"Trade Price"']
trades = trades_sdf.select(select_columns).where('"Symbol" = \'AAPL\'').order_by('"TTime"').toPandas()

trades

CPU times: user 1.28 s, sys: 114 ms, total: 1.4 s
Wall time: 4.33 s


Unnamed: 0,TTime,Symbol,Trade Volume,Trade Price
0,2022-10-03 04:00:00.012552067,AAPL,2,137.550
1,2022-10-03 04:00:00.012552343,AAPL,64,137.500
2,2022-10-03 04:00:00.012552436,AAPL,36,137.900
3,2022-10-03 04:00:00.012643634,AAPL,3,137.540
4,2022-10-03 04:00:00.012692606,AAPL,5,137.540
...,...,...,...,...
838789,2022-10-03 19:59:50.902418168,AAPL,6,142.900
838790,2022-10-03 19:59:51.484659661,AAPL,1,142.935
838791,2022-10-03 19:59:54.649888288,AAPL,1,142.935
838792,2022-10-03 19:59:55.559408346,AAPL,1,142.935


In [4]:
%%time
quotes_sdf = session.table('QUOTES')

select_columns = ['"TTime"', '"Symbol"', '"Bid_Price"', '"Bid_Size"', '"Offer_Price"', '"Offer_Size"']
quotes = quotes_sdf.select(select_columns).where('"Symbol" = \'AAPL\'').order_by('"TTime"').toPandas()

quotes

CPU times: user 4.15 s, sys: 885 ms, total: 5.04 s
Wall time: 13.9 s


Unnamed: 0,TTime,Symbol,Bid_Price,Bid_Size,Offer_Price,Offer_Size
0,2022-10-03 03:59:00.050935611,AAPL,0.00,0.0,0.00,0.0
1,2022-10-03 03:59:00.078477448,AAPL,0.00,0.0,0.00,0.0
2,2022-10-03 04:00:00.009934601,AAPL,0.00,0.0,138.21,1.0
3,2022-10-03 04:00:00.009981257,AAPL,110.77,1.0,138.21,1.0
4,2022-10-03 04:00:00.009998906,AAPL,130.50,1.0,138.21,1.0
...,...,...,...,...,...,...
12045309,2022-10-03 19:59:50.902415031,AAPL,142.88,1.0,142.99,42.0
12045310,2022-10-03 19:59:58.256064537,AAPL,142.88,4.0,142.99,42.0
12045311,2022-10-03 19:59:59.419239696,AAPL,142.88,3.0,142.99,42.0
12045312,2022-10-03 19:59:59.419247419,AAPL,142.88,3.0,142.99,42.0


In [5]:
# ASOF Join -- Pandas

In [6]:
%%time

asof_pandas_df = pd.merge_asof(trades, quotes, 
                      on='TTime', 
                      by='Symbol', 
                      direction='backward')
asof_pandas_df

CPU times: user 1.12 s, sys: 277 ms, total: 1.4 s
Wall time: 1.65 s


Unnamed: 0,TTime,Symbol,Trade Volume,Trade Price,Bid_Price,Bid_Size,Offer_Price,Offer_Size
0,2022-10-03 04:00:00.012552067,AAPL,2,137.550,137.00,1.0,138.20,10.0
1,2022-10-03 04:00:00.012552343,AAPL,64,137.500,137.00,1.0,138.20,10.0
2,2022-10-03 04:00:00.012552436,AAPL,36,137.900,137.00,1.0,138.20,10.0
3,2022-10-03 04:00:00.012643634,AAPL,3,137.540,137.00,1.0,138.20,10.0
4,2022-10-03 04:00:00.012692606,AAPL,5,137.540,137.00,1.0,138.20,10.0
...,...,...,...,...,...,...,...,...
838789,2022-10-03 19:59:50.902418168,AAPL,6,142.900,142.88,1.0,142.99,42.0
838790,2022-10-03 19:59:51.484659661,AAPL,1,142.935,142.88,1.0,142.99,42.0
838791,2022-10-03 19:59:54.649888288,AAPL,1,142.935,142.88,1.0,142.99,42.0
838792,2022-10-03 19:59:55.559408346,AAPL,1,142.935,142.88,1.0,142.99,42.0


In [25]:
# NOTE(review): registering this procedure failed in this notebook with Snowpark
# error 1409 because more than one session was active; make sure only one
# session is open (or pass the session explicitly) before running this cell.
@servicesproc(packages=['snowflake-snowpark-python','pykx'])
def kx(session: Session) -> dict:
    """Time an as-of join of AAPL trades against AAPL quotes in q via PyKX.

    The procedure opens its own Snowflake session, loads the AAPL rows of the
    TRADES and QUOTES tables into pandas, copies both frames into the embedded
    q process and runs the as-of joins there.

    Args:
        session: Session supplied by the procedure framework. It is not used;
            the data is read through a separately created session.

    Returns:
        A dict with two entries, both formatted as ``str(timedelta)``:
        ``'pandas_to_kx'`` (time to copy the pandas frames into q) and
        ``'q'`` (time to run the q statements).

    Raises:
        KeyError: if ``SNOWFLAKE_TEMP_PASSWORD`` is not set in the environment
            in which this procedure executes.
    """
    import os
    import site
    import base64
    from datetime import datetime
    from snowflake.snowpark import Session

    # The password comes from the environment, like the notebook-level
    # connection, so that no credential is stored in the source. The variable
    # must be defined wherever this procedure actually runs.
    connection_parameters = {
        "account": "xe85544.east-us-2.azure",
        "user": "kx",
        "password": os.environ['SNOWFLAKE_TEMP_PASSWORD'],
        "role": "SYSADMIN",  # optional
        "warehouse": "KX",  # medium snowpark-optimized
        "database": "KX",
        "schema": "BRUNO",
    }

    def load_aapl(snowpark_session, table, source_columns, renamed_columns):
        """Return the AAPL rows of `table`, ordered by "TTime", as a pandas frame."""
        frame = (
            snowpark_session.table(table)
            .select(source_columns)
            .where('"Symbol" = \'AAPL\'')
            .order_by('"TTime"')
            .toPandas()
        )
        frame.columns = renamed_columns
        return frame.reset_index(drop=True)

    # PyKX reads its license from QHOME, so the license file has to be in place
    # before the package is imported.
    # NOTE(review): the license blob is embedded in the source; consider
    # supplying it at deployment time instead.
    path = site.getsitepackages()[0] + '/pykx/lib'
    os.environ['QHOME'] = path
    with open(f'{path}/k4.lic', 'wb') as f:
        f.write(base64.b64decode('htlG8YWFM4VysiYzhbKyQUkRKWbIUBTIspYOWxCWRxcNvVukpkf1ptNWDaZjliymDBcNW1tIpg6rc/kXhyf8veNWY8snq3JFhukzlqYmVpY7vUy9NkVzLGv+GFs2Ng5Ipiw2RssOm0cHq/7Ta5PZ2UYmuUZ5uTPiRgyxRmMIDHkMuUVj'))

    import pykx as kx

    session2 = Session.builder.configs(connection_parameters).create()
    try:
        # Import trades data
        trades = load_aapl(
            session2,
            'TRADES',
            ['"TTime"', '"Symbol"', '"Trade Volume"', '"Trade Price"'],
            ['time', 'symbol', 'tvolume', 'tprice'],
        )

        # Import quote data
        quotes = load_aapl(
            session2,
            'QUOTES',
            ['"TTime"', '"Symbol"', '"Bid_Price"', '"Bid_Size"', '"Offer_Price"', '"Offer_Size"'],
            ['time', 'symbol', 'qbid_price', 'qbid_size', 'qask_price', 'qask_size'],
        )
    finally:
        # The session is only needed for the two downloads; release it even
        # when a query fails so repeated calls do not accumulate connections.
        session2.close()

    # Phase 1: copy the pandas frames into the q process.
    start_1 = datetime.now()

    kx.q['trades'] = trades
    kx.q['quotes'] = quotes

    end_1 = datetime.now()

    # Phase 2: the q workload itself.
    start_2 = datetime.now()

    # Apply the grouped attribute to symbol and the sorted attribute to time.
    kx.q('trades: update `g#symbol, `s#time from trades')
    kx.q('quotes: update `g#symbol, `s#time from quotes')
    # As-of join: latest quote at or before each trade, per symbol.
    kx.q('rez:aj[`symbol`time; trades; quotes]')
    # Mid price of the matched quote and the trade's distance from it.
    kx.q('rez2: update qmid_price: (qbid_price + qask_price) % 2 from rez')
    kx.q('rez2: update diff:tprice - qmid_price from rez2')
    # Second as-of join against quotes whose timestamps are moved 10 seconds
    # earlier, which attaches the mid price as of 10 seconds after each trade.
    kx.q('rez2: aj[`symbol`time; rez2; select symbol, time: time - 0D00:00:10, mid10:(qbid_price + qask_price) % 2 from quotes]')

    end_2 = datetime.now()

    return {
        'pandas_to_kx': str(end_1-start_1),
        'q': str(end_2-start_2),
    }


SnowparkSessionException: (1409): More than one active session is detected. When you call function 'udf' or use decorator '@udf', you must specify the 'session' parameter if you created multiple sessions.Alternatively, you can use 'session.udf.register' to register UDFs

In [21]:
# Run the kx procedure; it returns the two timing strings it measured.
kx()

{'result': {'pandas_to_kx': '0:00:02.775237', 'q': '0:00:00.379403'}}

In [16]:
# NOTE(review): 'pykx' is still requested in `packages` although this procedure
# no longer imports it; confirm which packages the GPU service really needs.
@servicesproc(snowservice='GPU', packages=['snowflake-snowpark-python','pykx'])
def rapidsai(session: Session) -> dict:
    """Time a backward as-of join of AAPL trades against AAPL quotes on cuDF.

    The procedure opens its own Snowflake session, loads the AAPL rows of the
    TRADES and QUOTES tables into pandas, copies both frames to cuDF and joins
    every trade with the latest quote at or before its timestamp.

    Args:
        session: Session supplied by the procedure framework. It is not used;
            the data is read through a separately created session.

    Returns:
        A dict with two entries, both formatted as ``str(timedelta)``:
        ``'pandas_to_cudf'`` (time to copy the pandas frames to cuDF) and
        ``'cudf'`` (time to run the as-of join).

    Raises:
        KeyError: if ``SNOWFLAKE_TEMP_PASSWORD`` is not set in the environment
            in which this procedure executes.
    """
    import os
    from datetime import datetime
    from snowflake.snowpark import Session
    import cudf

    # The password comes from the environment, like the notebook-level
    # connection, so that no credential is stored in the source. The variable
    # must be defined wherever this procedure actually runs.
    connection_parameters = {
        "account": "xe85544.east-us-2.azure",
        "user": "kx",
        "password": os.environ['SNOWFLAKE_TEMP_PASSWORD'],
        "role": "SYSADMIN",  # optional
        "warehouse": "KX",  # medium snowpark-optimized
        "database": "KX",
        "schema": "BRUNO",
    }

    def load_aapl(snowpark_session, table, source_columns, renamed_columns):
        """Return the AAPL rows of `table`, ordered by "TTime", as a pandas frame."""
        frame = (
            snowpark_session.table(table)
            .select(source_columns)
            .where('"Symbol" = \'AAPL\'')
            .order_by('"TTime"')
            .toPandas()
        )
        frame.columns = renamed_columns
        return frame.reset_index(drop=True)

    quote_value_columns = ['qbid_price', 'qbid_size', 'qask_price', 'qask_size']

    session2 = Session.builder.configs(connection_parameters).create()
    try:
        # Import trades data
        trades = load_aapl(
            session2,
            'TRADES',
            ['"TTime"', '"Symbol"', '"Trade Volume"', '"Trade Price"'],
            ['time', 'symbol', 'tvolume', 'tprice'],
        )

        # Import quote data
        quotes = load_aapl(
            session2,
            'QUOTES',
            ['"TTime"', '"Symbol"', '"Bid_Price"', '"Bid_Size"', '"Offer_Price"', '"Offer_Size"'],
            ['time', 'symbol'] + quote_value_columns,
        )
    finally:
        # The session is only needed for the two downloads; release it even
        # when a query fails so repeated calls do not accumulate connections.
        session2.close()

    # Phase 1: copy the pandas frames to the GPU.
    start_1 = datetime.now()

    ctrades = cudf.DataFrame.from_pandas(trades)
    cquotes = cudf.DataFrame.from_pandas(quotes)

    end_1 = datetime.now()

    # Phase 2: backward as-of join on the GPU.
    start_2 = datetime.now()

    # cuDF does not provide merge_asof, so the join is built from a binary
    # search. Both frames contain only AAPL and are ordered by time (see the
    # queries above), so the position of the latest quote at or before a trade
    # is one less than the right-sided insertion point of the trade time.
    quote_position = cudf.Series(
        cquotes['time'].searchsorted(ctrades['time'], side='right')
    ) - 1
    # A position of -1 means no quote precedes the trade.
    has_quote = quote_position >= 0
    matched_quotes = (
        cquotes[quote_value_columns]
        .take(quote_position.where(has_quote, 0))
        .reset_index(drop=True)
    )
    # Blank out the placeholder row picked for trades without a prior quote.
    for column in quote_value_columns:
        matched_quotes[column] = matched_quotes[column].where(has_quote)
    asof_cudf_df = cudf.concat([ctrades, matched_quotes], axis=1)

    end_2 = datetime.now()

    return {
        'pandas_to_cudf': str(end_1-start_1),
        'cudf': str(end_2-start_2),
    }


In [17]:
# Run the rapidsai procedure; it returns the two timing strings it measured.
rapidsai()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)