# Cow + Univ3 DataPipeline

### Setup Jupyter Environment

In [1]:
from subgrounds import Subgrounds

# import concurrent.futures
import os
import pandas as pd
import polars as pl

# These commands enlarge the column size of the dataframe so things like 0x... are not truncated
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', None)

In [2]:
# if data folder doesnt exist, create it
if not os.path.exists('data'):
    os.makedirs('data')

In [3]:
sg = Subgrounds()

### Cowswap Trades

In [4]:
# DEFINE TIMESTAMP HERE. Timstamp is used for replication quality assurance purposes.
timestamp = 1677891498 # current block timestamp is around 1677891498 on March 3rd, 2023 8:06PM

# define ethereum token addresses here to be used in cowswap trades query filter
weth_addr = "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2"
usdc_addr = "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48"

# we set a fixed query size number. The Cow settlements and Uniswap swaps query are multiples larger than this initial query size.
query_size = 10000000 # use this for full historical
# query_size = 11111 # use this for sample testing

#Filter size - We filter trades out that are smaller than $1000 USD size
filter_usd = 1000

In [5]:
token_addr_list = [weth_addr, usdc_addr]

In [6]:
# Load
cow_sg = sg.load_subgraph('https://api.thegraph.com/subgraphs/name/cowprotocol/cow')

# load query path
cow_trades_qp = cow_sg.Query.trades

# define search params
trades_qp = cow_sg.Query.trades(
    orderBy=cow_sg.Query.trades.timestamp,
    orderDirection='desc',
    first=query_size,
    where = {
    'timestamp_lt': timestamp, 
    'buyAmountUsd_gt': filter_usd, 
    'sellAmountUsd_gt': filter_usd, 
    "sellToken_in": token_addr_list, 
    "buyToken_in": token_addr_list
    }
)

In [7]:
trades_df = sg.query_df(trades_qp)

In [8]:
# convert buy and sell amounts to float64
trades_df['trades_gasPrice'] = trades_df['trades_gasPrice'].astype('float64')
trades_df['trades_feeAmount'] = trades_df['trades_feeAmount'].astype('float64')
trades_df['trades_buyAmount'] = trades_df['trades_buyAmount'].astype('float64')
trades_df['trades_sellAmount'] = trades_df['trades_sellAmount'].astype('float64')

In [9]:
# replace addresses with symbols
trades_df['trades_buyToken_id'] = trades_df['trades_buyToken_id'].replace(weth_addr, 'WETH')
trades_df['trades_buyToken_id'] = trades_df['trades_buyToken_id'].replace(usdc_addr, 'USDC')

trades_df['trades_sellToken_id'] = trades_df['trades_sellToken_id'].replace(weth_addr, 'WETH')
trades_df['trades_sellToken_id'] = trades_df['trades_sellToken_id'].replace(usdc_addr, 'USDC')

In [10]:
# convert trades_df to polars dataframe
trades_pl = pl.from_pandas(trades_df)

In [11]:
print(f'query returned {len(trades_pl)} rows')

query returned 21848 rows


### Cowswap Trades-Settlement Merge

In [12]:
# get unique trades_settlement_id list
trades_settlement_id_list = trades_pl['trades_settlement_id'].unique().to_list()

In [13]:
query_index = len(trades_settlement_id_list)

In [14]:
# get index of every 999th value in query_index. Needs to be less than 999 otherwise we get a 413 Request Entity Too Large error. 
# Note that this doesn't happen with the Univ3 subgraph query below, it appears to be subgraph specific. I will leave this as an open conjecture.
query_index_list = [i for i in range(0, query_index, 999)]

data = []

# get field path
settlements_fp = cow_sg.Query.settlements

In [15]:
for i in range(1, len(query_index_list)):
    print(f'querying {query_index_list[i-1]} to {query_index_list[i]}. Remaining: {len(trades_settlement_id_list) - query_index_list[i]}, {((len(trades_settlement_id_list) - query_index_list[i]) / len(trades_settlement_id_list)) * 100:.2f}%')

    # define a "partition", which is part of the larger trades_settlement_id_list
    partition = trades_settlement_id_list[query_index_list[i-1]:query_index_list[i]]

    # define query path with partition of unique settlement ids
    qp = settlements_fp(
        first = query_size,
        where= {"txHash_in": partition},
    )
    
    # run query
    df = sg.query_df(qp)

    # append df to data list
    data.append(df)

querying 0 to 999. Remaining: 19629, 95.16%
querying 999 to 1998. Remaining: 18630, 90.31%
querying 1998 to 2997. Remaining: 17631, 85.47%
querying 2997 to 3996. Remaining: 16632, 80.63%
querying 3996 to 4995. Remaining: 15633, 75.79%
querying 4995 to 5994. Remaining: 14634, 70.94%
querying 5994 to 6993. Remaining: 13635, 66.10%
querying 6993 to 7992. Remaining: 12636, 61.26%
querying 7992 to 8991. Remaining: 11637, 56.41%
querying 8991 to 9990. Remaining: 10638, 51.57%
querying 9990 to 10989. Remaining: 9639, 46.73%
querying 10989 to 11988. Remaining: 8640, 41.88%
querying 11988 to 12987. Remaining: 7641, 37.04%
querying 12987 to 13986. Remaining: 6642, 32.20%
querying 13986 to 14985. Remaining: 5643, 27.36%
querying 14985 to 15984. Remaining: 4644, 22.51%
querying 15984 to 16983. Remaining: 3645, 17.67%
querying 16983 to 17982. Remaining: 2646, 12.83%
querying 17982 to 18981. Remaining: 1647, 7.98%
querying 18981 to 19980. Remaining: 648, 3.14%


In [16]:
# convert dataframes from settlement_data_store to polars dataframes
settlement_data_store_pl = [pl.from_pandas(df) for df in data]

In [17]:
# convert list of pandas dataframes to polars dataframes
settlements_pl = pl.concat(settlement_data_store_pl)

In [18]:
# enforce trades_df column types. This is necessary because the data types are not enforced by pandas dataframes. We need to enforce the types as a preprocessing step to converting to Polars
trades_df['trades_buyAmount'] = trades_df['trades_buyAmount'].astype('float64')
trades_df['trades_sellAmount'] = trades_df['trades_sellAmount'].astype('float64')
trades_df['trades_buyAmountUsd'] = trades_df['trades_buyAmountUsd'].astype('float64')
trades_df['trades_sellAmountUsd'] = trades_df['trades_sellAmountUsd'].astype('float64')
trades_df['trades_timestamp'] = trades_df['trades_timestamp'].astype('int64')
trades_df['trades_buyToken_id'] = trades_df['trades_buyToken_id'].astype('str')
trades_df['trades_sellToken_id'] = trades_df['trades_sellToken_id'].astype('str')

In [19]:
# merge trades and settlement dataframes on the settlement transaction hash
cow_complete_pl = trades_pl.join(other=settlements_pl, left_on='trades_settlement_id', right_on='settlements_txHash', how='inner')

In [20]:
# print f the shapes of the dataframes
print(f'trades_pl shape: {trades_pl.shape}')
print(f'settlements_pl shape: {settlements_pl.shape}')
print(f'cow_complete_pl shape: {cow_complete_pl.shape}')

trades_pl shape: (21848, 15)
settlements_pl shape: (19980, 4)
cow_complete_pl shape: (21170, 18)


### Cowswap Trades-Solver Merge

In [21]:
solvers = pd.read_csv('dune/cowv2_solvers.csv') # load in pandas instead of polars. Having trouble replacing \ symbol in polars

In [22]:
# rename address to settlements_solver_id in pandas
solvers = solvers.rename(columns={"address": "settlements_solver_id"})

In [23]:
# NOTE - dune formats addresses as /x... need to convert '/' to '0'
solvers['settlements_solver_id'] = solvers['settlements_solver_id'].str.replace('\\', '0', regex=False)

In [24]:
# turn solvers into a dictionary
solvers_dict = solvers.to_dict('records')

# convert dict to polars
solvers_pl = pl.from_dicts(solvers_dict)

In [25]:
# inner join solvers_pl on total_settlement_tokens_pl
cow_complete_pl = cow_complete_pl.join(solvers_pl, on="settlements_solver_id", how="inner")

In [26]:
cow_complete_pl.shape

(21080, 21)

In [27]:
# save polars to parquet
# cow_complete_pl.write_parquet('data/cow_complete_pl.parquet')

#### Basic Agg

In [28]:
# filter by "prod" environments
filter_df = cow_complete_pl.filter(pl.col("environment") == "prod")

In [29]:
# filter by "prod" environments
filter_df = cow_complete_pl.filter(pl.col("environment") == "prod")

In [30]:
filter_df.shape

(20499, 21)

In [31]:
# group filter_df by solver name. Check solver count
grouped_df = filter_df.groupby("name").agg(
    pl.count("trades_id").alias("total_trades")).sort("total_trades", reverse=True)


In [32]:
grouped_df

name,total_trades
str,u32
"""Otex""",3152
"""PLM""",2910
"""Gnosis_0x""",2840
"""QuasiModo""",2202
"""Gnosis_1inch""",2201
"""Legacy""",2120
"""Laertes""",923
"""DexCowAgg""",860
"""MIP""",831
"""Gnosis_ParaSwa...",728


### Uniswap V3 Swaps

In [33]:
# Load
univ3_sg = sg.load_subgraph('https://api.thegraph.com/subgraphs/name/messari/uniswap-v3-ethereum')

In [34]:
# query params
weth_usdc_list = [
    "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", # usdc/weth .05%
    "0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8" #usdc/weth .3%
]

# get timestamps list
timestamps_list = cow_complete_pl['trades_timestamp'].to_list()

# filter for unique values
cow_timestamps = list(set(timestamps_list))

In [35]:
swaps_query_index = len(cow_timestamps)

In [36]:
cow_timestamp_query_list = [i for i in range(0, len(cow_timestamps), 999)]
# append the last value in the index list to the query_index_list
cow_timestamp_query_list.append(swaps_query_index)

swaps_data = []

# define query path
uni_swaps_qp = univ3_sg.Query.swaps

In [37]:
for i in range(1, len(cow_timestamp_query_list)):
    print(f'querying {cow_timestamp_query_list[i-1]} to {cow_timestamp_query_list[i]}. Remaining: {len(cow_timestamps) - cow_timestamp_query_list[i]}, {((len(cow_timestamps) - cow_timestamp_query_list[i]) / len(cow_timestamps)) * 100:.2f}%')

    # define a "partition", which is part of the larger cow_timestamps
    partition = cow_timestamps[cow_timestamp_query_list[i-1]:cow_timestamp_query_list[i]]

    # define query path with partition of unique settlement ids
    swaps_qp = uni_swaps_qp(
        orderBy=uni_swaps_qp.timestamp,
        orderDirection='desc',
        first=query_size * 5,
        where = {'timestamp_in': partition, 'amountInUSD_gt': filter_usd, 'amountOutUSD_gt': filter_usd, 'pool_in': weth_usdc_list} 
    )
    
    # run query
    df = sg.query_df(swaps_qp)

    # convert all int64 columns to float64 to avoid large int overflows
    df['swaps_gasLimit'] = df['swaps_gasLimit'].astype('float64')
    df['swaps_gasPrice'] = df['swaps_gasPrice'].astype('float64')
    df['swaps_tick'] = df['swaps_tick'].astype('float64')
    df['swaps_amountIn'] = df['swaps_amountIn'].astype('float64')
    df['swaps_amountOut'] = df['swaps_amountOut'].astype('float64')

    # append df to data list
    swaps_data.append(df)

querying 0 to 999. Remaining: 18868, 94.97%
querying 999 to 1998. Remaining: 17869, 89.94%
querying 1998 to 2997. Remaining: 16870, 84.91%
querying 2997 to 3996. Remaining: 15871, 79.89%
querying 3996 to 4995. Remaining: 14872, 74.86%
querying 4995 to 5994. Remaining: 13873, 69.83%
querying 5994 to 6993. Remaining: 12874, 64.80%
querying 6993 to 7992. Remaining: 11875, 59.77%
querying 7992 to 8991. Remaining: 10876, 54.74%
querying 8991 to 9990. Remaining: 9877, 49.72%
querying 9990 to 10989. Remaining: 8878, 44.69%
querying 10989 to 11988. Remaining: 7879, 39.66%
querying 11988 to 12987. Remaining: 6880, 34.63%
querying 12987 to 13986. Remaining: 5881, 29.60%
querying 13986 to 14985. Remaining: 4882, 24.57%
querying 14985 to 15984. Remaining: 3883, 19.54%
querying 15984 to 16983. Remaining: 2884, 14.52%
querying 16983 to 17982. Remaining: 1885, 9.49%
querying 17982 to 18981. Remaining: 886, 4.46%
querying 18981 to 19867. Remaining: 0, 0.00%


In [38]:
swaps_df = pd.concat(swaps_data) # pandas dependency....TODO - remove and convert replace commands to polars

In [39]:
# replace the pool addresses with LP pool names with fees
swaps_df['swaps_pool_id'] = swaps_df['swaps_pool_id'].replace(weth_usdc_list[0], 'USDC_WETH .05%')
swaps_df['swaps_pool_id'] = swaps_df['swaps_pool_id'].replace(weth_usdc_list[1], 'USDC_WETH .3%')

# replace token addresses with symbols
swaps_df['swaps_tokenIn_id'] = swaps_df['swaps_tokenIn_id'].replace(usdc_addr, 'USDC')
swaps_df['swaps_tokenIn_id'] = swaps_df['swaps_tokenIn_id'].replace(weth_addr, 'WETH')
swaps_df['swaps_tokenOut_id'] = swaps_df['swaps_tokenOut_id'].replace(usdc_addr, 'USDC')
swaps_df['swaps_tokenOut_id'] = swaps_df['swaps_tokenOut_id'].replace(weth_addr, 'WETH')

In [40]:
# enforce swaps_df column types. This is necessary because the data types are not enforced by pandas dataframes. We need to enforce the types as a preprocessing step to converting to Polars
swaps_df['swaps_gasLimit'] = swaps_df['swaps_gasLimit'].astype('float64')
swaps_df['swaps_gasUsed'] = swaps_df['swaps_gasUsed'].astype('float64')
swaps_df['swaps_gasPrice'] = swaps_df['swaps_gasPrice'].astype('float64')
swaps_df['swaps_amountIn'] = swaps_df['swaps_amountIn'].astype('float64')
swaps_df['swaps_amountInUSD'] = swaps_df['swaps_amountInUSD'].astype('float64')
swaps_df['swaps_amountOut'] = swaps_df['swaps_amountOut'].astype('float64')
swaps_df['swaps_amountOutUSD'] = swaps_df['swaps_amountOutUSD'].astype('float64')
swaps_df['swaps_blockNumber'] = swaps_df['swaps_blockNumber'].astype('int64')
swaps_df['swaps_timestamp'] = swaps_df['swaps_timestamp'].astype('int64')

In [41]:
# convert swaps, transactions, and cowswap dataframes to polars
swaps_pl = pl.from_pandas(swaps_df)
# recall cow_trades_pl was already created earlier

In [42]:
# drop duplicate rows
uni_complete_pl = swaps_pl.unique()

In [73]:
uni_complete_pl.shape

(35157, 21)

### Univ3 Gas Calculations

In [43]:
# get median transactions_gasUsed amount. Typical V3 swap is 127k gas for One-hop. However with multiple hops, gas will be higher. 352 reflects an avg of 3 hops worth of gas
tx_gas_median = uni_complete_pl['swaps_gasLimit'].median()
print(f'transaction gas median is {tx_gas_median}')

transaction gas median is 327788.0


In [44]:
# to get transaction gas used, we do gasUsed * gasPrice
uni_complete_pl = uni_complete_pl.with_columns([
    (127000 * pl.col("swaps_gasPrice")).alias('transaction_gas_fee_one_hop'),
    (tx_gas_median * pl.col("swaps_gasPrice")).alias('transaction_gas_fee_median')
    ])

In [45]:
uni_complete_pl = uni_complete_pl.with_columns([
    (pl.col("transaction_gas_fee_one_hop") / 10**18).alias('transaction_gas_fee_one_hop'), # wei is 10^9, but eth is 10^18
        (pl.col("transaction_gas_fee_median") / 10**18).alias('transaction_gas_fee_median') # wei is 10^9, but eth is 10^18
])

In [46]:
# sort by largest transaction_gas_fee
uni_complete_pl.sort("transaction_gas_fee_one_hop", reverse=False).head(5)

swaps_id,swaps_hash,swaps_nonce,swaps_logIndex,swaps_gasLimit,swaps_gasUsed,swaps_gasPrice,swaps_protocol_id,swaps_account_id,swaps_pool_id,swaps_blockNumber,swaps_timestamp,swaps_tick,swaps_tokenIn_id,swaps_amountIn,swaps_amountInUSD,swaps_tokenOut_id,swaps_amountOut,swaps_amountOutUSD,transaction_gas_fee_one_hop,transaction_gas_fee_median
str,str,i64,i64,f64,f64,f64,str,str,str,i64,i64,f64,str,f64,f64,str,f64,f64,f64,f64
"""0xd47608bf5a81...","""0xd47608bf5a81...",42947,2,2000008.0,,2442200000.0,"""0x1f98431c8ad9...","""0xb58555fcba64...","""USDC_WETH .05%...",15427347,1661683811,203271.0,"""WETH""",6.9853e+19,103905.874069,"""USDC""",103870000000.0,103865.446394,0.00031,0.000801
"""0xb6d326647e5a...","""0xb6d326647e5a...",1858,507,445612.0,,2461600000.0,"""0x1f98431c8ad9...","""0xa21740833858...","""USDC_WETH .05%...",15658800,1664693699,204502.0,"""WETH""",5.3841e+18,7074.445209,"""USDC""",7077700000.0,7077.711966,0.000313,0.000807
"""0xf3e2e69fe388...","""0xf3e2e69fe388...",71090,2,304542.0,,2506300000.0,"""0x1f98431c8ad9...","""0x43e4715ae093...","""USDC_WETH .05%...",15386847,1661123836,202394.0,"""WETH""",4.5367e+20,737703.570974,"""USDC""",736750000000.0,736748.733958,0.000318,0.000822
"""0xb5a3f38a211b...","""0xb5a3f38a211b...",1376,255,310656.0,,2540100000.0,"""0x1f98431c8ad9...","""0x6d1247b8acf4...","""USDC_WETH .05%...",15556912,1663461467,203514.0,"""WETH""",1.4969e+18,2175.808649,"""USDC""",2172200000.0,2172.152162,0.000323,0.000833
"""0xb9f7e381f9cc...","""0xb9f7e381f9cc...",150,489,352584.0,,2548800000.0,"""0x1f98431c8ad9...","""0x679c45ee70b4...","""USDC_WETH .05%...",15566808,1663581347,204633.0,"""WETH""",1.4661e+18,1900.566855,"""USDC""",1902400000.0,1902.381369,0.000324,0.000835


### Merge Cow and Univ3

In [47]:
# load cow_complete_pl.parquet
# cow_complete_pl = pl.read_parquet('data/cow_complete_pl.parquet')

In [48]:
# merge trades and swaps on timestamp value. We use outer join because we want to keep all trades and swaps data and backfill swap values
cow_uni_outer_pl = cow_complete_pl.join(other=uni_complete_pl, left_on='trades_timestamp', right_on='swaps_timestamp', how='outer')

In [49]:
cow_uni_outer_pl.columns

['trades_id',
 'trades_timestamp',
 'trades_gasPrice',
 'trades_feeAmount',
 'trades_txHash',
 'trades_settlement_id',
 'trades_buyAmount',
 'trades_sellAmount',
 'trades_sellToken_id',
 'trades_buyToken_id',
 'trades_order_id',
 'trades_buyAmountEth',
 'trades_sellAmountEth',
 'trades_buyAmountUsd',
 'trades_sellAmountUsd',
 'settlements_id',
 'settlements_firstTradeTimestamp',
 'settlements_solver_id',
 'environment',
 'name',
 'active',
 'swaps_id',
 'swaps_hash',
 'swaps_nonce',
 'swaps_logIndex',
 'swaps_gasLimit',
 'swaps_gasUsed',
 'swaps_gasPrice',
 'swaps_protocol_id',
 'swaps_account_id',
 'swaps_pool_id',
 'swaps_blockNumber',
 'swaps_tick',
 'swaps_tokenIn_id',
 'swaps_amountIn',
 'swaps_amountInUSD',
 'swaps_tokenOut_id',
 'swaps_amountOut',
 'swaps_amountOutUSD',
 'transaction_gas_fee_one_hop',
 'transaction_gas_fee_median']

In [50]:
# This truncated dataframe isn't being used right now
cow_uni_trunc_pl = cow_uni_outer_pl[[
    'trades_timestamp', 
    'trades_txHash',
    'trades_feeAmount',
    'trades_sellToken_id', 
    'trades_buyToken_id', 
    'trades_buyAmount',
    'trades_sellAmount',
    # 'trades_sellAmountUsd', 
    # 'trades_buyAmountUsd', 
    'name',
    'environment',
    'swaps_pool_id', 
    'swaps_hash',
    'swaps_tokenIn_id', 
    'swaps_tokenOut_id',
    'swaps_amountIn',
    'swaps_amountOut',  
    # 'swaps_amountInUSD',
    # 'swaps_amountOutUSD',
    'swaps_blockNumber',
    # 'transactions_timestamp',
    # 'transactions_gasUsed',
    # 'transactions_gasPrice',
    'transaction_gas_fee_one_hop',
    'transaction_gas_fee_median'
    ]]

In [51]:
#check pl dataframe size
cow_uni_trunc_pl.shape

(41677, 18)

In [52]:
# sort by largest transaction_gas_fee
cow_uni_trunc_pl.sort("transaction_gas_fee_one_hop", reverse=True).head(5)

trades_timestamp,trades_txHash,trades_feeAmount,trades_sellToken_id,trades_buyToken_id,trades_buyAmount,trades_sellAmount,name,environment,swaps_pool_id,swaps_hash,swaps_tokenIn_id,swaps_tokenOut_id,swaps_amountIn,swaps_amountOut,swaps_blockNumber,transaction_gas_fee_one_hop,transaction_gas_fee_median
i64,str,f64,str,str,f64,f64,str,str,str,str,str,str,f64,f64,i64,f64,f64
1675298507,"""0xbd95e01eae7f...",1.1769e+16,"""WETH""","""USDC""",20061000000.0,1.2e+19,"""Gnosis_1inch""","""prod""","""USDC_WETH .05%...","""0xacba1467bf35...","""USDC""","""WETH""",2235200000000.0,1.3259e+21,16537612,2.085911,5.383753
1658126179,"""0x864210143e92...",3086400000000000.0,"""WETH""","""USDC""",39766000000.0,2.75e+19,"""PLM""","""prod""","""USDC_WETH .05%...","""0x1e2a2d6b6df4...","""USDC""","""WETH""",568170000000.0,3.913e+20,15165041,1.115926,2.880214
1653065597,"""0xfc1e15e06c23...",1.6109e+16,"""WETH""","""USDC""",5773500000.0,3e+18,"""Otex""","""prod""","""USDC_WETH .05%...","""0xceb4019c9a0e...","""USDC""","""WETH""",1601700000000.0,8.3179e+20,14812252,1.080211,2.788032
1674060383,"""0x025ff0eeb001...",1.337e+16,"""WETH""","""USDC""",75172000000.0,5e+19,"""PLM""","""prod""","""USDC_WETH .05%...","""0xaef9a74bccb7...","""USDC""","""WETH""",1165200000000.0,7.7305e+20,16434975,0.940701,2.427957
1675298507,"""0xbd95e01eae7f...",1.1769e+16,"""WETH""","""USDC""",20061000000.0,1.2e+19,"""Gnosis_1inch""","""prod""","""USDC_WETH .3%""","""0x92c37a88b5ea...","""USDC""","""WETH""",951570000000.0,5.6368e+20,16537612,0.935032,2.413326


In [53]:
sample_data = cow_uni_trunc_pl[['swaps_hash', 'transaction_gas_fee_one_hop']].sort("transaction_gas_fee_one_hop", reverse=True).head(5)

In [54]:
sample_data

swaps_hash,transaction_gas_fee_one_hop
str,f64
"""0xacba1467bf35...",2.085911
"""0x1e2a2d6b6df4...",1.115926
"""0xceb4019c9a0e...",1.080211
"""0xaef9a74bccb7...",0.940701
"""0x92c37a88b5ea...",0.935032


In [55]:
sample_data['swaps_hash'].to_list()

['0xacba1467bf359ea6e6518d455b53e2a837e7987b95d43d53170c6a07f31cbdb2',
 '0x1e2a2d6b6df455ba39515036a3fec29484966f6744a20c66f2c9cda0964bdd53',
 '0xceb4019c9a0e317644ec8a70cd5c1d109f8c843c87cf32fbba38d2db10c45509',
 '0xaef9a74bccb7d15751c61f678c2d181bb30552d32a5c48b3e1de8585f8258656',
 '0x92c37a88b5eac266a3b1febd1c8977d3694929120dc0fca464256b41d68ec2e8']

### Chainlink

In [56]:
chain_sg = sg.load_subgraph('https://api.thegraph.com/subgraphs/name/openpredict/chainlink-prices-subgraph')

# define qp
chain_price_qp = chain_sg.Query.prices

In [57]:
chain_qp = chain_price_qp(
    first=query_size * 5,
    orderBy='timestamp',
    orderDirection='desc',
    where = {'timestamp_lt': timestamp, 'assetPair': "ETH/USD"}
    )

In [58]:
chain_df = sg.query_df(chain_qp)

In [59]:
# convert chain_df to polars
chain_pl = pl.from_pandas(chain_df)

# drop prices_id
chain_pl = chain_pl.drop(['prices_id'])

In [60]:
# divide prices_prices by 10 ** 8
chain_pl = chain_pl.with_columns([
    (pl.col("prices_price") / 10**8).alias('prices_prices')
    ])

In [61]:
# outer merge chain_df with cow_uni_outer_pl on timestamp
cow_uni_chain_outer_pl = cow_uni_trunc_pl.join(other=chain_pl, left_on='trades_timestamp', right_on='prices_timestamp', how='outer')

In [62]:
cow_uni_chain_outer_pl.shape

(166280, 21)

### Price Calculations

#### Decimal Calculations

In [63]:
# add decimals to cow trades sell tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('trades_sellToken_id'),
        (
            pl.when(pl.col('trades_sellToken_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('trades_sellToken_decimals'),
    ]
)

# add decimals to cow trades buy tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('trades_buyToken_id'),
        (
            pl.when(pl.col('trades_buyToken_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('trades_buyToken_decimals'),
    ]
)

# add decimals to cow trades sell tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('swaps_tokenIn_id'),
        (
            pl.when(pl.col('swaps_tokenIn_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('swaps_tokenIn_decimals'),
    ]
)

# add decimals to cow trades buy tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('swaps_tokenOut_id'),
        (
            pl.when(pl.col('swaps_tokenOut_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('swaps_tokenOut_decimals'),
    ]
)

In [64]:
cow_uni_chain_outer_pl.columns # NOTE - the DF has transaction cols

['trades_timestamp',
 'trades_txHash',
 'trades_feeAmount',
 'trades_sellToken_id',
 'trades_buyToken_id',
 'trades_buyAmount',
 'trades_sellAmount',
 'name',
 'environment',
 'swaps_pool_id',
 'swaps_hash',
 'swaps_tokenIn_id',
 'swaps_tokenOut_id',
 'swaps_amountIn',
 'swaps_amountOut',
 'swaps_blockNumber',
 'transaction_gas_fee_one_hop',
 'transaction_gas_fee_median',
 'prices_assetPair_id',
 'prices_price',
 'prices_prices',
 'trades_sellToken_decimals',
 'trades_buyToken_decimals',
 'swaps_tokenIn_decimals',
 'swaps_tokenOut_decimals']

#### Execution Price Calculations

In [65]:
# cast all of the columns as numerics
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns([
        pl.col('trades_sellAmount').cast(pl.Float64),
        pl.col('trades_buyAmount').cast(pl.Float64),
        pl.col('swaps_amountIn').cast(pl.Float64),
        pl.col('swaps_amountOut').cast(pl.Float64)
        ])

In [66]:
# NOTE - Polars can perform these calculations in-column. This means it can convert the values in place without creating a new column. The new column created here is more verbose, but is a good sanity check to see before/after results.
trades_swaps_converted_pl = cow_uni_chain_outer_pl.with_columns([
    (pl.col("trades_buyAmount") / (10**pl.col("trades_buyToken_decimals"))).alias('trades_buyAmount_converted'),
    (pl.col("trades_sellAmount") / (10**pl.col("trades_sellToken_decimals"))).alias('trades_sellAmount_converted'),
    (pl.col("swaps_amountIn") / (10**pl.col("swaps_tokenIn_decimals"))).alias('swaps_amountIn_converted'),
    (pl.col("swaps_amountOut") / (10**pl.col("swaps_tokenOut_decimals"))).alias('swaps_amountOut_converted'),
])

In [67]:
trades_swaps_converted_trunc_pl = trades_swaps_converted_pl.with_columns([
    (pl.col("trades_buyAmount_converted") / pl.col("trades_sellAmount_converted")).alias('trades_buy_sell_ratio'),
    (pl.col("trades_sellAmount_converted") / pl.col("trades_buyAmount_converted")).alias('trades_sell_buy_ratio'),
    (pl.col("swaps_amountIn_converted") / pl.col("swaps_amountOut_converted")).alias('swaps_amountIn_amountOut_ratio'),
    (pl.col("swaps_amountOut_converted") / pl.col("swaps_amountIn_converted")).alias('swaps_amountOut_amountIn_ratio'),
])

In [68]:
trades_swaps_converted_trunc_pl = trades_swaps_converted_trunc_pl[
    'trades_timestamp',
    'swaps_blockNumber',
    'trades_txHash',
    'trades_feeAmount',
    'trades_sellToken_id',
    'trades_buyToken_id',
    'trades_sellAmount_converted',
    'trades_buyAmount_converted',
    'name',
    'environment',
    'swaps_pool_id',
    'swaps_tokenIn_id',
    'swaps_tokenOut_id',
    'swaps_amountIn_converted',
    'swaps_amountOut_converted',
    'transaction_gas_fee_one_hop',
    'transaction_gas_fee_median',
    'trades_buy_sell_ratio',
    'trades_sell_buy_ratio',
    'swaps_amountIn_amountOut_ratio',
    'swaps_amountOut_amountIn_ratio',
    'prices_assetPair_id',
    'prices_price'
]

In [69]:
trades_swaps_converted_trunc_pl.shape

(166280, 23)

In [70]:
trades_swaps_converted_trunc_pl['prices_price']

prices_price
i64
157511232828
156836770000
156615320000
156030000000
156032602886
155254000000
156062000000
156660140000
156198543000
155640000000


### Save Data to a local parquet file

In [71]:
# checkpoint, save to parquet
trades_swaps_converted_trunc_pl.write_parquet('data/cow_uni_chain_outer_pl_historical.parquet')