# Cow + Univ3 DataPipeline

### Setup Jupyter Environment

In [6]:
from datastreams.datastream import Streamer

# import concurrent.futures
import os
import pandas as pd
import polars as pl

# These commands enlarge the column size of the dataframe so things like 0x... are not truncated
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', None)

### Cowswap Trades

In [2]:
# instantiate Streamer class. Note that we need two separate streamer classes, otherwise the queries will be overwritten. 
cow_ds1 = Streamer('https://api.thegraph.com/subgraphs/name/cowprotocol/cow')
cow_ds2 = Streamer('https://api.thegraph.com/subgraphs/name/cowprotocol/cow')

In [3]:
# DEFINE TIMESTAMP HERE. Timstamp is used for replication quality assurance purposes.
timestamp = 1677891498 # current block timestamp is around 1677891498 on March 3rd, 2023 8:06PM

# define ethereum token addresses here to be used in cowswap trades query filter
weth_addr = "0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2"
usdc_addr = "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48"

# we set a fixed query size number. The Cow settlements and Uniswap swaps query are multiples larger than this initial query size.
query_size = 10000000

#Filter size - We filter trades out that are smaller than $1000 USD size
filter_usd = 1000

In [4]:
token_addr_list = [weth_addr, usdc_addr]

In [5]:
# We need to make two queries to the cow schema to get all the trades that match weth/usdc and usdc/weth.
trades_weth_usdc_fp = cow_ds1.queryDict.get('trades')
trades_usdc_weth_fp = cow_ds2.queryDict.get('trades')

# trades query path that gets token a -> token b trades
trades_weth_usdc_qp = trades_weth_usdc_fp(
    first=query_size,
    orderBy='timestamp',
    orderDirection='desc',
    where = {
    'timestamp_lt': timestamp, 
    'buyAmountUsd_gt': filter_usd, 
    'sellAmountUsd_gt': filter_usd, 
    "sellToken_in": token_addr_list, 
    "buyToken_in": token_addr_list
    }
)

# trades query path that gets token b -> token a trades
trades_usdc_weth_qp = trades_usdc_weth_fp(
    first=query_size,
    orderBy='timestamp',
    orderDirection='desc',
    where = {
    'timestamp_lt': timestamp, 
    'buyAmountUsd_gt': filter_usd, 
    'sellAmountUsd_gt': filter_usd, 
    "sellToken_in": token_addr_list, 
    "buyToken_in": token_addr_list
    }
)

# run query
trades_weth_usdc_df = cow_ds1.runQuery(trades_weth_usdc_qp)
trades_usdc_weth_df = cow_ds2.runQuery(trades_usdc_weth_qp)

FIELD - trades


KeyboardInterrupt: 

In [None]:
# combine the trades queries together
trades_df = pd.concat([trades_weth_usdc_df, trades_usdc_weth_df])

In [None]:
print(f'query returned {len(trades_df)} rows')

query returned 43696 rows


In [None]:
# verify the query results to ensure that only two tokens were queried.
trades_df['trades_buyToken_id'].unique()

array(['0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2',
       '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'], dtype=object)

In [None]:
# replace addresses with symbols
trades_df['trades_buyToken_id'] = trades_df['trades_buyToken_id'].replace(weth_addr, 'WETH')
trades_df['trades_buyToken_id'] = trades_df['trades_buyToken_id'].replace(usdc_addr, 'USDC')

trades_df['trades_sellToken_id'] = trades_df['trades_sellToken_id'].replace(weth_addr, 'WETH')
trades_df['trades_sellToken_id'] = trades_df['trades_sellToken_id'].replace(usdc_addr, 'USDC')

In [None]:
trades_df.columns

Index(['trades_id', 'trades_timestamp', 'trades_gasPrice', 'trades_feeAmount',
       'trades_txHash', 'trades_settlement_id', 'trades_buyAmount',
       'trades_sellAmount', 'trades_sellToken_id', 'trades_buyToken_id',
       'trades_order_id', 'trades_buyAmountEth', 'trades_sellAmountEth',
       'trades_buyAmountUsd', 'trades_sellAmountUsd', 'endpoint'],
      dtype='object')

### Cowswap Trades-Settlement Merge

In [None]:
# do same query methood but instead of timestamps, use trades_settlement_id

# get unique trades_settlement_id list
trades_settlement_id_list = trades_df['trades_settlement_id'].unique()

# define field path
settlements_fp = cow_ds1.queryDict.get('settlements')

In [None]:
settlement_df_list = []

In [None]:
for i in range(0, len(trades_settlement_id_list)): # inner loop through cowswap timestamps
    print(f'round {i} of {len(trades_settlement_id_list)}')
    # define the query path
    settlements_qp = settlements_fp(
        first=query_size * 3,
        orderBy='firstTradeTimestamp',
        orderDirection='desc',
        where = {'txHash': trades_settlement_id_list[i]} 
        )
    
    # run query
    df = cow_ds1.runQuery(settlements_qp)

    if df.empty:
        print('empty')
        continue
    else:
        settlement_df_list.append(df)

round 0 of 20628
FIELD - settlements
round 1 of 20628
FIELD - settlements
round 2 of 20628
FIELD - settlements
round 3 of 20628
FIELD - settlements
round 4 of 20628
FIELD - settlements
round 5 of 20628
FIELD - settlements
round 6 of 20628
FIELD - settlements
round 7 of 20628
FIELD - settlements
round 8 of 20628
FIELD - settlements
round 9 of 20628
FIELD - settlements
round 10 of 20628
FIELD - settlements
round 11 of 20628
FIELD - settlements
round 12 of 20628
FIELD - settlements
round 13 of 20628
FIELD - settlements
round 14 of 20628
FIELD - settlements
round 15 of 20628
FIELD - settlements
round 16 of 20628
FIELD - settlements
round 17 of 20628
FIELD - settlements
round 18 of 20628
FIELD - settlements
round 19 of 20628
FIELD - settlements
round 20 of 20628
FIELD - settlements
round 21 of 20628
FIELD - settlements
round 22 of 20628
FIELD - settlements
round 23 of 20628
FIELD - settlements
round 24 of 20628
FIELD - settlements
round 25 of 20628
FIELD - settlements
round 26 of 20628
FIEL

In [None]:
# concatenate
settlements_df = pd.concat(settlement_df_list)

In [None]:
# enforce trades_df column types. This is necessary because the data types are not enforced by pandas dataframes. We need to enforce the types as a preprocessing step to converting to Polars
trades_df['trades_buyAmount'] = trades_df['trades_buyAmount'].astype('float64')
trades_df['trades_sellAmount'] = trades_df['trades_sellAmount'].astype('float64')
trades_df['trades_buyAmountUsd'] = trades_df['trades_buyAmountUsd'].astype('float64')
trades_df['trades_sellAmountUsd'] = trades_df['trades_sellAmountUsd'].astype('float64')
trades_df['trades_timestamp'] = trades_df['trades_timestamp'].astype('int64')
trades_df['trades_buyToken_id'] = trades_df['trades_buyToken_id'].astype('str')
trades_df['trades_sellToken_id'] = trades_df['trades_sellToken_id'].astype('str')

In [None]:
# convert dictionaries into polars dataframes
settlement_pl = pl.from_pandas(settlements_df)
trades_pl = pl.from_pandas(trades_df)

In [None]:
# merge trades and settlement dataframes on the settlement transaction hash
cow_trades_pl = trades_pl.join(other=settlement_pl, left_on='trades_settlement_id', right_on='settlements_txHash', how='inner')

In [None]:
cow_trades_pl.shape

(43696, 20)

In [None]:
cow_trades_pl.head(5)

trades_id,trades_timestamp,trades_gasPrice,trades_feeAmount,trades_txHash,trades_settlement_id,trades_buyAmount,trades_sellAmount,trades_sellToken_id,trades_buyToken_id,trades_order_id,trades_buyAmountEth,trades_sellAmountEth,trades_buyAmountUsd,trades_sellAmountUsd,endpoint,settlements_id,settlements_firstTradeTimestamp,settlements_solver_id,endpoint_right
str,i64,i64,i64,str,str,f64,f64,str,str,str,f64,f64,f64,f64,str,str,i64,str,str
"""0x2d177cbcc3e2...",1677890687,34031938581,11504283,"""0xdbeb3db4bf01...","""0xdbeb3db4bf01...",6.3506e+19,100000000000.0,"""USDC""","""WETH""","""0x2d177cbcc3e2...",63.506413,63.678801,99729.286519,100000.0,"""cow""","""0xdbeb3db4bf01...",1677890687,"""0x149d0f928233...","""cow"""
"""0x47ece80491bf...",1677887663,30096110884,39337855,"""0x8810dcd24713...","""0x8810dcd24713...",9.5578e+19,150000000000.0,"""USDC""","""WETH""","""0x47ece80491bf...",95.578018,95.778401,149686.177736,150000.0,"""cow""","""0x8810dcd24713...",1677887663,"""0x149d0f928233...","""cow"""
"""0x8c129381cf3e...",1677885323,19689355198,27018672,"""0xf8e2a0e1ae13...","""0xf8e2a0e1ae13...",9.5518e+19,150000000000.0,"""USDC""","""WETH""","""0x8c129381cf3e...",95.518272,95.806942,149548.044201,150000.0,"""cow""","""0xf8e2a0e1ae13...",1677885323,"""0xb20b86c4e6de...","""cow"""
"""0xc07f45a19d95...",1677882191,21220765432,4843587156540876,"""0x4eadf5384177...","""0x4eadf5384177...",2803000000.0,1.8e+18,"""WETH""","""USDC""","""0xc07f45a19d95...",1.798916,1.8,2803.044267,2804.733026,"""cow""","""0x4eadf5384177...",1677882191,"""0xc9ec550bea1c...","""cow"""
"""0x7159b0217959...",1677882011,21515734906,3250564090489318,"""0xfb4569f85710...","""0xfb4569f85710...",20249000000.0,1.2966e+19,"""WETH""","""USDC""","""0x7159b0217959...",12.995247,12.965727,20248.988319,20202.990692,"""cow""","""0xfb4569f85710...",1677882011,"""0x149d0f928233...","""cow"""


In [None]:
# get unique values in cow_trades_pl trades_sellToken_id column
cow_trades_pl['trades_sellToken_id'].unique()

trades_sellToken_id
str
"""USDC"""
"""WETH"""


### Cowswap Trades-Solver Merge

In [None]:
solvers = pd.read_csv('data/cowv2_solvers.csv') # load in pandas instead of polars. Having trouble replacing \ symbol in polars

In [None]:
# rename address to settlements_solver_id in pandas
solvers = solvers.rename(columns={"address": "settlements_solver_id"})

In [None]:
# NOTE - dune formats addresses as /x... need to convert '/' to '0'
solvers['settlements_solver_id'] = solvers['settlements_solver_id'].str.replace('\\', '0', regex=False)

In [None]:
# turn solvers into a dictionary
solvers_dict = solvers.to_dict('records')

# convert dict to polars
solvers_pl = pl.from_dicts(solvers_dict)

In [None]:
# inner join solvers_pl on total_settlement_tokens_pl
cow_complete_pl = cow_trades_pl.join(solvers_pl, on="settlements_solver_id", how="inner")

In [None]:
# drop endpoint_right column from total_settlement_tokens_solvers
cow_complete_pl = cow_complete_pl.drop('endpoint_right')

In [None]:
cow_complete_pl.shape

(43514, 22)

In [None]:
# save polars to parquet
cow_complete_pl.write_parquet('data/cow_complete_pl.parquet')

#### Basic Agg

In [None]:
# filter by "prod" environments
filter_df = cow_complete_pl.filter(pl.col("environment") == "prod")

In [None]:
# filter by "prod" environments
filter_df = cow_complete_pl.filter(pl.col("environment") == "prod")

In [None]:
filter_df.shape

(42310, 22)

In [None]:
# group filter_df by solver name. Check solver count
grouped_df = filter_df.groupby("name").agg(
    pl.count("trades_id").alias("total_trades")).sort("total_trades", reverse=True)


In [None]:
grouped_df

name,total_trades
str,u32
"""Otex""",6510
"""PLM""",6020
"""Gnosis_0x""",5866
"""Gnosis_1inch""",4568
"""QuasiModo""",4556
"""Legacy""",4358
"""Laertes""",1914
"""DexCowAgg""",1774
"""MIP""",1706
"""Gnosis_ParaSwa...",1504


### Uniswap V3 Swaps

In [7]:
# load parquet
cow_complete_pl = pl.read_parquet('data/cow_complete_pl.parquet')

In [8]:
# instantiate Streamer object. 
# Note - unlike the cow queries, univ3 does not require multiple streamer instantations because the swaps field path is reset each iteration. 
# If the Cow queries were updated to use the same method, we could use the same streamer object for all queries.
univ3_ds = Streamer('https://api.thegraph.com/subgraphs/name/messari/uniswap-v3-ethereum')

In [9]:
# get a query field path from the query dictionary which is automatically populated in the Streamer object
swaps_fp = univ3_ds.queryDict.get('swaps')

In [10]:
weth_usdc_list = [
    "0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640", # usdc/weth .05%
    "0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8" #usdc/weth .3%
]

In [11]:
timestamps_list = cow_complete_pl['trades_timestamp'].to_list()
# get unique values in cow_timestamps
cow_timestamps = list(set(timestamps_list))

swaps_df_list = []

NameError: name 'cow_trades_pl' is not defined

In [None]:
for lp in weth_usdc_list: # outer loop loops through liquidity pools
    for i in range(0, len(cow_timestamps)): # inner loop through cowswap timestamps
        print(f'round {i} of {len(cow_timestamps)}, timestamp: {cow_timestamps[i]}')
        # define the query path
        swaps_qp = swaps_fp(
            first=10,
            orderBy='timestamp',
            orderDirection='desc',
            where = {'timestamp_lt': cow_timestamps[i], 'amountInUSD_gt': filter_usd, 'amountOutUSD_gt': filter_usd, 'pool': lp} 
            )

        # run query
        df = univ3_ds.runQuery(swaps_qp)

        if df.empty:
            print('empty')
            continue
        else:
            swaps_df_list.append(df)

round 0 of 20603, timestamp: 1648623621
FIELD - swaps
round 1 of 20603, timestamp: 1652555791
FIELD - swaps
round 2 of 20603, timestamp: 1634992143
FIELD - swaps
round 3 of 20603, timestamp: 1674575891
FIELD - swaps
round 4 of 20603, timestamp: 1664221211
FIELD - swaps
round 5 of 20603, timestamp: 1636565026
FIELD - swaps
round 6 of 20603, timestamp: 1633681442
FIELD - swaps
round 7 of 20603, timestamp: 1673658407
FIELD - swaps
round 8 of 20603, timestamp: 1661468720
FIELD - swaps
round 9 of 20603, timestamp: 1638006837
FIELD - swaps
round 10 of 20603, timestamp: 1636696118
FIELD - swaps
round 11 of 20603, timestamp: 1636565047
FIELD - swaps
round 12 of 20603, timestamp: 1636433974
FIELD - swaps
round 13 of 20603, timestamp: 1640366138
FIELD - swaps
round 14 of 20603, timestamp: 1661993031
FIELD - swaps
round 15 of 20603, timestamp: 1669988435
FIELD - swaps
round 16 of 20603, timestamp: 1639317593
FIELD - swaps
round 17 of 20603, timestamp: 1669070951
FIELD - swaps
round 18 of 20603, t

PaginationError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [None]:
# concat swaps_df_list into a single dataframe.
swaps_df = pd.concat(swaps_df_list)

In [None]:
swaps_df.shape

In [None]:
# print min and max swaps_timestamp
print(swaps_df['swaps_timestamp'].min())
print(swaps_df['swaps_timestamp'].max())

In [None]:
# replace the pool addresses with LP pool names with fees
swaps_df['swaps_pool_id'] = swaps_df['swaps_pool_id'].replace(weth_usdc_list[0], 'USDC_WETH .05%')
swaps_df['swaps_pool_id'] = swaps_df['swaps_pool_id'].replace(weth_usdc_list[1], 'USDC_WETH .3%')

# replace token addresses with symbols
swaps_df['swaps_tokenIn_id'] = swaps_df['swaps_tokenIn_id'].replace(usdc_addr, 'USDC')
swaps_df['swaps_tokenIn_id'] = swaps_df['swaps_tokenIn_id'].replace(weth_addr, 'WETH')
swaps_df['swaps_tokenOut_id'] = swaps_df['swaps_tokenOut_id'].replace(usdc_addr, 'USDC')
swaps_df['swaps_tokenOut_id'] = swaps_df['swaps_tokenOut_id'].replace(weth_addr, 'WETH')

In [None]:
print(f'query returned {len(swaps_df)} rows\n swaps_df columns are {swaps_df.columns}')

In [12]:
# save polars to parquet
swaps_df.write_parquet('data/swaps_df.parquet')

NameError: name 'swaps_df' is not defined

### Get the Uniswap Gas Data

In [13]:
univ3_no_messari_ds = Streamer('https://api.thegraph.com/subgraphs/name/uniswap/uniswap-v3')

In [14]:
transactions_fp = univ3_no_messari_ds.queryDict.get('transactions')

In [15]:
transactions_df_list = []

In [16]:
for lp in weth_usdc_list: # outer loop loops through liquidity pools
    for i in range(0, len(cow_timestamps)): # inner loop through cowswap timestamps
        print(f'round {i} of {len(cow_timestamps)}, timestamp: {cow_timestamps[i]}')
        # define the query path
        txs_qp = transactions_fp(
            first=10,
            orderBy='timestamp',
            orderDirection='desc',
            where = {"swaps_": {'timestamp': cow_timestamps[i], 'pool': lp}}
            )
        
        # run query
        df = univ3_no_messari_ds.runQuery(txs_qp)

        if df.empty:
            print('empty')
            continue
        else:
            transactions_df_list.append(df)

NameError: name 'cow_timestamps' is not defined

In [None]:
# concat transactions_df_list
transactions_df = pd.concat(transactions_df_list)

In [None]:
# save polars to parquet
transactions_df.write_parquet('data/transactions_df.parquet')

In [None]:
transactions_df.shape

### Everything is checkpointed, if it fails again I can just reload the parquet files and run from here

In [None]:
# get dataframe row types swaps_df
swaps_df.dtypes

In [None]:
# enforce swaps_df column types. This is necessary because the data types are not enforced by pandas dataframes. We need to enforce the types as a preprocessing step to converting to Polars
swaps_df['swaps_gasLimit'] = swaps_df['swaps_gasLimit'].astype('float64')
swaps_df['swaps_gasUsed'] = swaps_df['swaps_gasUsed'].astype('float64')
swaps_df['swaps_gasPrice'] = swaps_df['swaps_gasPrice'].astype('float64')
swaps_df['swaps_amountIn'] = swaps_df['swaps_amountIn'].astype('float64')
swaps_df['swaps_amountInUSD'] = swaps_df['swaps_amountInUSD'].astype('float64')
swaps_df['swaps_amountOut'] = swaps_df['swaps_amountOut'].astype('float64')
swaps_df['swaps_amountOutUSD'] = swaps_df['swaps_amountOutUSD'].astype('float64')
swaps_df['swaps_blockNumber'] = swaps_df['swaps_blockNumber'].astype('int64')
swaps_df['swaps_timestamp'] = swaps_df['swaps_timestamp'].astype('int64')

In [None]:
# convert swaps, transactions, and cowswap dataframes to polars
swaps_pl = pl.from_pandas(swaps_df)
transactions_pl = pl.from_pandas(transactions_df)
# recall cow_trades_pl was already created earlier

### Merge Swaps and Transactions for Gas

In [None]:
# merge swaps_pl and transactions_pl polars
uni_complete_pl = swaps_pl.join(transactions_pl, left_on="swaps_hash", right_on="transactions_id", how="inner")

In [None]:
# drop duplicate rows
uni_complete_pl = uni_complete_pl.unique()

In [None]:
# drop endpoint column from uni_complete_pl
uni_complete_pl = uni_complete_pl.drop('endpoint')

In [None]:
# get median transactions_gasUsed amount. Typical V3 swap is 127k gas for One-hop. However with multiple hops, gas will be higher. 352 reflects an avg of 3 hops worth of gas
tx_gas_median = uni_complete_pl['transactions_gasUsed'].median()
print(f'transaction gas median is {tx_gas_median}')

In [None]:
# to get transaction gas used, we do gasUsed * gasPrice
uni_complete_pl = uni_complete_pl.with_columns([
    (127000 * pl.col("transactions_gasPrice")).alias('transaction_gas_fee_one_hop'),
    (tx_gas_median * pl.col("transactions_gasPrice")).alias('transaction_gas_fee_median')
    ])

In [None]:
uni_complete_pl = uni_complete_pl.with_columns([
    (pl.col("transaction_gas_fee_one_hop") / 10**18).alias('transaction_gas_fee_one_hop'), # wei is 10^9, but eth is 10^18
        (pl.col("transaction_gas_fee_median") / 10**18).alias('transaction_gas_fee_median') # wei is 10^9, but eth is 10^18
])

In [None]:
# sort by largest transaction_gas_fee
uni_complete_pl.sort("transaction_gas_fee_one_hop", reverse=False).head(5)

### Merge Cow and Univ3

In [None]:
# merge trades and swaps on timestamp value. We use outer join because we want to keep all trades and swaps data and backfill swap values
cow_uni_outer_pl = cow_complete_pl.join(other=uni_complete_pl, left_on='trades_timestamp', right_on='swaps_timestamp', how='outer')

In [None]:
cow_uni_outer_pl.columns

In [None]:
# This truncated dataframe isn't being used right now
cow_uni_trunc_pl = cow_uni_outer_pl[[
    'trades_timestamp', 
    'trades_txHash',
    'trades_feeAmount',
    'trades_sellToken_id', 
    'trades_buyToken_id', 
    'trades_buyAmount',
    'trades_sellAmount',
    # 'trades_sellAmountUsd', 
    # 'trades_buyAmountUsd', 
    'name',
    'environment',
    'swaps_pool_id', 
    'swaps_tokenIn_id', 
    'swaps_tokenOut_id',
    'swaps_amountIn',
    'swaps_amountOut',  
    # 'swaps_amountInUSD',
    # 'swaps_amountOutUSD',
    'swaps_blockNumber',
    # 'transactions_timestamp',
    # 'transactions_gasUsed',
    # 'transactions_gasPrice',
    'transaction_gas_fee_one_hop',
    'transaction_gas_fee_median'
    ]]

In [None]:
#check pl dataframe size
cow_uni_trunc_pl.shape

In [None]:
# sort by largest transaction_gas_fee
cow_uni_trunc_pl.sort("transaction_gas_fee_one_hop", reverse=True).head(5)

### Chainlink

In [None]:
# load streamer class
chain_ds = Streamer('https://api.thegraph.com/subgraphs/name/openpredict/chainlink-prices-subgraph')

In [None]:
chain_price_feed = "ETH/USD"

In [None]:
chain_dfs_list = []

In [None]:
# get a query field path from the query dictionary which is automatically populated in the Streamer object
chain_fp = chain_ds.queryDict.get('prices')

# add parameters to the settlements_qp.
chain_qp = chain_fp(
    first=query_size * 5,
    orderBy='timestamp',
    orderDirection='desc',
    where = {'timestamp_lt': timestamp, 'assetPair': chain_price_feed}
    )

# run query
chain_df = chain_ds.runQuery(chain_qp)

In [None]:
# drop prices_id, endpoint. 
chain_df = chain_df.drop(['prices_id', 'endpoint'], axis=1)
# divide prices_price by 1e8 to get the price in USD
chain_df['prices_price'] = chain_df['prices_price'] / 10 ** 8

In [None]:
chain_df.shape

In [None]:
chain_pl = pl.from_pandas(chain_df)

In [None]:
# outer merge chain_df with cow_uni_outer_pl on timestamp
cow_uni_chain_outer_pl = cow_uni_trunc_pl.join(other=chain_pl, left_on='trades_timestamp', right_on='prices_timestamp', how='outer')

In [None]:
cow_uni_chain_outer_pl.shape

### Price Calculations

#### Decimal Calculations

In [None]:
# add decimals to cow trades sell tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('trades_sellToken_id'),
        (
            pl.when(pl.col('trades_sellToken_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('trades_sellToken_decimals'),
    ]
)

# add decimals to cow trades buy tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('trades_buyToken_id'),
        (
            pl.when(pl.col('trades_buyToken_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('trades_buyToken_decimals'),
    ]
)

# add decimals to cow trades sell tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('swaps_tokenIn_id'),
        (
            pl.when(pl.col('swaps_tokenIn_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('swaps_tokenIn_decimals'),
    ]
)

# add decimals to cow trades buy tokens
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns(
    [
        pl.col('swaps_tokenOut_id'),
        (
            pl.when(pl.col('swaps_tokenOut_id') == 'WETH')
            .then(18)
            .otherwise(6)
            .cast(pl.UInt8)
        ).alias('swaps_tokenOut_decimals'),
    ]
)

In [None]:
cow_uni_chain_outer_pl.columns # NOTE - the DF has transaction cols

#### Execution Price Calculations

In [None]:
# NOTE - Polars can perform these calculations in-column. This means it can convert the values in place without creating a new column. The new column created here is more verbose, but is a good sanity check to see before/after results.
trades_swaps_converted_pl = cow_uni_chain_outer_pl.with_columns([
    (pl.col("trades_buyAmount") / (10**pl.col("trades_buyToken_decimals"))).alias('trades_buyAmount_converted'),
    (pl.col("trades_sellAmount") / (10**pl.col("trades_sellToken_decimals"))).alias('trades_sellAmount_converted'),
    (pl.col("swaps_amountIn") / (10**pl.col("swaps_tokenIn_decimals"))).alias('swaps_amountIn_converted'),
    (pl.col("swaps_amountOut") / (10**pl.col("swaps_tokenOut_decimals"))).alias('swaps_amountOut_converted'),
])

In [None]:
trades_swaps_converted_trunc_pl = trades_swaps_converted_pl.with_columns([
    (pl.col("trades_buyAmount_converted") / pl.col("trades_sellAmount_converted")).alias('trades_buy_sell_ratio'),
    (pl.col("trades_sellAmount_converted") / pl.col("trades_buyAmount_converted")).alias('trades_sell_buy_ratio'),
    (pl.col("swaps_amountIn_converted") / pl.col("swaps_amountOut_converted")).alias('swaps_amountIn_amountOut_ratio'),
    (pl.col("swaps_amountOut_converted") / pl.col("swaps_amountIn_converted")).alias('swaps_amountOut_amountIn_ratio'),
])

In [None]:
trades_swaps_converted_trunc_pl = trades_swaps_converted_trunc_pl[
    'trades_timestamp',
    'swaps_blockNumber',
    'trades_txHash',
    'trades_feeAmount',
    'trades_sellToken_id',
    'trades_buyToken_id',
    'trades_sellAmount_converted',
    'trades_buyAmount_converted',
    'name',
    'environment',
    'swaps_pool_id',
    'swaps_tokenIn_id',
    'swaps_tokenOut_id',
    'swaps_amountIn_converted',
    'swaps_amountOut_converted',
    'transaction_gas_fee_one_hop',
    'transaction_gas_fee_median',
    'trades_buy_sell_ratio',
    'trades_sell_buy_ratio',
    'swaps_amountIn_amountOut_ratio',
    'swaps_amountOut_amountIn_ratio',
    'prices_assetPair_id',
    'prices_price'
]

In [None]:
trades_swaps_converted_trunc_pl.shape

In [None]:
trades_swaps_converted_trunc_pl.head(10)

### Save Data to a local parquet file

In [None]:
# checkpoint, save to parquet
trades_swaps_converted_trunc_pl.write_parquet('data/cow_uni_chain_outer_pl.parquet')