# Cow + Univ3 DataPipeline

### Setup Jupyter Environment

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import polars as pl

# Notebook display settings: show every column, keep wide frames on a single
# block instead of wrapping them, and never truncate cell contents (so long
# values such as 0x... hashes stay fully visible).
pd.options.display.max_columns = None
pd.options.display.expand_frame_repr = False
pd.options.display.max_colwidth = None

In [None]:
# Read the historical parquet file into a polars DataFrame.
cow_uni_chain_outer_pl = pl.read_parquet(
    'data/cow_uni_chain_outer_pl_historical.parquet',
)

In [None]:
# Display the (rows, columns) shape of the loaded frame as a quick sanity check.
cow_uni_chain_outer_pl.shape

In [None]:
# Replace null values with 0. In this case we are using the string "0"
# trades_swaps_converted_trunc_pl = cow_uni_chain_outer_pl.fill_null("0")

In [None]:
# Normalise each ratio column to max(x, 1/x): values above 1 are kept and every
# other value is replaced by its reciprocal, so a ratio and its inverse end up
# expressed on the same scale.
# Native polars expressions are used instead of `Expr.apply` with a Python
# lambda: they run vectorised, keep nulls as nulls, and (for float columns)
# yield inf for a zero ratio instead of raising ZeroDivisionError.
execution_prices_pl = cow_uni_chain_outer_pl.with_columns([
    pl.when(pl.col(ratio_column) > 1)
    .then(pl.col(ratio_column))
    .otherwise(1 / pl.col(ratio_column))
    .alias(ratio_column)
    for ratio_column in (
        "trades_buy_sell_ratio",
        "trades_sell_buy_ratio",
        "swaps_amountIn_amountOut_ratio",
        "swaps_amountOut_amountIn_ratio",
    )
])

In [None]:
# Add boolean columns describing the trade direction on each venue.
# trades_buy_WETH is True when the CoW trade's buy token is WETH;
# swaps_buy_WETH is True when the UniV3 swap's tokenOut is WETH.
# False therefore means the WETH was not on the receiving side of the trade
# (presumably it was sold for the other token of the pair - confirm).
# A plain vectorised comparison replaces the per-row Python lambda; null token
# ids still produce null flags.
execution_prices_pl = execution_prices_pl.with_columns([
    (pl.col("trades_buyToken_id") == 'WETH').alias('trades_buy_WETH'),
    (pl.col("swaps_tokenOut_id") == 'WETH').alias('swaps_buy_WETH'),
])

In [None]:
# Combine the per-venue flags into row-level direction flags:
#   buy_WETH  - both the CoW trade and the UniV3 swap buy WETH
#   sell_WETH - neither the CoW trade nor the UniV3 swap buys WETH
cow_buys_weth = pl.col("trades_buy_WETH")
univ3_buys_weth = pl.col("swaps_buy_WETH")

both_buy = (cow_buys_weth & univ3_buys_weth).alias('buy_WETH')
neither_buys = (~cow_buys_weth & ~univ3_buys_weth).alias('sell_WETH')

execution_prices_pl = execution_prices_pl.with_columns([both_buy, neither_buys])


### Plotting

In [None]:
# Move to pandas for plotting, index the frame by trades_timestamp and sort it
# chronologically on that index.
execution_prices_pd = (
    execution_prices_pl.to_pandas()
    .set_index('trades_timestamp')
    .sort_index()
)

# Same rows in the same order, but with a fresh integer index;
# trades_timestamp goes back to being an ordinary column.
reindexed_df = execution_prices_pd.reset_index()

In [None]:
# reindex execution_prices_pd. We take the mean over all rows that share the same timestamp.
# reindexed_df = execution_prices_pd.groupby(execution_prices_pd.index).mean(numeric_only=True)

In [None]:
# # The reindex value converts booleans into 0s and 1s. Thus we convert buy_WETH 0 values to False boolean
# reindexed_df['buy_WETH'] = reindexed_df['buy_WETH'].apply(lambda x: False if x == 0 else True)

# # do the same for sell_WETH
# reindexed_df['sell_WETH'] = reindexed_df['sell_WETH'].apply(lambda x: False if x == 0 else True)

In [None]:
# Keep only the columns needed for the price comparison and give the three
# price columns shorter names; the remaining columns keep their original names.
reindexed_price_df = reindexed_df[[
    'timestamp',                            # timestamp
    'trades_sell_buy_ratio',                # cow price
    'swaps_amountOut_amountIn_ratio',       # univ3 price
    'prices_price',                         # chain price
    'transaction_gas_fee_one_hop',          # univ3 single hop swap
    'transaction_gas_fee_median',           # univ3 median swap transaction cost
    'buy_WETH',                             # trade direction
]].rename(columns={
    'trades_sell_buy_ratio': 'cow_price',
    'swaps_amountOut_amountIn_ratio': 'univ3_price',
    'prices_price': 'chain_price',
})

In [None]:
# Forward fill: every missing value takes the last valid value above it in the
# same column. `DataFrame.ffill()` is used because `fillna(method='ffill')` is
# deprecated in recent pandas releases.
reindexed_price_df = reindexed_price_df.ffill()

In [None]:
# Express both gas fee estimates in USD by multiplying them with the UniV3 price
# (presumably the fees are denominated in WETH and the price is USD per WETH - confirm).
reindexed_price_df['transaction_gas_fee_one_hop_usd'] = (
    reindexed_price_df['transaction_gas_fee_one_hop'].mul(reindexed_price_df['univ3_price'])
)
reindexed_price_df['transaction_gas_fee_median_usd'] = (
    reindexed_price_df['transaction_gas_fee_median'].mul(reindexed_price_df['univ3_price'])
)

In [None]:
# Split the rows by trade direction.
# The explicit comparisons are kept on purpose: rows whose flag is still missing
# match neither True nor False and are left out of both frames.
# `.copy()` makes each result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
buy_WETH_df = reindexed_price_df[reindexed_price_df['buy_WETH'] == True].copy()
sell_WETH_df = reindexed_price_df[reindexed_price_df['buy_WETH'] == False].copy()

In [None]:
# Gas adjusted UniV3 prices. The USD gas fee is added to the price when buying
# WETH and subtracted from it when selling WETH, once for the single hop fee
# and once for the median fee.
# `assign` returns a new frame, which avoids writing into a filtered slice
# (SettingWithCopyWarning) as the previous `.loc[:, col] = ...` form did.
# NOTE(review): the fee looks like a per-transaction amount while the price is
# per WETH - confirm this adjustment is the intended one.
buy_WETH_df = buy_WETH_df.assign(
    univ3_gas_single_hop_price=buy_WETH_df['univ3_price'] + buy_WETH_df['transaction_gas_fee_one_hop_usd'],
    univ3_gas_median_price=buy_WETH_df['univ3_price'] + buy_WETH_df['transaction_gas_fee_median_usd'],
)
sell_WETH_df = sell_WETH_df.assign(
    univ3_gas_single_hop_price=sell_WETH_df['univ3_price'] - sell_WETH_df['transaction_gas_fee_one_hop_usd'],
    univ3_gas_median_price=sell_WETH_df['univ3_price'] - sell_WETH_df['transaction_gas_fee_median_usd'],
)

In [None]:
# Heuristic filter to remove some incorrectly calculated outlier data: keep only
# rows whose transaction_gas_fee_median_usd is below the threshold. Rows where
# that fee is NaN are dropped as well, because the comparison is False for them.
# `.copy()` keeps the filtered frames independent so later column assignments
# do not raise SettingWithCopyWarning.
MAX_MEDIAN_GAS_FEE_USD = 200

buy_WETH_df = buy_WETH_df[buy_WETH_df['transaction_gas_fee_median_usd'] < MAX_MEDIAN_GAS_FEE_USD].copy()
sell_WETH_df = sell_WETH_df[sell_WETH_df['transaction_gas_fee_median_usd'] < MAX_MEDIAN_GAS_FEE_USD].copy()

In [None]:
# Display the filtered sell-side frame for inspection.
sell_WETH_df

In [None]:
# Convert the `timestamp` column (not the index) from Unix seconds to
# timezone-aware UTC datetimes. `assign` builds a new frame instead of setting
# an attribute on a filtered slice, which avoids SettingWithCopyWarning.
sell_WETH_df = sell_WETH_df.assign(
    timestamp=pd.to_datetime(sell_WETH_df['timestamp'], utc=True, unit='s')
)
buy_WETH_df = buy_WETH_df.assign(
    timestamp=pd.to_datetime(buy_WETH_df['timestamp'], utc=True, unit='s')
)

In [None]:
# Display the filtered buy-side frame for inspection.
buy_WETH_df

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so running
# this cell raises NameError. Presumably a deliberate guard that halts "Run All"
# before the plotting cells below - confirm.
STOP

In [None]:
# Index label of the first non-NaN value in buy_WETH_df['cow_price'].
# This was previously read from sell_WETH_df, so the buy-side plots that slice
# buy_WETH_df with this label started from the wrong row.
first_valid_index_buy = buy_WETH_df['cow_price'].first_valid_index()
print(f'first valid index is: {first_valid_index_buy}')

In [None]:
# plot cow price vs univ3 and chain gas adjusted prices against timestamp
# buy_WETH_df.loc[first_valid_index_buy:][['cow_price', 'univ3_gas_median_price', 'chain_price']].plot(figsize=(10,5))
# plt.title("WETH Buy Surplus Discount (median gas fee adj.)")
# plt.xlabel('date')
# plt.ylabel('WETH Price')
# plt.show();

In [None]:
# Index label of the first row in sell_WETH_df that has a CoW price.
first_valid_index_sell = sell_WETH_df.loc[:, 'cow_price'].first_valid_index()
print(f'first valid index is: {first_valid_index_sell}')

In [None]:
# Plot the CoW price, the median gas adjusted UniV3 price and the chain price
# for WETH sells, starting at the first row that has a CoW price.
# The lines are drawn against the `timestamp` column so the x-axis really shows
# dates; previously they were drawn against the integer row index while the
# axis was labelled 'date'.
sell_WETH_df.loc[first_valid_index_sell:].plot(
    x='timestamp',
    y=['cow_price', 'univ3_gas_median_price', 'chain_price'],
    figsize=(10, 5),
)
plt.title("WETH Sell Surplus Premium (median gas fee adj.)")
plt.xlabel('date')
plt.ylabel('WETH Price')
plt.show()

### Price Differences

In [None]:
# Percent difference between the CoW price and each gas adjusted UniV3 price
# for WETH buys, relative to the CoW price:
#     (cow_price - univ3_gas_adjusted_price) / cow_price * 100
# `assign` returns a new frame, so no column is written into a filtered slice
# (avoids SettingWithCopyWarning).
buy_WETH_df = buy_WETH_df.assign(
    cow_univ3_gas_adj_price_diff_percent_single=(
        (buy_WETH_df['cow_price'] - buy_WETH_df['univ3_gas_single_hop_price']) / buy_WETH_df['cow_price'] * 100
    ),
    cow_univ3_gas_adj_price_diff_percent_median=(
        (buy_WETH_df['cow_price'] - buy_WETH_df['univ3_gas_median_price']) / buy_WETH_df['cow_price'] * 100
    ),
)

In [None]:
# Scatter plot for WETH buys: single hop gas adjusted price difference (percent)
# on the x-axis against the CoW price on the y-axis.
buy_WETH_df.loc[first_valid_index_buy:].plot.scatter(
    x='cow_univ3_gas_adj_price_diff_percent_single',
    y='cow_price',
    figsize=(10, 5),
)

plt.title("WETH Buy Surplus Discount (single hop gas adj.)")
plt.xlabel("CoW Surplus Discount Percent vs Univ3")
plt.ylabel("WETH Price")
plt.show()

In [None]:
# Scatter plot for WETH buys: median gas fee adjusted price difference (percent)
# on the x-axis against the CoW price on the y-axis.
buy_WETH_df.loc[first_valid_index_buy:].plot.scatter(
    x='cow_univ3_gas_adj_price_diff_percent_median',
    y='cow_price',
    figsize=(10, 5),
)

plt.title("WETH Buy Surplus Discount (median gas fee adj.)")
plt.xlabel("CoW Surplus Discount Percent vs Univ3")
plt.ylabel("WETH Price")
plt.show()

In [None]:
# Percent difference between the CoW price and each gas adjusted UniV3 price
# for WETH sells, relative to the CoW price:
#     (cow_price - univ3_gas_adjusted_price) / cow_price * 100
# `assign` returns a new frame, so no column is written into a filtered slice
# (avoids SettingWithCopyWarning).
sell_WETH_df = sell_WETH_df.assign(
    cow_univ3_gas_adj_price_diff_percent_single=(
        (sell_WETH_df['cow_price'] - sell_WETH_df['univ3_gas_single_hop_price']) / sell_WETH_df['cow_price'] * 100
    ),
    cow_univ3_gas_adj_price_diff_percent_median=(
        (sell_WETH_df['cow_price'] - sell_WETH_df['univ3_gas_median_price']) / sell_WETH_df['cow_price'] * 100
    ),
)

In [None]:
# Scatter plot for WETH sells: single hop gas adjusted price difference (percent)
# on the x-axis against the CoW price on the y-axis.
sell_WETH_df.loc[first_valid_index_sell:].plot.scatter(
    x='cow_univ3_gas_adj_price_diff_percent_single',
    y='cow_price',
    figsize=(10, 5),
)

plt.title("WETH Sell Surplus Premium (single hop gas adj.)")
plt.xlabel("CoW Surplus Premium Percent vs Univ3")
plt.ylabel("WETH Price")
plt.show()

In [None]:
# Scatter plot for WETH sells: median gas fee adjusted price difference (percent)
# on the x-axis against the CoW price on the y-axis.
sell_WETH_df.loc[first_valid_index_sell:].plot.scatter(
    x='cow_univ3_gas_adj_price_diff_percent_median',
    y='cow_price',
    figsize=(10, 5),
)

plt.title("WETH Sell Surplus Premium (median gas fee adj.)")
plt.xlabel("CoW Surplus Premium Percent vs Univ3")
plt.ylabel("WETH Price")
plt.show()

In [None]:
# Summary statistics of both price difference columns (single hop and median)
# for WETH buys.
buy_WETH_df.loc[:, [
    'cow_univ3_gas_adj_price_diff_percent_single',
    'cow_univ3_gas_adj_price_diff_percent_median',
]].describe()

In [None]:
# Summary statistics of both price difference columns (single hop and median)
# for WETH sells.
sell_WETH_df.loc[:, [
    'cow_univ3_gas_adj_price_diff_percent_single',
    'cow_univ3_gas_adj_price_diff_percent_median',
]].describe()