In [None]:
# IF JUPYTER ISN'T COOPERATING, COMMENT THIS CODE BLOCK OUT
# Turn off the notebook's periodic autosave (`%autosave 0` sets the autosave interval to 0, i.e. disabled).
# NOTE: `%autosave` is an IPython line magic, so this cell only runs inside a Jupyter/IPython kernel.
%autosave 0

# Print the interpreter's build/configuration variables, one "name value" pair per line,
# so the environment the notebook was executed in is visible in the cell output.
import sysconfig
for k,v in sysconfig.get_config_vars().items():
    print(f"{k:26} {v}")

In [None]:
from datetime import datetime
from sim.pool import AMMPoolHistory, CPMM, AMMWithBaulking, OraclePool, RebalancingPool
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np

import pandas as pd
import polars as pl

# Widen pandas' display limits so long values such as 0x... hashes are not truncated.
# Every option is addressed by its full dotted name: pandas resolves abbreviated names by
# pattern matching, which stops working as soon as the short name becomes ambiguous.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

# Data Analysis of Prices

### Polars Preprocessing

In [None]:
# Load the merged CoW / Uniswap / Chainlink dataset into a polars DataFrame.
# The path is relative to the notebook's working directory.
# NOTE(review): judging by the name, this file holds the outer-joined output of the data pipeline notebook — confirm.
cow_uni_chain_outer_pl = pl.read_parquet('data/cow_uni_chain_outer_pl_historical.parquet')

In [None]:
# NOTE CAREFUL! The data pipeline notebook was meant to apply this scaling already, but the
# stored values appear unscaled, so the Chainlink price is divided by 10**8 again here.
# Re-running this cell without reloading the parquet file would scale the column twice.
CHAINLINK_PRICE_DECIMALS = 8
cow_uni_chain_outer_pl = cow_uni_chain_outer_pl.with_columns([
    (pl.col("prices_price") / 10 ** CHAINLINK_PRICE_DECIMALS).alias('prices_price'),
])

In [None]:
# Display the (rows, columns) shape of the merged frame as a quick sanity check.
cow_uni_chain_outer_pl.shape

In [None]:
# Count how often each token id appears as the Uniswap output token and as the CoW buy token,
# and return the result as a list of row tuples.
cow_uni_chain_outer_pl.select([
    (pl.col("swaps_tokenOut_id")).value_counts(),
    (pl.col("trades_buyToken_id")).value_counts(),
]).rows()

In [None]:
# Normalise every price ratio so that it is expressed as a value >= 1: a ratio that is
# already above 1 is kept, otherwise its reciprocal is used. This puts both quote
# directions of each ratio column on the same scale.
def _ratio_at_least_one(column: str) -> pl.Expr:
    """Return an expression replacing `column` by `x` if `x > 1`, else by `1 / x`.

    Evaluated natively by polars instead of through a per-element Python lambda:
    nulls stay null and a zero ratio yields `inf` instead of raising ZeroDivisionError.
    """
    ratio = pl.col(column)
    return pl.when(ratio > 1).then(ratio).otherwise(1 / ratio).alias(column)


execution_prices_pl = cow_uni_chain_outer_pl.with_columns([
    _ratio_at_least_one("trades_buy_sell_ratio"),
    _ratio_at_least_one("trades_sell_buy_ratio"),
    _ratio_at_least_one("swaps_amountIn_amountOut_ratio"),
    _ratio_at_least_one("swaps_amountOut_amountIn_ratio"),
])

In [None]:
# Add trade directionality flags.
# trades_buy_WETH is True when WETH is the token bought in the CoW trade;
# swaps_buy_WETH is True when WETH is the token received (tokenOut) in the Univ3 swap.
# A plain comparison is evaluated natively by polars (no per-element Python lambda);
# rows where the token id is null keep a null flag.
execution_prices_pl = execution_prices_pl.with_columns([
    (pl.col("trades_buyToken_id") == 'WETH').alias('trades_buy_WETH'),
    (pl.col("swaps_tokenOut_id") == 'WETH').alias('swaps_buy_WETH'),
])

In [None]:
# Pair the CoW direction with the Univ3 direction: one boolean column per combination.
cow_buys_weth = pl.col("trades_buy_WETH")
uni_buys_weth = pl.col("swaps_buy_WETH")

execution_prices_pl = execution_prices_pl.with_columns([
    (cow_buys_weth & uni_buys_weth).alias('buy_buy_WETH'),          # cow buy / uni buy WETH
    ((~cow_buys_weth) & (~uni_buys_weth)).alias('sell_sell_WETH'),  # cow sell / uni sell WETH
    (cow_buys_weth & (~uni_buys_weth)).alias('buy_sell_WETH'),      # cow buy / uni sell WETH
    ((~cow_buys_weth) & uni_buys_weth).alias('sell_buy_WETH'),      # cow sell / uni buy WETH
])

### Pandas Preprocessing

In [None]:
# Move to pandas for plotting: index the frame by trades_timestamp and sort it chronologically.
execution_prices_pd = (
    execution_prices_pl
    .to_pandas()
    .set_index('trades_timestamp')
    .sort_index()
)

# Same rows with a fresh 0..n-1 integer index; the timestamp becomes a regular column again.
reindexed_df = execution_prices_pd.reset_index()

In [None]:
# Columns kept for the price analysis, mapped to the shorter names used from here on.
# The dict order defines the column order of the resulting frame.
price_column_names = {
    'trades_timestamp': 'timestamp',                                # timestamp
    'trades_sell_buy_ratio': 'cow_price',                           # cow price
    'swaps_amountOut_amountIn_ratio': 'univ3_price',                # univ3 price
    'prices_price': 'chain_price',                                  # chain price
    'transaction_gas_fee_one_hop': 'transaction_gas_fee_one_hop',   # univ3 single hop swap
    'transaction_gas_fee_median': 'transaction_gas_fee_median',     # univ3 median swap transaction cost
    'buy_buy_WETH': 'buy_buy_WETH',                                 # cow/uni buy direction
    'sell_sell_WETH': 'sell_sell_WETH',                             # cow/uni sell direction
    'buy_sell_WETH': 'buy_sell_WETH',                               # cow buy/uni sell direction
    'sell_buy_WETH': 'sell_buy_WETH',                               # cow sell/uni buy direction
}

reindexed_price_df = reindexed_df[list(price_column_names)].rename(columns=price_column_names)

In [None]:
# Forward fill: every missing value takes the last known value above it in the same column.
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')` spelling; the result is identical.
reindexed_price_df = reindexed_price_df.ffill()

In [None]:
# Display the (rows, columns) shape of the forward-filled price frame.
reindexed_price_df.shape

In [None]:
# Express both gas-fee columns in USD by multiplying them with the Univ3 price.
# NOTE(review): this assumes the fee columns are denominated in WETH — confirm against the data pipeline.
for fee_col in ('transaction_gas_fee_one_hop', 'transaction_gas_fee_median'):
    reindexed_price_df[f'{fee_col}_usd'] = reindexed_price_df[fee_col] * reindexed_price_df['univ3_price']

In [None]:
# Univ3 price with the single-hop gas cost (in USD) added on top.
# The gas cost is added for every row regardless of trade direction; the direction-aware
# adjustment is computed separately on the per-direction frames.
reindexed_price_df['univ3_price_adjusted'] = reindexed_price_df['univ3_price'] + reindexed_price_df['transaction_gas_fee_one_hop_usd']

In [None]:
# Per-row absolute deviation from the Chainlink price, in percent of the Chainlink price:
#   cow_chain_diff   -> CoW price vs chain price
#   univ3_chain_diff -> gas-adjusted Univ3 price vs chain price
chain_price = reindexed_price_df['chain_price']
for venue_col, diff_col in (
    ('cow_price', 'cow_chain_diff'),
    ('univ3_price_adjusted', 'univ3_chain_diff'),
):
    reindexed_price_df[diff_col] = np.abs((chain_price - reindexed_price_df[venue_col]) / chain_price) * 100

In [None]:
# 10/25/50/75/90% quantiles of the chain price and of both absolute deviations.
# interpolation='nearest' returns observed values instead of interpolating between them.
reindexed_price_df[['chain_price', 'cow_chain_diff', 'univ3_chain_diff']].quantile([.1, 0.25, 0.5, 0.75, .9], interpolation='nearest')

In [None]:
# Split the price frame by trade direction. There are 4 cases:
#   cow buy  / uni buy  WETH
#   cow sell / uni sell WETH
#   cow buy  / uni sell WETH
#   cow sell / uni buy  WETH
# The explicit `== True` comparison is kept so that non-boolean (e.g. missing) flags are excluded.
(
    buy_buy_WETH_reindex_df,
    sell_sell_WETH_reindex_df,
    buy_sell_WETH_reindex_df,
    sell_buy_WETH_reindex_df,
) = (
    reindexed_price_df[reindexed_price_df[direction_flag] == True]
    for direction_flag in ('buy_buy_WETH', 'sell_sell_WETH', 'buy_sell_WETH', 'sell_buy_WETH')
)

In [None]:
# Print the shape of each per-direction frame. Note that the frames have grown in size
# due to the forward fill applied above.
for direction_df in (
    buy_buy_WETH_reindex_df,
    sell_sell_WETH_reindex_df,
    buy_sell_WETH_reindex_df,
    sell_buy_WETH_reindex_df,
):
    print(direction_df.shape)

In [None]:
def _below_gas_fee_quantile(df, quantile=0.90, fee_col='transaction_gas_fee_one_hop_usd'):
    """Return the rows of `df` whose `fee_col` is strictly below the given quantile of that column.

    An explicit copy is returned because later cells add columns to the result; writing to a
    filtered slice would otherwise trigger pandas' SettingWithCopyWarning.
    """
    return df[df[fee_col] < df[fee_col].quantile(quantile)].copy()


# Keep only rows whose single-hop gas fee (USD) is below the 90th percentile of their own direction.
buy_buy_WETH_df = _below_gas_fee_quantile(buy_buy_WETH_reindex_df)
sell_sell_WETH_df = _below_gas_fee_quantile(sell_sell_WETH_reindex_df)
buy_sell_WETH_df = _below_gas_fee_quantile(buy_sell_WETH_reindex_df)
sell_buy_WETH_df = _below_gas_fee_quantile(sell_buy_WETH_reindex_df)

In [None]:
# Shapes of the percentile-filtered frames, one per direction combination.
print(f'cow and uni buy count: {buy_buy_WETH_df.shape}')
print(f'cow and uni sell count: {sell_sell_WETH_df.shape}')
print(f'cow buy and uni sell count: {buy_sell_WETH_df.shape}')
# Label fixed: this frame holds the cow-sell / uni-buy rows (the old label repeated the previous case).
print(f'cow sell and uni buy count: {sell_buy_WETH_df.shape}')

In [None]:
# Gas-adjusted Univ3 prices. The gas cost is added when the Univ3 leg buys WETH
# and subtracted when the Univ3 leg sells WETH.
gas_sign_by_frame = (
    (buy_buy_WETH_df, 1),
    (sell_sell_WETH_df, -1),
    (buy_sell_WETH_df, -1),
    (sell_buy_WETH_df, 1),
)
gas_price_columns = (
    ('univ3_gas_single_hop_price', 'transaction_gas_fee_one_hop_usd'),  # single hop gas cost
    ('univ3_gas_median_price', 'transaction_gas_fee_median_usd'),       # median swap gas cost
)

for direction_df, gas_sign in gas_sign_by_frame:
    for price_col, fee_col in gas_price_columns:
        direction_df.loc[:, price_col] = (
            direction_df.loc[:, 'univ3_price'] + gas_sign * direction_df.loc[:, fee_col]
        )

In [None]:
# Convert the `timestamp` column (epoch seconds) to timezone-aware UTC datetimes.
# The column is replaced as a whole (`df[col] = ...`): `df.loc[:, col] = ...` first tries to
# write the new values into the existing numeric column, which is not a clean dtype change.
for direction_df in (sell_sell_WETH_df, buy_buy_WETH_df, buy_sell_WETH_df, sell_buy_WETH_df):
    direction_df['timestamp'] = pd.to_datetime(direction_df['timestamp'], utc=True, unit='s')

### Price Charts

In [None]:
# Colour scheme shared by the price charts and histograms: one colour per price source.
cow_color = '#0000b3'    # CoW price
univ3_color = '#993366'  # Univ3 (gas adjusted) price
chain_color = '#004d00'  # Chainlink price

In [None]:
# Start of the plotted window: the timestamp of the row CHART_TAIL_ROWS from the end of the
# sell/sell frame. The charts below only draw rows strictly after this timestamp.
# If the frame holds fewer rows than CHART_TAIL_ROWS, the first row is used instead of
# raising an IndexError.
CHART_TAIL_ROWS = 1300
chart_start_pos = max(len(sell_sell_WETH_df) - CHART_TAIL_ROWS, 0)
graph_chart_date = sell_sell_WETH_df.iloc[chart_start_pos]['timestamp']
graph_chart_date

In [None]:
# Step chart for the sell/sell direction: CoW price against the gas-adjusted Univ3 price and
# the Chainlink price, restricted to rows after graph_chart_date.
recent_sell_sell = sell_sell_WETH_df[sell_sell_WETH_df['timestamp'] > graph_chart_date]

fig1, ax = plt.subplots(figsize=(10, 5))
ax.step(
    recent_sell_sell['timestamp'], recent_sell_sell['cow_price'],
    label='cow_price', color=cow_color, linewidth=.75)
for price_col, line_color in (
    ('univ3_gas_single_hop_price', univ3_color),
    ('chain_price', chain_color),
):
    ax.step(
        recent_sell_sell['timestamp'], recent_sell_sell[price_col], '--',
        label=price_col, color=line_color, alpha=.6)

ax.set_xlabel('Date')
ax.set_ylabel('WETH Price')
ax.legend()
plt.show()

# save figure
# NOTE(review): the EPS backend does not support transparency, so the alpha=.6 lines are
# written fully opaque; the commented PDF variant keeps the transparency.
fig1.savefig('../report/diagrams/weth_sell_line.eps', format='eps', bbox_inches='tight')
#fig1.savefig('../report/diagrams/weth_sell_line.pdf', format='pdf', bbox_inches='tight')

In [None]:
# Step chart for the buy/buy direction: CoW price against the gas-adjusted Univ3 price and
# the Chainlink price, restricted to rows after graph_chart_date.
recent_buy_buy = buy_buy_WETH_df[buy_buy_WETH_df['timestamp'] > graph_chart_date]

fig2, ax = plt.subplots(figsize=(10, 5))
ax.step(
    recent_buy_buy['timestamp'], recent_buy_buy['cow_price'],
    label='cow_price', color=cow_color, linewidth=.75)
for price_col, line_color in (
    ('univ3_gas_single_hop_price', univ3_color),
    ('chain_price', chain_color),
):
    ax.step(
        recent_buy_buy['timestamp'], recent_buy_buy[price_col], '--',
        label=price_col, color=line_color, alpha=.6)

ax.set_xlabel('Date')
ax.set_ylabel('WETH Price')
ax.legend()
plt.show()

# save figure (EPS has no transparency support; the commented PDF variant keeps alpha)
fig2.savefig('../report/diagrams/weth_buy_line.eps', format='eps', bbox_inches='tight')
#fig2.savefig('../report/diagrams/weth_buy_line.pdf', format='pdf', bbox_inches='tight')

In [None]:
# Step chart for the cow-buy / uni-sell direction, restricted to rows after graph_chart_date.
recent_buy_sell = buy_sell_WETH_df[buy_sell_WETH_df['timestamp'] > graph_chart_date]

fig3, ax = plt.subplots(figsize=(10, 5))
ax.step(
    recent_buy_sell['timestamp'], recent_buy_sell['cow_price'],
    label='cow_price', color=cow_color, linewidth=.75)
for price_col, line_color in (
    ('univ3_gas_single_hop_price', univ3_color),
    ('chain_price', chain_color),
):
    ax.step(
        recent_buy_sell['timestamp'], recent_buy_sell[price_col], '--',
        label=price_col, color=line_color, alpha=.6)

ax.set_xlabel('Date')
ax.set_ylabel('WETH Price')
ax.legend()
ax.set_title('CoW WETH Buy Price vs Univ3 WETH Sell Price')
plt.show()

In [None]:
# Step chart for the cow-sell / uni-buy direction, restricted to rows after graph_chart_date.
recent_sell_buy = sell_buy_WETH_df[sell_buy_WETH_df['timestamp'] > graph_chart_date]

fig4, ax = plt.subplots(figsize=(10, 5))
ax.step(
    recent_sell_buy['timestamp'], recent_sell_buy['cow_price'],
    label='cow_price', color=cow_color, linewidth=.75)
for price_col, line_color in (
    ('univ3_gas_single_hop_price', univ3_color),
    ('chain_price', chain_color),
):
    ax.step(
        recent_sell_buy['timestamp'], recent_sell_buy[price_col], '--',
        label=price_col, color=line_color, alpha=.6)

ax.set_xlabel('Date')
ax.set_ylabel('WETH Price')
ax.legend()
ax.set_title('CoW WETH Sell Price vs Univ3 WETH Buy Price')
plt.show()

### Price Differences Charts

In [None]:
# Signed price differences, all expressed in percent of the Chainlink price so that the buy
# and sell frames (and the series overlaid in the histograms) share one denominator.
# Previously the buy frame normalised the CoW-vs-Univ3 differences by cow_price while the
# sell frame used chain_price.
# NOTE(review): the CoW-vs-Univ3 columns are (cow - univ3) while the *_chain_* columns are
# (chain - venue); the sign conventions differ — confirm this is intended.
for direction_df in (buy_buy_WETH_df, sell_sell_WETH_df):
    chain_price = direction_df['chain_price']

    # CoW price vs gas-adjusted Univ3 price (single hop / median gas cost)
    direction_df['cow_univ3_gas_adj_price_diff_percent_single'] = (
        (direction_df['cow_price'] - direction_df['univ3_gas_single_hop_price']) / chain_price * 100
    )
    direction_df['cow_univ3_gas_adj_price_diff_percent_median'] = (
        (direction_df['cow_price'] - direction_df['univ3_gas_median_price']) / chain_price * 100
    )

    # per-row deviation of the CoW price from the Chainlink price
    direction_df['cow_chain_price_diff_percent'] = (
        (chain_price - direction_df['cow_price']) / chain_price * 100
    )

    # per-row deviation of the (unadjusted) Univ3 price from the Chainlink price
    direction_df['univ3_chain_price_diff_percent'] = (
        (chain_price - direction_df['univ3_price']) / chain_price * 100
    )

In [None]:
# Spread: absolute gap between the CoW and the Univ3 deviation from the chain price (in percent).
for direction_df in (buy_buy_WETH_df, sell_sell_WETH_df):
    direction_df['spread'] = np.abs(
        direction_df['cow_chain_price_diff_percent'] - direction_df['univ3_chain_price_diff_percent']
    )

In [None]:
# Signed buy-side deviations: 10/25/50/75/90% quantiles of the chain price and of the CoW and
# Univ3 deviations from it ('nearest' returns observed values rather than interpolated ones).
print('buy deviation quartiles (with sign)')
buy_buy_WETH_df[['chain_price', 'cow_chain_price_diff_percent', 'univ3_chain_price_diff_percent']].quantile([.1, .25, .5, .75, .9], interpolation='nearest')

In [None]:
# Signed sell-side deviations: 10/25/50/75/90% quantiles of the chain price and of the CoW and
# Univ3 deviations from it ('nearest' returns observed values rather than interpolated ones).
print('sell deviation quartiles (with sign)')
sell_sell_WETH_df[['chain_price', 'cow_chain_price_diff_percent', 'univ3_chain_price_diff_percent']].quantile([.1, .25, .5, .75, .9], interpolation='nearest')

In [None]:
# Mean signed deviation of the CoW price from the Chainlink price, per direction.
# The frames only contain rows below the 90th percentile of the single-hop gas fee.
sell_mean_deviation = sell_sell_WETH_df['cow_chain_price_diff_percent'].mean()
buy_mean_deviation = buy_buy_WETH_df['cow_chain_price_diff_percent'].mean()
print(f"Mean deviation of cow sell price from chainlink price (90th percentile): {sell_mean_deviation:.2f}%")
print(f"Mean deviation of cow buy price from chainlink price (90th percentile): {buy_mean_deviation:.2f}%")

In [None]:
# Histogram of the sell-side price differences: CoW vs chain price and CoW vs gas-adjusted Univ3.
# https://matplotlib.org/stable/gallery/misc/rasterization_demo.html
fig3, ax = plt.subplots(figsize=(10, 5))
for diff_col, series_label, bar_color in (
    ('cow_chain_price_diff_percent', 'chain price', chain_color),
    ('cow_univ3_gas_adj_price_diff_percent_single', 'univ3 with single hop gas adj', univ3_color),
):
    ax.hist(
        sell_sell_WETH_df[diff_col],
        bins=250,
        label=series_label,
        color=bar_color,
        alpha=0.75)

ax.set_xlim(-4, 4)
ax.set_xlabel('CoW Sells Price Difference (in %)')
ax.set_ylabel('Count')
ax.legend()
plt.show()

# save as eps
# NOTE(review): EPS cannot store transparency, so the overlapping alpha=0.75 bars are written
# opaque and the second series hides the first; the commented PDF variant keeps the alpha.
fig3.savefig('../report/diagrams/weth_sell_hist.eps', format='eps', bbox_inches='tight')
#fig3.savefig('../report/diagrams/weth_sell_hist.pdf', format='pdf', bbox_inches='tight')

In [None]:
# Histogram of the buy-side price differences: CoW vs chain price and CoW vs gas-adjusted Univ3.
fig3, ax = plt.subplots(figsize=(10, 5))
for diff_col, series_label, bar_color in (
    ('cow_chain_price_diff_percent', 'chain price', chain_color),
    ('cow_univ3_gas_adj_price_diff_percent_single', 'univ3 with single hop gas adj', univ3_color),
):
    ax.hist(
        buy_buy_WETH_df[diff_col],
        bins=50,
        label=series_label,
        color=bar_color,
        alpha=0.75)

ax.set_xlim(-4, 4)
ax.set_xlabel('CoW Buys Price Difference (in %)')
ax.set_ylabel('Count')
ax.legend()
plt.show()

# save as eps (EPS has no transparency support; the commented PDF variant keeps the alpha)
fig3.savefig('../report/diagrams/weth_buy_hist.eps', format='eps', bbox_inches='tight')
#fig3.savefig('../report/diagrams/weth_buy_hist.pdf', format='pdf', bbox_inches='tight')

### Simulations

#### Preprocessing

In [None]:
# Divide the merged dataframe into smaller per-source dataframes. The data pipeline notebook
# used an outer join, which introduces null values; dropping every row with a null in the
# selected columns recovers the rows that belong to each source.
# Columns are picked with an explicit `.select([...])`: indexing a polars frame with a tuple
# is ambiguous (a 2-tuple means "rows, columns").
# NOTE(review): all three frames are keyed on `trades_timestamp`, and cow_df also requires a
# non-null `swaps_blockNumber`; rows lacking those values are dropped — confirm this is intended.
uni_swaps_df = cow_uni_chain_outer_pl.select([
    'trades_timestamp',
    'swaps_tokenIn_id',
    'swaps_tokenOut_id',
    'swaps_amountIn_converted',
    'swaps_amountOut_converted',
]).drop_nulls()

chain_df = cow_uni_chain_outer_pl.select([
    'trades_timestamp',
    'prices_assetPair_id',
    'prices_price',
]).drop_nulls()


cow_df = cow_uni_chain_outer_pl.select([
    'trades_timestamp',
    'swaps_blockNumber',
    'trades_txHash',
    'trades_feeAmount',
    'trades_sellToken_id',
    'trades_buyToken_id',
    'trades_sellAmount_converted',
    'trades_buyAmount_converted',
    'name',
    'environment',
]).drop_nulls()

In [None]:
# Report the (rows, columns) shape of each recovered frame.
for frame_name, frame in (
    ('uni_swaps_df', uni_swaps_df),
    ('chain_df', chain_df),
    ('cow_df', cow_df),
):
    print(f"Size of {frame_name}: {frame.shape}")

In [None]:
# Convert the three polars frames to pandas. The names are rebound, so this cell can only be
# run once per kernel session (a pandas frame has no `to_pandas`).
uni_swaps_df, chain_df, cow_df = (
    polars_frame.to_pandas() for polars_frame in (uni_swaps_df, chain_df, cow_df)
)

In [None]:
# Order every frame chronologically by trades_timestamp.
uni_swaps_df, chain_df, cow_df = (
    frame.sort_values(by=['trades_timestamp']) for frame in (uni_swaps_df, chain_df, cow_df)
)

In [None]:
# Keep only rows at or after the simulation start (epoch seconds).
SIMULATION_START_TS = 1643673600  # 2022-02-01 00:00:00 UTC
uni_swaps_df, chain_df, cow_df = (
    frame[frame['trades_timestamp'] >= SIMULATION_START_TS]
    for frame in (uni_swaps_df, chain_df, cow_df)
)

In [None]:
# Earliest and latest timestamp of the Uniswap swaps
uni_timestamps = uni_swaps_df['trades_timestamp']
first_timestamp_uni, last_timestamp_uni = uni_timestamps.min(), uni_timestamps.max()
print(f'uni first timestamp: {first_timestamp_uni} \nlast timestamp: {last_timestamp_uni}')

# Earliest and latest timestamp of the Chainlink prices
chain_timestamps = chain_df['trades_timestamp']
first_timestamp_chain, last_timestamp_chain = chain_timestamps.min(), chain_timestamps.max()
print(f'chain first timestamp: {first_timestamp_chain} \nlast timestamp: {last_timestamp_chain}')

In [None]:
# Dense grid with one row per integer timestamp between the first and last Uniswap swap (inclusive).
# `range` needs integer bounds, so trades_timestamp must be an integer column here.
timestamps = pd.DataFrame({'trades_timestamp': range(first_timestamp_uni, last_timestamp_uni + 1)})

In [None]:
# Number of rows in the dense timestamp grid.
print(len(timestamps))

#### Create Chainlink Oracle

In [None]:
# Left-merge the Chainlink rows onto the dense timestamp grid, carry the last known values
# forward into the gaps, and order the result by timestamp.
# - `.ffill()` replaces the deprecated `fillna(method='ffill', inplace=True)`.
# - A stable sort keeps rows that share a timestamp in their merged order, so the later
#   "first price per timestamp" selection is deterministic.
chain_filled_df = (
    pd.merge(timestamps, chain_df, on='trades_timestamp', how='left')
    .ffill()
    .sort_values('trades_timestamp', ascending=True, kind='stable')
)

In [None]:
# First row (in timestamp order) that carries a Chainlink price.
rows_with_price = chain_filled_df.dropna(subset=['prices_price'])
first_non_null_price_row = rows_with_price.iloc[0]

In [None]:
# Get the simulation initialization data from the first priced row of the chain data.
initialization_df_row = chain_filled_df[chain_filled_df["trades_timestamp"] == first_non_null_price_row['trades_timestamp']]
initilization_price = initialization_df_row['prices_price']

# Scalar start price. Taking the first element explicitly avoids calling int() on a whole
# Series, which is deprecated for one-element Series and fails outright when several rows
# share the start timestamp.
start_price = float(initilization_price.iloc[0])

# initialize 100m USDC supply reserve amount
USDC_reserves_USD = 100000000

# WETH reserves worth the same USD amount at the start price (truncated to whole tokens)
token0_start = int(USDC_reserves_USD / start_price)
print(f"Token 0 WETH initial reserves: \t{token0_start}")

token1_start = USDC_reserves_USD
print(f"Token 1 USDC initial reserves :\t{token1_start}")

# Format the start timestamp (epoch seconds) as a UTC date. UTC matches the timestamp
# conversion used for the charts; datetime.fromtimestamp would use the machine's local
# timezone and could report a different day.
datetime_object = pd.to_datetime(initialization_df_row['trades_timestamp'].iloc[0], unit='s', utc=True).strftime('%Y-%m-%d')

print(f'start date: {datetime_object}, start WETH price: {initilization_price.iloc[0]}')

In [None]:
# Price series indexed by timestamp; the simulation class expects this data structure.
start_ts = first_non_null_price_row['trades_timestamp']
price_by_ts = chain_filled_df.set_index("trades_timestamp")["prices_price"]

# Keep only entries strictly after the first priced timestamp. This removes the leading
# null prices and also the first priced row itself.
price_s = price_by_ts[price_by_ts.index > start_ts]

In [None]:
# Collapse duplicated timestamps in price_s by keeping the first non-null price per index value
# (groupby also returns the index in sorted order). This is the 'external market oracle' for the CPMM.
price_s = price_s.groupby(level=0).first()

#### Create CoW Oracle

In [None]:
# Rows of the forward-filled price frame that carry a CoW price.
has_cow_price = reindexed_price_df['cow_price'].notnull()
cow_oracle_df = reindexed_price_df[has_cow_price]

In [None]:
# Build the CoW price oracle as a list of {"ts": timestamp, "p": price} records.
# `to_dict("records")` produces the records in one pass instead of iterating row by row
# with `iterrows()`, which constructs a Series per row.
oracle_dict = (
    cow_oracle_df[["timestamp", "cow_price"]]
    .rename(columns={"timestamp": "ts", "cow_price": "p"})
    .to_dict("records")
)

#### Oracle Price Chart

In [None]:
# Turn the list of {"ts", "p"} oracle records into a DataFrame with columns `ts` and `p` for plotting.
oracle_df = pd.DataFrame(oracle_dict)

In [None]:
# Overlay the CoW oracle price and the Chainlink price series on one chart.
# UNOPTIMIZED chart rendering! Takes a few seconds.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(oracle_df['ts'], oracle_df['p'], label='CowSwap Oracle Price', color='blue')
ax.plot(
    price_s.index, price_s,
    label='Chainlink Oracle Price', color='red', alpha=0.2, linewidth=4)
ax.set(xlabel='Timestamp', ylabel='Price')
ax.legend()
plt.show()

#### Simulation Execution & Preprocessing

In [None]:
# One history object per simulated pool, all starting from the same reserves.
# The history objects store the data produced by the simulation.
feedlot1, feedlot2, feedlot3 = (
    AMMPoolHistory(token0_start, token1_start) for _ in range(3)
)

In [None]:
# Two-column CoW oracle frame with the column names used by the pools: ts (timestamp), p (price).
cow_oracle = cow_oracle_df[["timestamp", "cow_price"]].rename(
    columns={"timestamp": "ts", "cow_price": "p"}
)

In [None]:
# Build the simulated pools, each backed by its own history object:
#   cpmm / cpmm_SI_flow -> CPMM on feedlot1, wrapped in AMMWithBaulking together with the Chainlink price series
#   oracle_pool         -> OraclePool on feedlot2, fed by a generator over the CoW oracle rows
#   rebalancing_pool    -> RebalancingPool on feedlot3, fed by its own generator over the same rows
# Each generator is single-use, so this cell must be re-run before the simulation loop is repeated.
# NOTE(review): rebalancing_pool is never called in the simulation loop below — confirm how feedlot3 gets populated.
cpmm = CPMM(pool=feedlot1)
cpmm_SI_flow = AMMWithBaulking(cpmm, price_s)
oracle_pool = OraclePool(feedlot2, (v for _,v in cow_oracle.iterrows()))
rebalancing_pool = RebalancingPool(feedlot3, (v for _,v in cow_oracle.iterrows()))

In [None]:
# Replay the historical Uniswap order flow through the simulated pools.

# Counter for trades that the oracle pool answered with a zero amount (reserves depleted)
depleted_reserve_count = 0
# The swaps (rows) that hit depleted reserves. Despite the name this is a plain list of rows.
depleted_reserve_df = []

# Pool-side index of the token being sold into the pool: token 0 = WETH, token 1 = USDC.
token_index_by_symbol = {'WETH': 0, 'USDC': 1}

for index, row in uni_swaps_df.iterrows():
    # Only replay swaps that occur strictly after the first timestamp of the external price series
    if row['trades_timestamp'] > cpmm_SI_flow.target_prices.index[0]:
        # Determine which token is being traded (WETH or USDC) and how much.
        # A swap with any other input token is skipped; the previous `match` had no default
        # case, so such a row silently reused the token and amount of the preceding row.
        token = token_index_by_symbol.get(row['swaps_tokenIn_id'])
        if token is None:
            continue
        amt = row['swaps_amountIn_converted']

        # Send the same order to the CPMM (which may decline it) and to the oracle pool.
        # NOTE(review): rebalancing_pool does not receive any order here — confirm this is intended.
        amta, amtb = cpmm_SI_flow.maybe_market_order_sell(amt, token, row['trades_timestamp'])
        amt0, amt1 = oracle_pool.market_order_sell(amt, token, row['trades_timestamp'])

        # Count transactions that deplete the oracle pool reserves and keep them for inspection.
        # Only the oracle pool result is checked; amta/amtb are not used.
        if amt0 == 0:
            # print(f"Reserves depleted at time {row['trades_timestamp']}.")
            # print(f"Pool reserves: ({feedlot1.reserves[0], feedlot1.reserves[1]})")
            depleted_reserve_count += 1
            depleted_reserve_df.append(row)
        # print(f"Traded {amt0} for {amt1}\tat time {row['trades_timestamp']}.")
        # print(f"Pool reserves: ({feedlot1.reserves[0], feedlot1.reserves[1]})")



In [None]:
# Summary of the replay: number of depleted-reserve trades and the shape (rows, columns) of the swap frame.
print(f'depleted_reserve_count: {depleted_reserve_count}, total swaps going through sim: {uni_swaps_df.shape}')

In [None]:
def rolling_volatility(x: pd.Series, period='1D', window_size=30) -> pd.Series:
    """
    Rolling standard deviation of the relative period-over-period change of `x`.

    `x` (a Series with a datetime-like index) is resampled to `period`, keeping the last
    value of each bucket. The change between consecutive buckets is divided by the
    current bucket's value, and the standard deviation of these changes is taken over a
    rolling window of `window_size` buckets (at least one observation per window).
    """
    sampled = x.resample(period).last()
    relative_change = sampled.diff() / sampled
    windows = relative_change.rolling(window_size, min_periods=1)
    return windows.std()

def preprocess_history(pool: AMMPoolHistory) -> pd.DataFrame:
    """
    Prepare a pool's simulation history for plotting, with values expressed in USD.

    Every history entry except the first is joined (exact timestamp match) with the
    Chainlink prices from the notebook-level `chain_df`; prices are forward filled onto
    entries without a match. Added columns:
      reserve0_usd  - reserve0 valued at the chain price
      tvl           - reserve0_usd + reserve1
      reserve_ratio - reserve0_usd / reserve1
      reserve_cash  - reserve1 / tvl (share of TVL held in token 1)
    The result is indexed by the datetime of `ts`; for duplicated timestamps only the
    last row is kept.
    """
    history_df = pd.DataFrame(pool.history[1:])
    chain_prices = chain_df[["trades_timestamp", "prices_price"]]

    merged = history_df.merge(chain_prices, left_on='ts', right_on='trades_timestamp', how='left')
    merged = merged.assign(prices_price=merged["prices_price"].ffill())
    merged = merged.assign(
        reserve0_usd=lambda d: d["reserve0"] * d["prices_price"],
        tvl=lambda d: d["reserve0_usd"] + d["reserve1"],
        reserve_ratio=lambda d: d["reserve0_usd"] / d["reserve1"],
        reserve_cash=lambda d: d["reserve1"] / d["tvl"],
    )

    merged = merged.set_index(pd.to_datetime(merged['ts'], unit='s'), drop=True)
    is_superseded = merged.index.duplicated(keep="last")
    return merged[~is_superseded]

In [None]:
# Plot-ready history frame for each simulated pool
cpmm_amm_baulking_df, oracle_pool_df, rebalancing_pool_df = (
    preprocess_history(feedlot) for feedlot in (feedlot1, feedlot2, feedlot3)
)

# Rolling daily volatility of each pool's TVL
cpmm_amm_baulking_vol_s, oracle_pool_vol_s, rebalancing_pool_vol_s = (
    rolling_volatility(pool_df["tvl"])
    for pool_df in (cpmm_amm_baulking_df, oracle_pool_df, rebalancing_pool_df)
)
# Rolling daily volatility of the chain price, sampled at the oracle pool's history timestamps
price_vol_s = rolling_volatility(oracle_pool_df["prices_price"])

In [None]:
# Shapes (rows, columns) of the input order flow and of each pool's processed history.
for description, frame in (
    ("Number of swaps in the order flow historical data", uni_swaps_df),
    ("Number of swaps accepted in CPMM", cpmm_amm_baulking_df),
    ("Number of swaps accepted in OraclePool", oracle_pool_df),
    ("Number of rebalances", rebalancing_pool_df),
):
    print(f"{description}: {frame.shape}")

#### Simulation Charts

In [None]:
# Three stacked panels sharing the time axis: TVL, daily volatility, and cash share of TVL.
# One entry per pool: (history frame, volatility series, colour, legend label, extra line options)
pool_series = (
    (cpmm_amm_baulking_df, cpmm_amm_baulking_vol_s, 'blue', 'CPMM (with Baulking)', {}),
    (oracle_pool_df, oracle_pool_vol_s, 'orange', 'OraclePool', {}),
    (rebalancing_pool_df, rebalancing_pool_vol_s, 'grey', 'Rebalancing', {'linewidth': 0.4}),
)

fig4, (ax0, ax1, ax2) = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

# Panel 1: TVL in millions of USDC
for pool_df, _, line_color, pool_label, line_opts in pool_series:
    ax0.plot(pool_df.index, pool_df['tvl'] / 1000000, drawstyle='steps',
             color=line_color, label=pool_label, **line_opts)
ax0.set_title('TVL')
ax0.set_ylabel("(millions of USDC)")
ax0.legend()

# Panel 2: rolling daily volatility of each pool's TVL, plus the WETH/USDC price itself
for _, vol_s, line_color, pool_label, line_opts in pool_series:
    ax1.plot(vol_s.index, vol_s, color=line_color, label=pool_label, **line_opts)
ax1.plot(price_vol_s.index, price_vol_s, color='purple', label='WETH/USDC', linewidth=0.4)
ax1.set_title('Daily volatility')
ax1.legend()

# Panel 3: share of TVL held as USDC
for pool_df, _, line_color, pool_label, line_opts in pool_series:
    ax2.plot(pool_df.index, pool_df['reserve_cash'], color=line_color, label=pool_label, **line_opts)
ax2.set_title('Cash (USDC) balance as percentage of TVL')
ax2.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1))
ax2.legend()
plt.show()

# save charts as eps and pdf
fig4.savefig('../report/diagrams/pool_metrics.eps', format='eps', bbox_inches='tight')
fig4.savefig('../report/diagrams/pool_metrics.pdf', format='pdf', bbox_inches='tight')

In [None]:
# Change in reserve0 between consecutive history rows of the CPMM pool (the first row is NaN).
# This adds a column to cpmm_amm_baulking_df in place.
cpmm_amm_baulking_df['reserve0_delta'] = cpmm_amm_baulking_df['reserve0'].diff()

In [None]:
# Line plot of the row-to-row change in reserve0 of the CPMM pool over time.
cpmm_amm_baulking_df['reserve0_delta'].plot()

In [None]:
# Ten largest swaps by input amount among the swaps whose input token (swaps_tokenIn_id) is WETH.
uni_swaps_df[uni_swaps_df['swaps_tokenIn_id'] == 'WETH'].nlargest(10, 'swaps_amountIn_converted')

In [None]:
# Drop in TVL from one history row to the next (previous minus current), smoothed with a
# 10-row rolling mean, for the CPMM and the oracle pool.
cpmm_tvl = cpmm_amm_baulking_df["tvl"]
oraclepool_tvl = oracle_pool_df["tvl"]

cpmm_delta = cpmm_tvl.shift(1).sub(cpmm_tvl).rolling(10).mean()
oraclepool_delta = oraclepool_tvl.shift(1).sub(oraclepool_tvl).rolling(10).mean()

In [None]:
# Step plot of the smoothed TVL drops: CPMM first, then the oracle pool (default colour cycle, no legend).
fig, ax = plt.subplots()
ax.step(cpmm_delta.index, cpmm_delta)
ax.step(oraclepool_delta.index, oraclepool_delta)

In [None]:
# Correlation between the two smoothed TVL-drop series (pandas aligns them on their index).
print(f"Correlation: {oraclepool_delta.corr(cpmm_delta)}")
# Element-wise ratio of the oracle pool delta to the CPMM delta, smoothed with a 10-row rolling mean.
# Rows where cpmm_delta is 0 produce inf/NaN in the ratio.
beta = oraclepool_delta/cpmm_delta
beta.rolling(10).mean().plot()

In [None]:
def reserve_balance(df) -> pd.Series:
    """Value of reserve0 at `prices_price`, divided by reserve1, row by row."""
    reserve0_value = df["reserve0"] * df["prices_price"]
    balance = reserve0_value / df["reserve1"]
    return balance

In [None]:
# Oracle pool reserve balance (10-row rolling mean) on the left axis,
# with the chain price on a secondary right axis for comparison.
oraclepool_balance = reserve_balance(oracle_pool_df).rolling(10).mean()

fig, ax = plt.subplots()
ax.plot(oraclepool_balance.index, oraclepool_balance)

axr = ax.twinx()
axr.plot(oracle_pool_df["prices_price"], color="purple")