In [1]:
import polars as pl
from state import v3Pool
import markout

In [8]:
# WETH token address (the one the original filter and flag compared against).
WETH_ADDRESS = '0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2'

# Reusable predicates: which side of the pair is WETH.
token0_is_weth = pl.col('TOKEN0_ADDRESS') == WETH_ADDRESS
token1_is_weth = pl.col('TOKEN1_ADDRESS') == WETH_ADDRESS

# Pool list; see pools.sql for how the CSV was produced.
all_pools = pl.read_csv('2024.5.2 uniswap pools for markout analysis.csv')

# Keep WETH pairs only: USDC values are not available for everything else.
weth_pools = all_pools.filter(token0_is_weth | token1_is_weth)

pools = (
    weth_pools
    .with_columns(
        # FEE is divided by 100 and stored as an integer.
        FEE = (pl.col('FEE')/100).cast(pl.Int64),
        # True when WETH is token0, False otherwise.
        IS_WETH_TOKEN0 = pl.when(token0_is_weth).then(True).otherwise(False),
    )
    # Only the first 10 pools are carried forward.
    .head(10)
)

# # I wanted to get the WETH price from Allium, but the data there is broken, so I will compute it with v3-polars instead.
# weth_usdc = v3Pool('0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640', 'ethereum', update=True)

# STARTING_BLOCK = 17382266

# weth_prices = weth_usdc.getPriceSeries(as_of=(STARTING_BLOCK / 1e4), frequency='1s')
# # weth_prices = (weth_prices
# #                .cast({'block_timestamp':pl.Datetime})
# #                .with_columns(
# #                     weth_price = 10**12 / ((pl.col('sqrtPriceX96')/(2 ** 96)) ** 2)
# #                 )
# #             )

In [9]:
# Summary statistics for every column of the `pools` frame (sanity check of the load/filter step).
pools.describe()

statistic,LIQUIDITY_POOL_ADDRESS,POOL_NAME,TOKEN0_ADDRESS,TOKEN0_DECIMALS,TOKEN1_ADDRESS,TOKEN1_DECIMALS,FEE,IS_WETH_TOKEN0
str,str,str,str,f64,str,f64,f64,f64
"""count""","""10""","""10""","""10""",10.0,"""10""",10.0,10.0,10.0
"""null_count""","""0""","""0""","""0""",0.0,"""0""",0.0,0.0,0.0
"""mean""",,,,18.0,,17.1,72.0,0.4
"""std""",,,,0.0,,2.84605,36.147845,
"""min""","""0x0188c28ea32d…","""uniswap: DPI-W…","""0x1494ca1f11d4…",18.0,"""0xc02aaa39b223…",9.0,30.0,0.0
"""25%""",,,,18.0,,18.0,30.0,
"""50%""",,,,18.0,,18.0,100.0,
"""75%""",,,,18.0,,18.0,100.0,
"""max""","""0xfcfdfc98062d…","""uniswap: WETH-…","""0xc02aaa39b223…",18.0,"""0xe410fbd0977c…",18.0,100.0,1.0


In [10]:
"""
    TODO:
    something like
    for pool in "pools", which is a pl.DataFrame
    get the pool address and chain
    call load_all_swaps, which returns a pl.DataFrame
    pass the df, TOKEN0_DECIMALS, TOKEN1_DECIMALS, IS_WETH_TOKEN0, and weth_prices to construct_markout
    call execute_markout, which returns a pl.DataFrame
    append the DataFrame to "results", a pl.DataFrame
"""

def process_pools(pools: pl.DataFrame, chain: str = 'ethereum') -> pl.DataFrame:
    """
    Run the markout pipeline for every pool in `pools` and collect the results.

    For each row the pool's swaps are loaded with `markout.load_all_swaps`,
    passed to `markout.construct_markout` together with the token decimals and
    the WETH-side flag, and the result is handed to `markout.execute_markout`.
    The pool address, chain and fee are attached to each per-pool result.

    Parameters
    ----------
    pools : pl.DataFrame
        One row per pool. Must contain the columns LIQUIDITY_POOL_ADDRESS,
        TOKEN0_DECIMALS, TOKEN1_DECIMALS, IS_WETH_TOKEN0 and FEE.
    chain : str, optional
        Chain name forwarded to `markout.load_all_swaps` and recorded on every
        result row. Defaults to 'ethereum', the value that used to be
        hard-coded here.

    Returns
    -------
    pl.DataFrame
        One row per pool: the keys produced by `markout.execute_markout` plus
        `pool_address`, `chain` and `fee`. When `pools` has no rows an empty,
        column-less DataFrame is returned (pl.from_dicts raises on an empty
        list because it has nothing to infer a schema from).
    """
    results = []

    for pool in pools.iter_rows(named=True):
        # Pull every field the pipeline needs out of the row up front.
        pool_address = pool['LIQUIDITY_POOL_ADDRESS']
        token0_decimals = pool['TOKEN0_DECIMALS']
        token1_decimals = pool['TOKEN1_DECIMALS']
        is_weth_token0 = pool['IS_WETH_TOKEN0']
        fee = pool['FEE']

        # Load all swaps for the current pool.
        swaps_df = markout.load_all_swaps(pool_address, chain)

        # Construct the markout input from the swaps and the pool metadata.
        markout_df = markout.construct_markout(
            swaps_df, token0_decimals, token1_decimals, is_weth_token0
        )

        # Execute the markout calculation.
        markout_result = markout.execute_markout(markout_df)

        # NOTE(review): assumes execute_markout returns a dict-like mapping
        # (the original code item-assigned into it and fed it to
        # pl.from_dicts) - confirm against markout.py.
        # Build a new dict rather than mutating the object markout returned.
        results.append({
            **markout_result,
            'pool_address': pool_address,
            'chain': chain,
            'fee': fee,
        })

    # pl.from_dicts cannot infer a schema from an empty list.
    if not results:
        return pl.DataFrame()

    return pl.from_dicts(results)

# Run the markout pipeline over the 'pools' DataFrame built earlier in the notebook.
# Only 'pools' is passed here; no 'weth_prices' frame is supplied to process_pools.
result_df = process_pools(pools)

# TODO: then plot. x axis as volume, y axis as markout, can plot fee as dot color.

In [23]:
# Row-level predicates applied after the two columns are exploded.
has_volume = pl.col('total_volume') != 0.0
# NOTE(review): -508827 is an unexplained hard-coded cutoff - document where it comes from.
above_markout_cutoff = pl.col('total_markout') > -508827

# Explode total_volume / total_markout, then keep rows passing both predicates.
bad_results = (
    result_df
    .explode('total_volume', 'total_markout')
    .filter(has_volume & above_markout_cutoff)
)

In [24]:
# Scatter of markout against volume for the filtered rows.
filtered_points = bad_results.select(["total_volume", "total_markout"])
filtered_points.plot.scatter(x="total_volume", y="total_markout")

In [5]:
# Preview the first rows of the results after exploding the two metric columns.
exploded_preview = result_df.explode(['total_volume', 'total_markout'])
exploded_preview.head()
# result_df.explode(['total_volume_usd', 'total_markout_usd']).head().write_csv('results.csv')

total_volume,total_markout,pool_address,chain,fee
f64,f64,str,str,i64
2.2828e+21,-73293.039054,"""0x9359c87b38dd…","""ethereum""",30


In [6]:
# The two metric columns that are exploded and then plotted against each other.
metric_columns = ['total_volume', 'total_markout']

exploded_df = result_df.explode(metric_columns)

# Unfiltered scatter: volume on x, markout on y.
(
    exploded_df
    .select(["total_volume", "total_markout"])
    .plot.scatter(x="total_volume", y="total_markout")
)