In [None]:
import polars as pl
# import markout

In [None]:
# Load the candidate pools and narrow them down to WETH pairs.
# TODO: can I totally skip this step?
WETH_ADDRESS = '0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2'

# Reusable predicates: which side of the pair (if any) is WETH.
weth_is_token0 = pl.col('TOKEN0_ADDRESS') == WETH_ADDRESS
weth_is_token1 = pl.col('TOKEN1_ADDRESS') == WETH_ADDRESS

pools = (
    # see pools.sql
    pl.read_csv('2024.5.2 uniswap pools for markout analysis.csv')
    # Only WETH pairs are kept because USDC values are not available for
    # everything else. The filter lives in Python rather than in the SQL so it
    # can be extended to USDC in the future.
    .filter(weth_is_token0 | weth_is_token1)
    .with_columns(
        # FEE is scaled down by 100 and stored as an 8-bit integer.
        FEE=(pl.col('FEE') / 100).cast(pl.Int8),
        # Flag telling whether WETH is token0 or not.
        # TODO: can I remove this and pass decimals to the markout?
        IS_WETH_TOKEN0=pl.when(weth_is_token0).then(True).otherwise(False),
    )
)

In [12]:
# Preview the first rows of the WETH-filtered pools frame.
pools.head()

LIQUIDITY_POOL_ADDRESS,POOL_NAME,TOKEN0_ADDRESS,TOKEN0_DECIMALS,TOKEN1_ADDRESS,TOKEN1_DECIMALS,FEE,IS_WETH_TOKEN0
str,str,str,i64,str,i64,i8,bool
"""0x9359c87b38dd…","""uniswap: DPI-W…","""0x1494ca1f11d4…",18,"""0xc02aaa39b223…",18,30,False
"""0x919fa96e88d6…","""uniswap: WETH-…","""0xc02aaa39b223…",18,"""0xd533a949740b…",18,30,True
"""0x4628a0a564de…","""uniswap: RNDR-…","""0x6de037ef9ad2…",18,"""0xc02aaa39b223…",18,30,False
"""0xfcfdfc98062d…","""uniswap: WETH-…","""0xc02aaa39b223…",18,"""0xdef1ca1fb7fb…",18,100,True
"""0x0188c28ea32d…","""uniswap: WETH-…","""0xc02aaa39b223…",18,"""0xe410fbd0977c…",9,100,True


In [None]:
# Read the raw swap events from disk.
# TODO: add datetime conversion (maybe?)
swaps = pl.read_csv(
    'all_swaps.csv',
    # Force sqrt_price_x96 to be read as text rather than an inferred numeric dtype.
    dtypes={'sqrt_price_x96': pl.String},
)

In [11]:
# Preview the first rows of the swaps frame loaded from all_swaps.csv.
swaps.head()

liquidity_pool_address,block_timestamp,block_number,log_index,transaction_hash,token0_amount,token0_amount_raw,token0_decimals,token1_amount,token1_amount_raw,token1_decimals,sqrt_price_x96
str,str,i64,i64,str,f64,f64,i64,f64,f64,i64,str
"""0x3887e82dbdbe…","""2023-07-19 01:…",17724032,168,"""0x27d9c6fae175…",1852.896567,1.8529e+21,18,-0.970962,-9.7096e+17,18,"""18124201279677…"
"""0x3887e82dbdbe…","""2023-07-19 00:…",17723605,42,"""0xcafc476d8e41…",-762.018757,-7.6202e+20,18,0.40231,4.0231e+17,18,"""18206896076020…"
"""0x60594a405d53…","""2023-07-19 01:…",17724022,189,"""0x251d218f9f82…",74773.839744,7.4774e+22,18,-39.086115,-3.9086e+19,18,"""18089046453319…"
"""0x60594a405d53…","""2023-07-19 01:…",17723943,117,"""0x078dc32964fd…",-514.819089,-5.1482e+20,18,0.27,2.7e+17,18,"""18139715334062…"
"""0x60594a405d53…","""2023-07-19 01:…",17724040,11,"""0xd6b6c888177e…",-7405.027949,-7.405e+21,18,3.885996,3.886e+18,18,"""18147184557506…"


In [None]:
"""
    TODO:
    something like
    for pool in "pools", which is a pl.DataFrame
    get the pool address and chain
    call load_all_swaps, which returns a pl.DataFrame
    pass the df, TOKEN0_DECIMALS, TOKEN1_DECIMALS, IS_WETH_TOKEN0, and weth_prices to construct_markout
    call execute_markout, which returns a pl.DataFrame
    append the DataFrame to "results", a pl.DataFrame
"""

def process_pools(pools: pl.DataFrame) -> pl.DataFrame:
    """Run the markout pipeline for every pool and collect the results.

    For each row of ``pools`` the function loads the pool's swaps, builds the
    markout frame, executes the markout calculation, and tags the result with
    the pool address, chain and fee.

    Args:
        pools: One row per pool. Each row must provide the columns
            ``LIQUIDITY_POOL_ADDRESS``, ``TOKEN0_DECIMALS``,
            ``TOKEN1_DECIMALS``, ``IS_WETH_TOKEN0`` and ``FEE``.

    Returns:
        A DataFrame with one row per pool, built from the per-pool result
        mappings plus ``pool_address``, ``chain`` and ``fee``. An empty
        DataFrame is returned when ``pools`` has no rows.
    """
    # Imported locally so the function works on a fresh kernel: the notebook's
    # top-level ``import markout`` is commented out.
    import markout

    # Every pool is currently assumed to live on the same chain.
    chain = 'ethereum'

    results = []
    for pool in pools.iter_rows(named=True):
        pool_address = pool['LIQUIDITY_POOL_ADDRESS']

        # Load all swaps for the current pool.
        swaps_df = markout.load_all_swaps(pool_address, chain)

        # Construct the markout DataFrame.
        markout_df = markout.construct_markout(
            swaps_df,
            pool['TOKEN0_DECIMALS'],
            pool['TOKEN1_DECIMALS'],
            pool['IS_WETH_TOKEN0'],
        )

        # Execute the markout calculation.
        # NOTE(review): presumably returns a dict-like mapping (it is fed to
        # pl.from_dicts below) — confirm against the markout module.
        markout_result = markout.execute_markout(markout_df)

        # Build a new mapping instead of mutating the object markout returned.
        results.append(
            {
                **markout_result,
                'pool_address': pool_address,
                'chain': chain,
                'fee': pool['FEE'],
            }
        )

    # pl.from_dicts cannot infer a schema from an empty list, so short-circuit.
    if not results:
        return pl.DataFrame()

    return pl.from_dicts(results)

# Run the markout pipeline over the 'pools' DataFrame built earlier in this notebook.
# NOTE(review): the plan above mentions WETH price data ('weth_prices'), but
# process_pools only receives 'pools' — confirm where prices are sourced.
result_df = process_pools(pools)

# TODO: then plot. x axis as volume, y axis as markout, can plot fee as dot color.

In [None]:
# Flatten the list columns to one row per element, then keep only rows with
# non-zero volume whose markout lies above the cutoff.
# NOTE(review): the -508827 cutoff is not explained anywhere in the notebook;
# presumably it drops an outlier — confirm and document.
has_volume = pl.col('total_volume') != 0.0
above_markout_cutoff = pl.col('total_markout') > -508827

bad_results = result_df.explode(['total_volume', 'total_markout']).filter(
    has_volume & above_markout_cutoff
)

In [None]:
# Scatter plot of total_markout (y) against total_volume (x) for the filtered rows.
bad_results.select(["total_volume", "total_markout"]).plot.scatter(x="total_volume", y="total_markout")

In [None]:
# Preview the unfiltered results with total_volume / total_markout exploded
# to one row per element.
result_df.explode(['total_volume', 'total_markout']).head()
# result_df.explode(['total_volume_usd', 'total_markout_usd']).head().write_csv('results.csv')

In [None]:
# Explode the unfiltered results and plot markout against volume.
scatter_columns = ['total_volume', 'total_markout']
exploded_df = result_df.explode(scatter_columns)

# Keep the plot as the final expression so the notebook renders it.
exploded_df.select(scatter_columns).plot.scatter(x="total_volume", y="total_markout")