In [1]:
%load_ext autoreload
%autoreload 2

import polars as pl
import markout

In [None]:
# load pools
# TODO: can I totally skip this step? Ideally you could build it as an add-on feature to make the analysis extensible.

# WETH token address; only WETH pairs are kept because USDC values are not
# available for everything else.
WETH_ADDRESS = '0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2'
token0_is_weth = pl.col('TOKEN0_ADDRESS') == WETH_ADDRESS
token1_is_weth = pl.col('TOKEN1_ADDRESS') == WETH_ADDRESS

pools = (
    # see pools.sql
    pl.read_csv('2024.5.2 uniswap pools for markout analysis.csv')
    # The WETH filter lives in Python rather than in the SQL so that it stays
    # extensible to USDC in the future.
    .filter(token0_is_weth | token1_is_weth)
    .with_columns(
        # FEE is divided by 100 and stored as a small integer.
        FEE=(pl.col('FEE') / 100).cast(pl.Int8),
        # whether WETH is token0 or not
        # TODO: can I remove this and pass decimals to the markout?
        IS_WETH_TOKEN0=pl.when(token0_is_weth).then(True).otherwise(False),
    )
)

In [2]:
# Load the swaps table through the project-local `markout` module. Later cells
# read its `address`, `token0_decimals` and `token1_decimals` columns.
# NOTE(review): `loadSwaps` is camelCase while the other markout helper used in
# this notebook (`load_all_swaps`) is snake_case — confirm the current name.
# FYI BOB/WETH is 0x3887e82dbdbe8ec6db44e6298a2d48af572a3b78
swaps = markout.loadSwaps()

In [17]:
# Build the markout table from the full swaps table.
# NOTE(review): construct_markout is called with one argument here but with four
# (swaps, token decimals, IS_WETH_TOKEN0) inside process_pools — confirm which
# signature the markout module currently exposes.
swaps_plus_markout = markout.construct_markout(swaps)

In [19]:
# Write only the first 2000 rows to CSV as a cut-down sample of the markout table.
swaps_plus_markout.head(2000).write_csv('swaps_plus_markout_cut.csv')

In [10]:
# Distinct pool addresses that have at least one swap row with a missing
# decimals value for either token.
decimals_missing = (
    pl.col('token0_decimals').is_null() | pl.col('token1_decimals').is_null()
)
addresses_list = (
    swaps
    .filter(decimals_missing)
    .select('address')
    .unique()
    .to_series()
    .to_list()
)

# One address per line.
for address in addresses_list:
    print(address)

0x2dcd080327c08d6f202c7ef0b709f73d325c0464
0x4fd305344201c6b6f15a7a55a314a3fc91d4fc8a
0xba3e0ff5a72e928ea6f7792b2e6d70249cb4286d
0x900549b32e9fe1ec712648e2ba5961904e311df5
0x2b263a81573ed8a7836b590f8bed1fe9c69807a7
0x8347d552b43c1477927ac4bcbf95921c1bd7b0d4
0xe58f6ce1d154a63314772d8ed483e477e040a2b8
0x91e5b0be8ec7f0f86210931ecc3b34c186d79eff
0xb8c9dca43702409f20779a3ee62f4453b9dc641c


In [4]:
# Summary statistics for every column of `swaps`; the null_count row shows how
# many rows are missing token decimals / symbol.
swaps.describe()

statistic,address,block_timestamp,block_number,transaction_index,transaction_hash,amount0,token0_decimals,amount1,token1_decimals,sqrtPriceX96,token0_symbol
str,str,str,f64,f64,str,f64,f64,f64,f64,str,str
"""count""","""3325969""","""3325969""",3325969.0,3325969.0,"""3325969""",3325969.0,3323791.0,3325969.0,3325117.0,"""3325969""","""3323791"""
"""null_count""","""0""","""0""",0.0,0.0,"""0""",0.0,2178.0,0.0,852.0,"""0""","""2178"""
"""mean""",,"""2023-07-15 17:…",17701000.0,172.656638,,1.3151999999999999e+38,15.720892,2.8619999999999998e+38,16.491509,,
"""std""",,,193740.233298,150.84641,,3.2865e+41,4.537996,3.6849e+41,3.926501,,
"""min""","""0x0073ce82d9a8…","""2023-06-01 00:…",17382266.0,1.0,"""0x0000039e791d…",-3.7959e+44,0.0,-2.1093999999999998e+33,0.0,"""10000002698099…",""" SHOP"""
"""25%""",,"""2023-06-21 23:…",17531242.0,57.0,,-1.7569e+20,18.0,-5.0174e+17,18.0,,
"""50%""",,"""2023-07-15 19:…",17700948.0,151.0,,-670090000.0,18.0,1000000000.0,18.0,,
"""75%""",,"""2023-08-08 18:…",17871984.0,253.0,,5.15e+18,18.0,4.5e+17,18.0,,
"""max""","""0xffdb25e0f0c6…","""2023-08-31 23:…",18037987.0,10136.0,"""0xfffffbb18ccd…",3.8442e+44,18.0,4.8757e+44,27.0,"""99999949382130…","""똥코인"""


In [None]:
"""
    TODO:
    something like
    for pool in "pools", which is a pl.DataFrame
    get the pool address and chain
    call load_all_swaps, which returns a pl.DataFrame
    pass the df, TOKEN0_DECIMALS, TOKEN1_DECIMALS, IS_WETH_TOKEN0, and weth_prices to construct_markout
    call execute_markout, which returns a pl.DataFrame
    append the DataFrame to "results", a pl.DataFrame
"""

def process_pools(pools: pl.DataFrame) -> pl.DataFrame:
    """Run the markout pipeline for every pool and collect the per-pool results.

    For each row of ``pools`` the pool's swaps are loaded with
    ``markout.load_all_swaps``, turned into a markout frame with
    ``markout.construct_markout`` and summarised with
    ``markout.execute_markout``. Each summary is tagged with the pool address,
    chain and fee before being collected.

    Args:
        pools: One row per pool. Must provide the columns
            ``LIQUIDITY_POOL_ADDRESS``, ``TOKEN0_DECIMALS``,
            ``TOKEN1_DECIMALS``, ``IS_WETH_TOKEN0`` and ``FEE``.

    Returns:
        A DataFrame built from the per-pool results via ``pl.from_dicts``,
        with ``pool_address``, ``chain`` and ``fee`` added to each result.
        An empty DataFrame is returned when ``pools`` has no rows.
    """
    # Every pool is on the same chain, so this is set once outside the loop.
    chain = 'ethereum'

    results = []
    for pool in pools.iter_rows(named=True):
        pool_address = pool['LIQUIDITY_POOL_ADDRESS']

        # Load all swaps for the current pool
        swaps_df = markout.load_all_swaps(pool_address, chain)

        # Construct the markout DataFrame
        markout_df = markout.construct_markout(
            swaps_df,
            pool['TOKEN0_DECIMALS'],
            pool['TOKEN1_DECIMALS'],
            pool['IS_WETH_TOKEN0'],
        )

        # Execute the markout calculation
        # NOTE(review): assumes execute_markout returns a mutable mapping (dict),
        # since item assignment and pl.from_dicts are used below — TODO confirm.
        markout_result = markout.execute_markout(markout_df)

        # Add pool information to the result
        markout_result['pool_address'] = pool_address
        markout_result['chain'] = chain
        markout_result['fee'] = pool['FEE']

        results.append(markout_result)

    # pl.from_dicts cannot infer a schema from an empty list, so return an
    # empty frame instead of raising when there were no pools to process.
    if not results:
        return pl.DataFrame()

    return pl.from_dicts(results)

# Assumes a DataFrame called 'pools' with pool information is already defined.
# NOTE(review): a 'weth_prices' DataFrame with WETH price data was mentioned here,
# but process_pools() is only given `pools` — confirm whether it is still needed.
result_df = process_pools(pools)

# TODO: then plot. x axis as volume, y axis as markout, can plot fee as dot color.

In [None]:
# Keep exploded rows with non-zero volume whose markout is above the cutoff.
# NOTE(review): the -508827 cutoff is undocumented, and the name `bad_results`
# is applied to the rows that pass the filter — confirm the intent.
volume_is_nonzero = pl.col('total_volume') != 0.0
markout_above_cutoff = pl.col('total_markout') > -508827

bad_results = (
    result_df
    .explode(['total_volume', 'total_markout'])
    .filter(volume_is_nonzero & markout_above_cutoff)
)

In [None]:
# Scatter of markout (y) against volume (x) for the filtered rows.
bad_results.select(["total_volume", "total_markout"]).plot.scatter(x="total_volume", y="total_markout")

In [None]:
# Preview the first rows after exploding the two list columns together.
result_df.explode(['total_volume', 'total_markout']).head()
# result_df.explode(['total_volume_usd', 'total_markout_usd']).head().write_csv('results.csv')

In [None]:
# Explode the volume/markout list columns together, then plot markout (y)
# against volume (x) for all rows, unfiltered.
volume_markout_cols = ['total_volume', 'total_markout']
exploded_df = result_df.explode(volume_markout_cols)

exploded_df.select(volume_markout_cols).plot.scatter(x="total_volume", y="total_markout")