# Gas Fees Issue

Gas fees are market-based external prices that behave more or less like a random walk. Using sample-and-hold for the slower-moving pool's gas fees is not a good strategy: it leads the model to make poor choices about the actual cost of doing business. A better approach is to sample-and-hold `gasUsed` and attempt to use the latest value for `gasPrice` (if possible).

First, I need to import two pools and see whether we can come up with a better approach.

In [10]:
# Change the active path to the parent directory so that the `src` package and
# the relative data paths used by later cells resolve.
import os  # imported here because this cell runs before the main import cell

# Only move up when `src/` is not already visible from the working directory;
# without this guard, re-running the cell climbs one directory higher each time.
# NOTE(review): assumes the project root is the directory that contains `src/`.
if not os.path.isdir("src"):
    print("Moving active path to parent directory")
    os.chdir('..')
print(os.getcwd())

Moving active path to parent directory
/Users/das/DATASCI210/arbitrage_playground


In [30]:
import os
import re

import pandas as pd

from itertools import combinations

import src.arbutils as arbutils
import src.fetch as fetch

In [21]:

# The Graph API key is read from the environment (None when the variable is unset).
GRAPH_API_KEY = os.environ.get("GRAPH_API_KEY")
# Relative directory that is scanned for the pool CSV files.
DATA_PATH = "data/"

In [25]:
def find_pool_pairs(thegraph_api_key, location):
    """
    Find pairs of pool CSV files in a directory that trade the same two tokens.

    Scans `location` for files whose name matches
    ``pool_id_<address>_swap_final.csv``, requests the metadata of each pool via
    ``fetch.thegraph_request_pool_metadata`` and groups the pools by their
    (unordered) pair of token symbols.  Every combination of two pools with
    different addresses inside a group is reported as a pair.

    Args:
        thegraph_api_key: API key forwarded to the metadata request.
        location: Directory holding the CSV files; a trailing path separator
            is optional.

    Returns:
        A list of dicts ``{'pool0': pool, 'pool1': pool}``.  Each ``pool`` is a
        dict with the keys ``filename`` (path joined with `location`),
        ``address``, ``feeTier`` (the metadata value scaled by 1e-6),
        ``token0`` and ``token1`` (token symbols).  Files are processed in
        sorted name order, so the result is the same on every run.
    """
    filename_pattern = re.compile(r"pool_id_(.*?)_swap_final\.csv")
    pools = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the order of
    # the pairs (and which pool ends up as pool0/pool1) reproducible.
    for filename in sorted(os.listdir(location)):
        if '.csv' not in filename:
            continue

        match = filename_pattern.search(filename)
        if not match:
            # ignore this mysterious csv.
            print(f"Ignoring {filename}")
            continue

        address = match.group(1)
        # NOTE(review): assumes the request always returns a dict with
        # 'feeTier', 'token0' and 'token1' -- confirm how failures are reported.
        metadata = fetch.thegraph_request_pool_metadata(thegraph_api_key=thegraph_api_key, pool_address=address)
        pools.append({
            # os.path.join also works when `location` has no trailing separator.
            'filename': os.path.join(location, filename),
            'address': address,
            'feeTier': int(metadata['feeTier'])*1e-6,
            'token0': metadata['token0']['symbol'],
            'token1': metadata['token1']['symbol']
        })

    # Group the pools by token pair (order doesn't matter, so the key is a
    # sorted tuple of the two symbols).
    pools_by_pair = {}
    for pool in pools:
        pair = tuple(sorted([pool['token0'], pool['token1']]))
        pools_by_pair.setdefault(pair, []).append(pool)

    # For each token pair, generate all combinations of two pools.  Two files
    # that resolve to the same address are not a pair of distinct pools.
    matching_pools = [
        {'pool0': pool0, 'pool1': pool1}
        for group in pools_by_pair.values()
        for pool0, pool1 in combinations(group, 2)
        if pool0['address'] != pool1['address']
    ]

    print(f"Found {len(matching_pools)} valid pool pairs.")

    return matching_pools

In [32]:
# Discover every pair of pool CSVs under DATA_PATH that share a token pair,
# then print the two pool addresses of each pair found.
pool_pairs_list = find_pool_pairs(GRAPH_API_KEY, DATA_PATH)
for pool_pair in pool_pairs_list:
    print(f"Pair: {pool_pair['pool0']['address']}, {pool_pair['pool1']['address']}")

Found 1 valid pool pairs.
Pair: 0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640, 0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8


In [33]:
# Read the CSV of each pool in the first matched pair (pool0 -> p0, pool1 -> p1).
p0, p1 = (
    pd.read_csv(pool_pairs_list[0][slot]['filename'])
    for slot in ('pool0', 'pool1')
)

# Keep the fee tier of each pool next to its data.
p0_fee_tier, p1_fee_tier = (
    pool_pairs_list[0][slot]['feeTier']
    for slot in ('pool0', 'pool1')
)

In [55]:
# Show which columns the pool0 CSV provides.
p0.columns

Index(['transactionHash', 'datetime', 'timeStamp', 'sqrtPriceX96',
       'blockNumber', 'gasPrice', 'gasUsed', 'tick', 'amount0', 'amount1',
       'liquidity'],
      dtype='object')

In [66]:
# Columns kept for the side-by-side comparison of the two pools.
SAMPLE_COLUMNS = ['datetime', 'timeStamp', 'gasPrice', 'blockNumber', 'gasUsed', 'sqrtPriceX96', 'tick']
SAMPLE_SIZE = 10
# Fixed seed so that re-running the notebook draws the same rows; without it the
# tables shown below cannot be reproduced.
SAMPLE_SEED = 42

# Draw a small random sample from each pool and order it chronologically.
small_p0 = p0[SAMPLE_COLUMNS].sample(SAMPLE_SIZE, random_state=SAMPLE_SEED).sort_values(by='datetime')
small_p1 = p1[SAMPLE_COLUMNS].sample(SAMPLE_SIZE, random_state=SAMPLE_SEED).sort_values(by='datetime')

In [68]:
# Outer-join the two samples on the transaction-level keys they share; the
# remaining per-pool columns receive the default _x (small_p0) / _y (small_p1)
# suffixes.
pools = small_p0.merge(
    small_p1,
    how='outer',
    on=['datetime', 'timeStamp', 'gasPrice', 'blockNumber'],
)
pools

Unnamed: 0,datetime,timeStamp,gasPrice,blockNumber,gasUsed_x,sqrtPriceX96_x,tick_x,gasUsed_y,sqrtPriceX96_y,tick_y
0,2024-12-19 00:13:23+00:00,1734567203,15208800000.0,21432919,,,,205175.0,1.313314e+33,194324.0
1,2024-12-19 18:50:11+00:00,1734634211,54954980000.0,21438465,,,,297857.0,1.34312e+33,194773.0
2,2024-12-20 02:42:11+00:00,1734662531,17924740000.0,21440816,,,,121780.0,1.350671e+33,194885.0
3,2024-12-20 10:19:35+00:00,1734689975,38079750000.0,21443091,269502.0,1.398178e+33,195576.0,,,
4,2024-12-21 09:52:23+00:00,1734774743,8534123000.0,21450106,,,,286324.0,1.344915e+33,194799.0
5,2024-12-22 20:21:59+00:00,1734898919,9567340000.0,21460391,211559.0,1.393072e+33,195503.0,,,
6,2024-12-24 18:54:59+00:00,1735066499,8366211000.0,21474249,461277.0,1.340213e+33,194729.0,,,
7,2024-12-25 15:47:47+00:00,1735141667,8787449000.0,21480481,358912.0,1.344746e+33,194797.0,,,
8,2024-12-26 08:57:23+00:00,1735203443,21778020000.0,21485589,119699.0,1.365755e+33,195107.0,,,
9,2024-12-26 21:10:23+00:00,1735247423,79352860000.0,21489235,,,,119755.0,1.376078e+33,195258.0


In [73]:
# Rename columns: replace the merge suffixes (_x / _y) with pool prefixes (p0. / p1.)
def _prefix_pool_column(col):
    """Map '<name>_x' to 'p0.<name>' and '<name>_y' to 'p1.<name>'; leave other names unchanged."""
    # Match the suffix only: a substring test would also rewrite names that
    # merely contain '_x' or '_y' somewhere in the middle.
    if col.endswith('_x'):
        return f"p0.{col[:-2]}"
    if col.endswith('_y'):
        return f"p1.{col[:-2]}"
    return col

# rename() returns a new frame; it is displayed here and `pools` is left untouched.
pools.rename(columns=_prefix_pool_column)

Unnamed: 0,datetime,timeStamp,gasPrice,blockNumber,p0.gasUsed,p0.sqrtPriceX96,p0.tick,p1.gasUsed,p1.sqrtPriceX96,p1.tick
0,2024-12-19 00:13:23+00:00,1734567203,15208800000.0,21432919,,,,205175.0,1.313314e+33,194324.0
1,2024-12-19 18:50:11+00:00,1734634211,54954980000.0,21438465,,,,297857.0,1.34312e+33,194773.0
2,2024-12-20 02:42:11+00:00,1734662531,17924740000.0,21440816,,,,121780.0,1.350671e+33,194885.0
3,2024-12-20 10:19:35+00:00,1734689975,38079750000.0,21443091,269502.0,1.398178e+33,195576.0,,,
4,2024-12-21 09:52:23+00:00,1734774743,8534123000.0,21450106,,,,286324.0,1.344915e+33,194799.0
5,2024-12-22 20:21:59+00:00,1734898919,9567340000.0,21460391,211559.0,1.393072e+33,195503.0,,,
6,2024-12-24 18:54:59+00:00,1735066499,8366211000.0,21474249,461277.0,1.340213e+33,194729.0,,,
7,2024-12-25 15:47:47+00:00,1735141667,8787449000.0,21480481,358912.0,1.344746e+33,194797.0,,,
8,2024-12-26 08:57:23+00:00,1735203443,21778020000.0,21485589,119699.0,1.365755e+33,195107.0,,,
9,2024-12-26 21:10:23+00:00,1735247423,79352860000.0,21489235,,,,119755.0,1.376078e+33,195258.0
