## Etherscan Notebook 

Looking at how to derive price for tokens and gas fees by pool.  Pool example can be found [here](https://app.uniswap.org/explore/pools/ethereum/0x88e6A0c2dDD26FEEb64F039a2c41296FcB3f5640)

In [1]:
import requests
import time 

import pandas as pd
import numpy as np
from web3 import Web3


In [2]:
ETHERSCAN_API_KEY = '16FCD3FTVWC3KDK17WS5PTWRQX1E2WEYV2'
ETHERSCAN_URL = "https://api.etherscan.io/api"
POOL0_ADDRESS="0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640" # USDC / WETH (0.05%) 
POOL0_TXN_FEE = 0.0005
POOL1_ADDRESS="0x8ad599c3a0ff1de082011efddc58f1908eb6e6d8" # USDC / WETH (0.3%)
POOl1_TXN_FEE = 0.003

GWEI_SCALER = 1e9

In [3]:
def fetch_latest_block(url, api_key):
    # Parameters for the API request
    params = {
        "module": "proxy",
        "action": "eth_blockNumber",
        "apikey": api_key
    }
    
    # Make the request
    response = requests.get(url, params=params)
    
    if response.status_code != 200:
        #st.error(f"API request failed with status code {response.status_code}")
        (f"API request failed with status code {response.status_code}") 
        return None
    
    # Handle the response
    if response.status_code == 200:
        result = response.json()
        if result.get("status") != "0" and "result" in result:
            latest_block = int(result['result'], 16)  # Convert hex to int
            #print(f"Latest Block Number: {latest_block}")
            return latest_block
        else:
            print(f"Unexpected response or error: {result.get('message', 'Unknown error')}")
            return None
    else:
        print(f"Failed to fetch the latest block. Status code: {response.status_code}")
        return None
    


In [4]:
def fetch_data(url, api_key, pool_address, start_block, end_block):
    params = {
        'module': 'account',
        'action': 'tokentx',
        'address': pool_address,
        'startblock': startblock,
        'endblock': endblock,
        'sort': 'desc',
        'apikey': api_key
    }
    
    response = requests.get(url, params=params)
    if response.status_code != 200:
        #st.error(f"API request failed with status code {response.status_code}")
        raise Exception(f"API request failed with status code {response.status_code}")
    
    data = response.json()
    if data['status'] != '1':
        #st.error(f"API returned an error: {data['result']}")
        raise Exception(f"API returned an error: {data['result']}")
    
    df = pd.DataFrame(data['result'])
    
    expected_columns = ['hash', 'blockNumber', 'timeStamp', 'from', 'to', 'gas', 'gasPrice', 'gasUsed', 'cumulativeGasUsed', 'confirmations', 'tokenSymbol', 'value', 'tokenName']
    
    for col in expected_columns:
        if col not in df.columns:
            raise Exception(f"Expected column '{col}' is missing from the response")
    
    df.sort_values(by='timeStamp')
    
    consolidated_data = {}

    for index, row in df.iterrows():
        tx_hash = row['hash']
        
        if tx_hash not in consolidated_data:
            consolidated_data[tx_hash] = {
                'blockNumber': row['blockNumber'],
                'timeStamp': row['timeStamp'],
                'hash': tx_hash,
                'from': row['from'],
                'to': row['to'],
                'WETH_value': 0,
                'USDC_value': 0,
                'tokenName_WETH': '',
                'tokenName_USDC': '',
                'gas': row['gas'],
                'gasPrice': row['gasPrice'],
                'gasUsed': row['gasUsed'],
                'cumulativeGasUsed': row['cumulativeGasUsed'],
                'confirmations': row['confirmations']
            }
        
        if row['tokenSymbol'] == 'WETH':
            consolidated_data[tx_hash]['WETH_value'] = row['value']
            consolidated_data[tx_hash]['tokenName_WETH'] = row['tokenName']
        elif row['tokenSymbol'] == 'USDC':
            consolidated_data[tx_hash]['USDC_value'] = row['value']
            consolidated_data[tx_hash]['tokenName_USDC'] = row['tokenName']

    return pd.DataFrame.from_dict(consolidated_data, orient='index')

In [5]:
# fetch block numbers for the last hour.
# Assumes that there is at least one transaction from both pools
# and more than ten transactions total.
endblock = fetch_latest_block(ETHERSCAN_URL, ETHERSCAN_API_KEY)
startblock = endblock-5 * 60
if endblock: 
    print(f"Start Block: {startblock}")
    print(f"Stop Block: {endblock}")

try: 
    p0 = fetch_data(ETHERSCAN_URL, ETHERSCAN_API_KEY, POOL0_ADDRESS, startblock, endblock)
    endtime = int(p0['timeStamp'].iloc[0])
    beginningtime = int(p0['timeStamp'].iloc[-1])
    print(f"Successfully fetched {p0.shape[0]} swaps in the last {(endtime -beginningtime)/60:.2f} minutes.")
    
    p1 = fetch_data(ETHERSCAN_URL, ETHERSCAN_API_KEY, POOL1_ADDRESS, startblock, endblock)
    endtime = int(p1['timeStamp'].iloc[0])
    beginningtime = int(p1['timeStamp'].iloc[-1])
    print(f"Successfully fetched {p1.shape[0]} swaps in the last {(endtime -beginningtime)/60:.2f} minutes.")
except Exception as e: 
    print(e)

Start Block: 21616435
Stop Block: 21616735
Successfully fetched 332 swaps in the last 59.80 minutes.
Successfully fetched 36 swaps in the last 57.00 minutes.


In [6]:
p0.columns

Index(['blockNumber', 'timeStamp', 'hash', 'from', 'to', 'WETH_value',
       'USDC_value', 'tokenName_WETH', 'tokenName_USDC', 'gas', 'gasPrice',
       'gasUsed', 'cumulativeGasUsed', 'confirmations'],
      dtype='object')

In [7]:
p0.head()

Unnamed: 0,blockNumber,timeStamp,hash,from,to,WETH_value,USDC_value,tokenName_WETH,tokenName_USDC,gas,gasPrice,gasUsed,cumulativeGasUsed,confirmations
0x3a0b26ce48e22e0d85d03fd19a1b20e9a4b98991ca51fd6f9308c7cb61cfdc94,21616733,1736785415,0x3a0b26ce48e22e0d85d03fd19a1b20e9a4b98991ca51...,0x51c72848c68a965f66fa7a88855f9f7784502a7f,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,27249766816706695168,82346212477,Wrapped Ether,USDC,195172,67614131305,150133,1159417,2
0x86f1efebcc3b43277d9ae2eb6c39d12038959931481c2c85114ee1746a066e4b,21616729,1736785367,0x86f1efebcc3b43277d9ae2eb6c39d12038959931481c...,0x51c72848c68a965f66fa7a88855f9f7784502a7f,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,27112143223736328192,81992133952,Wrapped Ether,USDC,194738,64296131345,149799,540582,6
0xdeddd6c1caede5d31439297a4ca64afe1051f505817a3ca1c189b9758f5e1f10,21616728,1736785355,0xdeddd6c1caede5d31439297a4ca64afe1051f505817a...,0x51c72848c68a965f66fa7a88855f9f7784502a7f,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,17330601988748953600,52443272860,Wrapped Ether,USDC,155650,43280139163,119731,389560,7
0x938d0500adf0f3e6cb003f0e91b4f8599f1b000faaccff4a394349a1efef3ff9,21616727,1736785343,0x938d0500adf0f3e6cb003f0e91b4f8599f1b000faacc...,0x6fb9792b8445d75d9b909b43394b8c1a62876994,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,99016338953940953,300000000,Wrapped Ether,USDC,334623,16013159787,244593,3005175,8
0x233e1d1b361249952046cd9f1a9dedba5fdba667f7b60323e1904e645ab3de2c,21616726,1736785331,0x233e1d1b361249952046cd9f1a9dedba5fdba667f7b6...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,97000000000000000,293597064,Wrapped Ether,USDC,430261,16037797525,307861,2785087,9


Now that I have information at the transaction level.  I would like to parse specific transactions to extract the sqrtPriceX96 value which is the only value missing for our current model.  Also, assuming that I am doing this for inference, I only need transactions that fit the following criteria: (1) at least one transactions from each pool (because of the forward fill), (2) at least 10 transactions total (to ensure we can do lags and moving averages for feature extraction). 

For now, I will just truncate both to use only the last ten from each pool.  This is a shortcut for inference only that allows redunancy in the fetch for data from logs, allows for forward fill of known values, etc.

In [8]:
p0 = p0.iloc[-10:]
p1 = p1.iloc[-10:]

**Fetch sqrtPriceX96 values for the first 10 transactions in the returned list.**

In [9]:
def decode_block_data(url, swap_tx_data, block_number,transaction_hash,datatype_list,verbose=False):
    """
    
    # Call for a single block
    print(f"Block Number: {block_number}")
    swap_tx_data = pool1_tx_data[0]['data']
    block_number = int(pool1_tx_data[0]['blockNumber'],16)
    block_data = decode_block_data(swap_tx_data,block_number,verbose=True)
    
    """
    w3 = Web3(Web3.HTTPProvider(url))

    amount0, amount1, sqrtPriceX96, liquidity, tick = w3.eth.codec.decode(datatype_list,bytes.fromhex(swap_tx_data[2:]))
    return sqrtPriceX96

# Function to fetch logs from a block
def fetch_logs_for_block(url, api_key, block_number, pool_address):
    params = {
        "module": "logs",
        "action": "getLogs",
        "fromBlock": block_number,
        "toBlock": block_number,
        "address": pool_address,
        "apikey": api_key,
    }
    response = requests.get(url, params=params)
    data = response.json()
    
    if data.get("status") == "1" and "result" in data:
        return data["result"]
    else:
        raise ValueError(f"Error fetching logs for block {block_number}: {data.get('message', 'Unknown error')}")

# Decode the sqrtPriceX96 from the logs
def decode_sqrt_price_x96(url, logs):
    sqrt_prices = []

    #import pdb
    #pdb.set_trace()
    
    # Uniswap V3 Swap event ABI
    swap_event_abi = {
        "anonymous": False,
        "inputs": [
            {"indexed": True, "internalType": "address", "name": "sender", "type": "address"},
            {"indexed": False, "internalType": "int256", "name": "amount0", "type": "int256"},
            {"indexed": False, "internalType": "int256", "name": "amount1", "type": "int256"},
            {"indexed": False, "internalType": "uint160", "name": "sqrtPriceX96", "type": "uint160"},
            {"indexed": False, "internalType": "uint128", "name": "liquidity", "type": "uint128"},
            {"indexed": False, "internalType": "int24", "name": "tick", "type": "int24"}
        ],
        "name": "Swap",
        "type": "event"
    }
    # For Decoding Data Field
    datatype_list = [i['type'] for i in swap_event_abi['inputs'] if not i['indexed']]
    raw_data = []
    for log in logs:
        if log["topics"][0] == "0xc42079f94a6350d7e6235f29174924f928cc2ac818eb64fed8004e115fbcca67":
            block = int(log['blockNumber'])
            data = log['data']
            transaction_hash=log['transactionHash']
            raw_data.append(decode_block_data(url,data,block,transaction_hash,datatype_list))
            
            # Extract sqrtPriceX96 from the data field (last 32 bytes)
            sqrt_price_x96 = int(data[-64:], 16)
            sqrt_prices.append(sqrt_price_x96)
    return raw_data



def fetch_sqrtPriceX96(block_numbers, time_stamps):
    results = {'timeStamp':[], 'blockNumber':[], 'sqrtPriceX96':[]}

    for block,timestamp in zip(block_numbers,time_stamps):
        try:
            logs = fetch_logs_for_block(ETHERSCAN_URL, ETHERSCAN_API_KEY, block, POOL0_ADDRESS)
            sqrt_prices = decode_sqrt_price_x96(ETHERSCAN_URL, logs)
            for sqrt_price in sqrt_prices:
                results['timeStamp'].append(timestamp)
                results['blockNumber'].append(block)
                results['sqrtPriceX96'].append(sqrt_price)
            print(".",end='')
                
        except Exception as e:
            print(f"Error processing block {block}: {e} (ignoring for now)")

    return results

start_time = time.time()
p0_sqrtPriceX86 = fetch_sqrtPriceX96(list(p0['blockNumber']), list(p0['timeStamp']))
p0 = p0.merge(pd.DataFrame(p0_sqrtPriceX86),on=['timeStamp','blockNumber'],how='left').dropna().reset_index(drop=True)
p1_sqrtPriceX86 = fetch_sqrtPriceX96(list(p1['blockNumber']), list(p1['timeStamp']))
p1 = p1.merge(pd.DataFrame(p1_sqrtPriceX86),on=['timeStamp','blockNumber'],how='left').dropna().reset_index(drop=True)
finish_time = time.time()
print(f"Completed fetching {len(list(p0['blockNumber']))} blocks in {finish_time-start_time}s")

Error processing block 21616443: invalid literal for int() with base 10: '0x149d73b' (ignoring for now)
Error processing block 21616441: invalid literal for int() with base 10: '0x149d739' (ignoring for now)
Error processing block 21616440: invalid literal for int() with base 10: '0x149d738' (ignoring for now)
Error processing block 21616438: invalid literal for int() with base 10: '0x149d736' (ignoring for now)
Error processing block 21616438: invalid literal for int() with base 10: '0x149d736' (ignoring for now)
Error processing block 21616436: invalid literal for int() with base 10: '0x149d734' (ignoring for now)
Error processing block 21616436: invalid literal for int() with base 10: '0x149d734' (ignoring for now)
Error processing block 21616436: invalid literal for int() with base 10: '0x149d734' (ignoring for now)
Error processing block 21616435: invalid literal for int() with base 10: '0x149d733' (ignoring for now)
Error processing block 21616435: invalid literal for int() with 

In [10]:
p1.head(10)

Unnamed: 0,blockNumber,timeStamp,hash,from,to,WETH_value,USDC_value,tokenName_WETH,tokenName_USDC,gas,gasPrice,gasUsed,cumulativeGasUsed,confirmations,sqrtPriceX96


**Get price within the Pool in ETH/USDC**

To derive the price for the pool in ETH/USDC, you must use the sqrtPriceX96 value, which is the pool price immediately after the transaction takes place (including slippage).  You can see below that there is almost always a descrepency, but its not always enough to over come transaction and gas fees (see below).

In [11]:
# row to pick for the swap...used just for the example.
pool0_price_in_USDC_per_ETH  = ((p0["sqrtPriceX96"].iloc[0] / 2**96)**2 / 1e12) **-1
pool1_price_in_USDC_per_ETH  = ((p1["sqrtPriceX96"].iloc[0] / 2**96)**2 / 1e12) **-1

print(f"Pool 0 Price in USDC per ETH (at Tx: 0x...{p0['hash'].iloc[0][-4:]}): ${pool0_price_in_USDC_per_ETH:.2f}")
print(f"Pool 1 Price in USDC per ETH (at Tx: 0x...{p1['hash'].iloc[0][-4:]}): ${pool1_price_in_USDC_per_ETH:.2f}")
print(f"Difference in price: ${pool1_price_in_USDC_per_ETH-pool0_price_in_USDC_per_ETH:.2f}")

IndexError: single positional indexer is out-of-bounds

**Get gas fees in ETH**

Gas fees for a transaction include all the 'work' done. There is a rate of fee per unit of work (i.e. gasPrice) and then there is the work done (i.e. gasUsed).  gasPrice and gasUsed is in gwei which is 1e9 of an ETH.  so to convert to eth, each value needs to be converted with the 1e9 scaling.

In [None]:
gas_price_eth_tokens_per_unit = int(p0['gasPrice'].iloc[0])/GWEI_SCALER
gas_used_units = int(p0['gasUsed'].iloc[0]) / GWEI_SCALER
gas_fees_eth_tokens  = gas_price_eth_tokens_per_unit* gas_used_units
gas_fees_usdc_tokens = pool0_price_in_USDC_per_ETH * gas_fees_eth_tokens 
print(f"Gas Price in ETH per unit: {gas_price_eth_tokens_per_unit}")
print(f"Gas Used in GWEI units for Uniswap Transaction: {gas_used_units}")
print(f"Gas fees for this Transaction in ETH: {gas_fees_eth_tokens:.5f} (ETH)")
print(f"Gas fees for this Transaction in USDC: ${gas_fees_usdc_tokens:.2f} (USDC)")