# Ethereum data sanity check 

We run the SQL queries below on Tsuchiya et al.'s data to find the number of attack attempts on the USDT and USDC contracts on Ethereum in a given time period. This gives us a way to evaluate the accuracy of our algorithm.  
Note: You may need to split the raw dataset into chunks to load it into a local PostgreSQL (psql) instance.

## Setup: Find Block #s of interest 

The authors noted that attacks did not begin until around December.

In [1]:
from web3 import Web3
from web3.middleware import ExtraDataToPOAMiddleware

In [2]:
# Connect to Ethereum through a JSON-RPC endpoint over HTTP
RPC_URL = "https://ethereum-rpc.publicnode.com"
_http_provider = Web3.HTTPProvider(RPC_URL)
w3_eth = Web3(_http_provider)

# Report whether the endpoint is reachable before running any lookups
_connected = w3_eth.is_connected()
print(f"Is connected: {_connected}")

Is connected: True


In [3]:
def get_block_by_timestamp(target_timestamp, start_search_from_block=0, w3=None):
    """
    Find the block whose timestamp is closest to ``target_timestamp``.

    Binary-searches block numbers between ``start_search_from_block`` and the
    latest block, assuming block timestamps increase with block number.

    Args:
        target_timestamp: Unix timestamp (seconds) to look up.
        start_search_from_block: Lower bound of the search. Passing a block
            known to precede the target avoids searching from block 0.
        w3: Optional client exposing ``eth.get_block``. When omitted, the
            notebook-level ``w3_eth`` connection is used.

    Returns:
        The number of the probed block with the smallest absolute timestamp
        difference to the target (an exact match is returned immediately).
        If fetching a block fails mid-search, the error is printed, the
        search stops, and the best match so far is returned -- this may be
        ``None`` or a block that is not yet the true closest one.

    Raises:
        ValueError: If ``start_search_from_block`` is above the latest block.
    """
    # Resolve the client lazily so the function still works with the
    # notebook global when no explicit client is supplied.
    client = w3_eth if w3 is None else w3

    # 1. The latest block sets the upper limit of the search
    latest_block = client.eth.get_block('latest')
    high = latest_block['number']

    # 2. The caller-supplied block sets the lower limit
    low = start_search_from_block

    # Edge case: ensure valid bounds
    if low > high:
        raise ValueError("Start block cannot be higher than the latest block.")

    closest_block = None
    min_diff = float('inf')

    print(f"Starting binary search between block {low} and {high}...")

    while low <= high:
        mid = (low + high) // 2

        # Fetch the block at the middle point
        try:
            mid_block = client.eth.get_block(mid)
        except Exception as e:
            # Best effort: report the failure and fall back to the closest
            # block seen so far instead of aborting the whole cell.
            print(f"Error fetching block {mid}: {e}")
            break

        mid_time = mid_block['timestamp']
        diff = abs(target_timestamp - mid_time)

        # Track the closest match found so far
        if diff < min_diff:
            min_diff = diff
            closest_block = mid

        # Exact match found
        if mid_time == target_timestamp:
            return mid

        # Adjust binary search bounds
        if mid_time < target_timestamp:
            low = mid + 1      # Target is in the future relative to mid
        else:
            high = mid - 1     # Target is in the past relative to mid

    return closest_block

# --- Usage Example ---



In [5]:
# 1. Define the window start.
# 1672534800 is Jan 1, 2023 01:00:00 UTC. (Nov 1, 2022 01:00:00 UTC would be 1667264400.)
# NOTE(review): the dust query further down uses 15871779 as its lower bound rather than
# the block resolved here -- confirm which start date the analysis is meant to use.
target_start_time = 1672534800

# 2. Optimization: start searching from July 2022 (start of dataset)
# Block 15,053,226 is roughly July 1, 2022
known_start_block = 15053226

found_block = get_block_by_timestamp(target_start_time, start_search_from_block=known_start_block)

# The lookup returns None when no block could be fetched; stop here with a clear
# message instead of passing None on as the next search's lower bound.
if found_block is None:
    raise RuntimeError("Could not resolve the start block; check the RPC connection and re-run.")

print("------------------------------------------------")
print(f"Target Timestamp: {target_start_time}")
print(f"Closest Block Found: {found_block}")

# 3. Find the end block. 1675213200 is Feb 1, 2023 01:00:00 UTC (end of the window).
target_end_time = 1675213200

# The end block cannot precede the start block, so resume the search from there
end_block = get_block_by_timestamp(target_end_time, start_search_from_block=found_block)

print("------------------------------------------------")
print(f"Target End Timestamp: {target_end_time}")
print(f"Closest Block Found: {end_block}")


Starting binary search between block 15053226 and 23956378...
------------------------------------------------
Target Timestamp: 1672534800
Closest Block Found: 16308487
Starting binary search between block 16308487 and 23956378...
------------------------------------------------
Target End Timestamp: 1675213200
Closest Block Found: 16530546


## SQL Queries

In [None]:
%%sql
-- Count the dataset rows whose addr is one of the two contracts of interest
-- (USDT first, USDC second).
SELECT COUNT(*)
FROM public.address_poisoning_ethereum
WHERE addr = '0xdAC17F958D2ee523a2206206994597C13D831ec7'
   OR addr = '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48';

In [None]:
 count   
----------
 24384541 

In [None]:
%%sql
-- Total number of rows in the dataset, for comparison with the USDT/USDC subset.
SELECT COUNT(*)
FROM public.address_poisoning_ethereum;

In [None]:
  count   
----------
 34905969

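Now we find the number of zero-value transfer attack attempts within our three-month period. (Note: the lower bound used in the query below, block 16308487, is the block resolved above for the Jan 1, 2023 timestamp, whereas the dust query uses block 15871779 as its lower bound.)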

In [None]:
%%sql
-- Unintended zero-value transfers on the USDT / USDC contracts between the
-- start block (16308487) and end block (16530546) found by the block lookup.
-- NOTE(review): this returns a 100-row sample, not a count; switch to
-- SELECT COUNT(*) if the total number of attempts is what is needed.
SELECT *
FROM public.address_poisoning_ethereum
WHERE addr IN ('0xdAC17F958D2ee523a2206206994597C13D831ec7',
               '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48')
AND block_number >= 16308487
AND block_number < 16530546
AND intended_transfer = FALSE
AND zero_value_transfer = TRUE
LIMIT 100;

Now we find the number of tiny-transfer (dust) attack attempts, i.e. tiny transfers not flagged as intended, within our three-month period.

In [None]:
%%sql
-- Number of unintended tiny (dust) transfers on the USDT / USDC contracts
-- in the block range [15871779, 16530546).
-- NOTE(review): the lower bound here (15871779) differs from the one used in
-- the zero-value query (16308487) -- confirm both should cover the same window.
SELECT COUNT(*) AS row_count
FROM public.address_poisoning_ethereum
WHERE addr IN ('0xdAC17F958D2ee523a2206206994597C13D831ec7',
               '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48')
AND block_number >= 15871779
AND block_number < 16530546
AND intended_transfer = FALSE
AND tiny_transfer = TRUE;

In [None]:
Output
 row_count 
-----------
         5