# Running Poison Hunter Algorithm with Google BigQuery's Ethereum Data 

We run the Poison Hunter detection algorithm for the USDC and USDT tokens on the Ethereum mainnet. This notebook provides all parts of the detection query.

## Consolidated Query

All queries are written in BigQuery Standard SQL and require access to Google BigQuery as a prerequisite.
Make sure to adjust `analysis_start` and `analysis_end` as needed.

In [None]:
%%bigquery
-- 1. CONFIGURATION
-- Script variables read by the detection query that follows.

-- Time window (UTC) of the legitimate transfers to analyse.
DECLARE analysis_start TIMESTAMP DEFAULT TIMESTAMP '2023-01-22 00:00:00 UTC';
DECLARE analysis_end   TIMESTAMP DEFAULT TIMESTAMP '2023-01-23 00:00:00 UTC';

-- Logic Settings
-- Maximum number of blocks after a legitimate transfer in which a candidate
-- attack transfer is still paired with it.
DECLARE lookahead_blocks INT64 DEFAULT 100;
-- Number of leading characters (after the "0x" prefix) that must match
-- between the attacker address and the intended address.
DECLARE prefix_len INT64 DEFAULT 3;
-- Number of trailing characters that must match between the two addresses.
DECLARE suffix_len INT64 DEFAULT 4;
-- Largest transfer value (inclusive, in token units after dividing the raw
-- quantity by 1e6) still treated as a "tiny" transfer, i.e. $1.00.
DECLARE dust_threshold FLOAT64 DEFAULT 1.0;

-- Token Contracts (USDC, USDT), lower-case so they compare against LOWER(address).
DECLARE usdc_addr STRING DEFAULT '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48';
DECLARE usdt_addr STRING DEFAULT '0xdac17f958d2ee523a2206206994597c13d831ec7';

-- 2. DATA EXTRACTION
-- Every USDC/USDT transfer event inside the analysis window, plus a trailing
-- buffer so that attack transfers following a late-window legitimate transfer
-- are still loaded.
WITH raw_transfers AS (
    SELECT
        transaction_hash,
        block_number,
        block_timestamp,
        LOWER(address) AS token_address,
        LOWER(from_address) AS from_addr,
        LOWER(to_address) AS to_addr,
        -- Raw quantity scaled down by 1e6. SAFE_CAST yields NULL for an
        -- unparsable quantity; such rows fail both value filters below.
        SAFE_CAST(quantity AS BIGNUMERIC) / 1e6 AS value_usd,
        CASE WHEN LOWER(address) = usdc_addr THEN 'USDC' ELSE 'USDT' END AS token_symbol
    FROM `bigquery-public-data.goog_blockchain_ethereum_mainnet_us.token_transfers`
    WHERE
        -- Half-open range: a row exactly on a boundary belongs to one window only.
        block_timestamp >= analysis_start
        -- The buffer scales with lookahead_blocks and never drops below the
        -- original 60 minutes. It budgets roughly 15 seconds per block
        -- (NOTE(review): assumed to exceed the average mainnet block
        -- interval - confirm before raising lookahead_blocks substantially).
        AND block_timestamp < TIMESTAMP_ADD(
            analysis_end,
            INTERVAL GREATEST(60, DIV(lookahead_blocks, 4) + 1) MINUTE
        )
        AND LOWER(address) IN (usdc_addr, usdt_addr)
),

-- 3. IDENTIFY PAIRS
-- Pair each legitimate (non-zero) transfer `hist` sent by a victim with every
-- zero-value or dust transfer `atk` of the same token that touches the victim
-- within `lookahead_blocks` blocks afterwards.
attack_pairs AS (
    SELECT
        atk.transaction_hash AS attack_tx_hash,
        atk.block_number AS attack_block,
        atk.block_timestamp AS attack_ts,
        atk.token_symbol,
        atk.value_usd,

        -- Attack type: exact zero vs. any other amount up to dust_threshold.
        CASE
            WHEN atk.value_usd = 0 THEN 'zero_value'
            ELSE 'tiny_transfer'
        END AS attack_type,

        -- Attacker labeling: the attacker is the counterparty of the victim
        -- in the attack transfer.
        CASE
            WHEN atk.from_addr = hist.from_addr THEN atk.to_addr
            ELSE atk.from_addr
        END AS attacker_address,

        hist.from_addr AS victim_address,
        hist.to_addr AS intended_address,
        hist.transaction_hash AS intended_tx_hash,
        hist.block_timestamp AS intended_ts,
        hist.block_number AS intended_block,

        -- 'spoof_outgoing': the attack transfer appears to be sent by the victim.
        -- 'spam_incoming': the attack transfer is sent to the victim.
        CASE
            WHEN atk.from_addr = hist.from_addr THEN 'spoof_outgoing'
            ELSE 'spam_incoming'
        END AS direction

    FROM raw_transfers AS hist -- History (legitimate transfer)
    INNER JOIN raw_transfers AS atk -- Attack candidate
        ON atk.token_address = hist.token_address
        AND atk.block_number >= hist.block_number
        AND atk.block_number <= (hist.block_number + lookahead_blocks)
    WHERE
        hist.value_usd > 0
        -- Only legitimate transfers inside the requested window (half-open);
        -- attack candidates may additionally come from the trailing buffer.
        AND hist.block_timestamp >= analysis_start
        AND hist.block_timestamp < analysis_end

        -- Zero-value or small-value transfers only.
        AND atk.value_usd <= dust_threshold

        AND (
            (atk.from_addr = hist.from_addr)
            OR
            (atk.to_addr = hist.from_addr)
        )
        AND atk.transaction_hash != hist.transaction_hash
)

-- 4. FINAL SELECTION
-- Keep only pairs whose attacker address imitates the intended recipient:
-- same first `prefix_len` characters after "0x" and same last `suffix_len`
-- characters, but not the same address.
SELECT
    attack_tx_hash,
    attack_block,
    attack_ts,
    token_symbol,
    attack_type,
    value_usd,
    direction,
    attacker_address,
    victim_address,
    intended_address,
    intended_ts,
    (attack_block - intended_block) AS blocks_delay
FROM attack_pairs
WHERE
    SUBSTR(attacker_address, 3, prefix_len) = SUBSTR(intended_address, 3, prefix_len)
    -- RIGHT() instead of SUBSTR(x, -suffix_len): with suffix_len = 0 the SUBSTR
    -- form starts at position 1 and compares whole addresses, which can never
    -- match together with the inequality below.
    AND RIGHT(attacker_address, suffix_len) = RIGHT(intended_address, suffix_len)
    AND attacker_address != intended_address

-- DEDUPLICATION
-- One row per (attack transaction, attacker, victim, direction), keeping the
-- most recent matching legitimate transfer. The extra sort keys only break
-- ties so the surviving row is the same on every run.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY attack_tx_hash, attacker_address, victim_address, direction
    ORDER BY intended_ts DESC, intended_tx_hash, intended_address, token_symbol, value_usd
) = 1
-- The ordering covers the deduplication key, so the output order is stable.
ORDER BY attack_ts, attack_tx_hash, attacker_address, victim_address, direction

Deduplication here is necessary to make sure we capture the case where an attacker sends multiple spoof attempts in one transaction to save on gas fees.