In [9]:
import pandas as pd
from goplus.token import Token

# Counterfeit token transfers 

### BigQuery SQL used to find candidate counterfeit USDT & USDC tokens

```sql
-- Lists tokens from the public Ethereum tokens table whose symbol imitates USDT/USDC and that
-- have at least one transfer inside the time window given in the final WHERE clause.
WITH
  -- Tokens whose symbol matches at least one of the imitation checks (1-8) below and
  -- that survive the exclusion list at the end of the WHERE clause.
  lookalikes AS (
    SELECT
      address AS token_address,
      name,
      symbol
    FROM
      `bigquery-public-data`.crypto_ethereum.tokens
    WHERE
   -- 1. Contains "USDT" or "USDC" anywhere (case-insensitive)
      (REGEXP_CONTAINS(symbol, r'(?i)usdt|usdc') 
    -- 2. USDT with character substitutions, anchored at the start of the symbol: optional leading
    --    run of 0/O/Þ plus separator, then "u", one or more of 5/s/$, optional separator, d or t,
    --    then one or more of t/7
      OR REGEXP_CONTAINS(symbol, r'(?i)^([0OÞ]+[.:]?[ -]?)?u[5s$]+[.: -]?[dt][t7]+') OR
      -- 3. USDC counterpart of check 2 (d or c, optional separator, optional c/7)
      --    NOTE(review): the trailing [c7]? is optional, so this also matches any symbol that merely
      --    starts with e.g. "usd" or "usc" -- confirm that breadth is intended
      REGEXP_CONTAINS(symbol, r'(?i)^([0OÞ]+[.:]?[ -]?)?u[5s$]+[.: -]?[dc][.:]?[c7]?') 
      -- 4. Homoglyph spellings of USDT/USDC anywhere in the symbol: each character class mixes the
      --    Latin letter with look-alike characters (these appear to be Cyrillic and stroked forms)
      OR REGEXP_CONTAINS(symbol,
        r'(?i)([UУ][5ЅS$][DĐ])[TТ7]|[UУ][5ЅS$][DĐ][СC]')
        -- 5. Literal, case-sensitive substring checks. The final letter of this pattern appears to
        --    be a Cyrillic look-alike of Latin T -- confirm the code point before editing
         OR symbol LIKE '%USDТ%' 
         -- Same check for the plain Latin spellings and for what appears to be a Cyrillic look-alike
         -- of C. NOTE(review): the plain Latin patterns look redundant with check 1 -- confirm
         OR symbol LIKE '%USDT%' OR
      symbol LIKE '%USDС%' OR symbol LIKE '%USDC%'
      -- 6. "usdt"/"usdc" followed anywhere later in the symbol by a promotional, version, year or
      --    product keyword (gift, airdrop, 2.0, 2024-2029, v2, official, bridge, swap, ...)
      OR REGEXP_CONTAINS(symbol, r'(?i)(usdt|usdc).*(gift|earn|bonus|claim|airdrop|reward|2\.0|202[4-9]|v2|new|official|pro|max|global|finance|pay|cash|eth|bridge|swap)') OR
      -- 7. Reversed or rearranged spellings (tdsu, cdsu, tusd, cusd), then decorated variants:
      --    a circled dot or circled U anywhere, or usdt/usdc followed by a circled-times,
      --    multimap, copyright or trademark sign
      --    NOTE(review): "tusd" and "cusd" match any symbol containing those letters -- confirm
      --    this does not pull in unrelated tokens
      REGEXP_CONTAINS(symbol, r'(?i)tdsu|cdsu|tusd|cusd') OR REGEXP_CONTAINS(symbol, r'(?i)⊙|Ⓤ|usd[tс] ?⊗|usdt *⊸|usdc *©|usdt *™') OR
      -- 8. Contains an invisible or direction-control code point: zero-width space/joiners,
      --    word joiner, U+180E, soft hyphen, LRM/RLM marks, bidi embedding/override/isolate controls
      REGEXP_CONTAINS(symbol, r'[\x{200B}-\x{200D}\x{2060}\x{180E}\x{00AD}\x{200E}\x{200F}\x{202A}-\x{202E}\x{2066}-\x{2069}]')) 
      AND
      -- Still exclude the real ones so they don't drown the results: exact symbol matches
      -- (case-insensitive via UPPER), names containing CIRCLE or TETHER, and three hard-coded
      -- contract addresses (presumably the canonical stablecoin contracts -- verify).
      -- NOTE(review): when name is NULL the NOT LIKE tests evaluate to NULL, so such rows are
      -- filtered out as well -- confirm that is intended
      UPPER(symbol) NOT IN ('USDC', 'USDT', 'WUSDC', 'AUSDC', 'CUSDC', 'CUSDT', 'STGUSDC') AND UPPER(name) NOT LIKE '%CIRCLE%' AND
      UPPER(name) NOT LIKE '%TETHER%' AND address NOT IN ('0xdac17f958d2ee523a2206206994597c13d831ec7', '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48',
        '0x8ac76a51cc950d9822d68b83fe1ad97b32cd580d')
  )
-- One row per lookalike token; DISTINCT collapses the per-transfer rows produced by the join.
SELECT DISTINCT
  l.token_address,
  l.name,
  l.symbol
FROM
  lookalikes AS l
  -- The inner join keeps only tokens that have at least one matching transfer row
  INNER JOIN
  `bigquery-public-data`.crypto_ethereum.token_transfers AS t
  -- Addresses are compared case-insensitively
  ON LOWER(t.token_address) = LOWER(l.token_address)
WHERE
  -- Half-open window: 2024-07-01 inclusive up to, but not including, 2025-11-01
  t.block_timestamp >= TIMESTAMP("2024-07-01") AND t.block_timestamp < TIMESTAMP("2025-11-01")
ORDER BY l.symbol;
```

### Find true positives in counterfeit token set 

In [None]:
# Location of the counterfeit-token dataset (covers 7/24 through 10/25)
ATTACK_DATA_PATH = "data/counterfeit_token_attack.csv"

# Load the CSV into a DataFrame; this is the raw set to be screened for true positives
raw_type_typosquat = pd.read_csv(filepath_or_buffer=ATTACK_DATA_PATH)