In [80]:
import asyncio
import aiohttp
import pandas as pd
import time
from goplus.token import Token

# Counterfeit token transfers 

### Script used to find counterfeit USDT & USDC tokens 

```sql
WITH
  lookalikes AS (
    SELECT
      address AS token_address,
      name,
      symbol
    FROM
      `bigquery-public-data`.crypto_ethereum.tokens
    WHERE
      (REGEXP_CONTAINS(symbol, r'(?i)usdt|usdc') OR REGEXP_CONTAINS(symbol, r'(?i)^([0OÞ]+[.:]?[ -]?)?u[5s$]+[.: -]?[dt][t7]+') OR
      REGEXP_CONTAINS(symbol, r'(?i)^([0OÞ]+[.:]?[ -]?)?u[5s$]+[.: -]?[dc][.:]?[c7]?') OR REGEXP_CONTAINS(symbol,
        r'(?i)([UУ][5ЅS$][DĐ])[TТ7]|[UУ][5ЅS$][DĐ][СC]') OR symbol LIKE '%USDТ%' OR symbol LIKE '%USDT%' OR
      symbol LIKE '%USDС%' OR symbol LIKE '%USDC%' OR REGEXP_CONTAINS(symbol, r'(?i)(usdt|usdc).*(gift|earn|bonus|claim|airdrop|reward|2\.0|202[4-9]|v2|new|official|pro|max|global|finance|pay|cash|eth|bridge|swap)') OR
      REGEXP_CONTAINS(symbol, r'(?i)tdsu|cdsu|tusd|cusd') OR REGEXP_CONTAINS(symbol, r'(?i)⊙|Ⓤ|usd[tс] ?⊗|usdt *⊸|usdc *©|usdt *™') OR
      REGEXP_CONTAINS(symbol, r'[\x{200B}-\x{200D}\x{2060}\x{180E}\x{00AD}\x{200E}\x{200F}\x{202A}-\x{202E}\x{2066}-\x{2069}]')) AND
      UPPER(symbol) NOT IN ('USDC', 'USDT', 'WUSDC', 'AUSDC', 'CUSDC', 'CUSDT', 'STGUSDC') AND UPPER(name) NOT LIKE '%CIRCLE%' 
      -- actual USDC, Addressse
    AND address NOT IN (
      # test 
      '0xdac17f958d2ee523a2206206994597c13d831ec7',  # Ethereum USDT
    
    # USDC (Circle)
    '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48',  # Ethereum native USDC
    '0x8ac76a51cc950d9822d68b83fe1ad97b32cd580d',  # Ethereum USDC.e (bridged)
    
    # Base USDC
    '0x1c7d4b196cb0c7b01d743fbc6116a902379c7238'
    )
  )
SELECT DISTINCT
  l.token_address,
  l.name,
  l.symbol
FROM
  lookalikes AS l
  INNER JOIN
  `bigquery-public-data`.crypto_ethereum.token_transfers AS t
  ON LOWER(t.token_address) = LOWER(l.token_address)
WHERE
  t.block_timestamp >= TIMESTAMP("2024-07-01") AND t.block_timestamp < TIMESTAMP("2025-11-01")
ORDER BY l.symbol;

```

### Find true positives in counterfeit token set 

In [82]:
# Candidate Counterfeit tokens from 7/24 to 10/25
CANDIDATES = "data/candidate_counterfeit_tokens.csv"

# Read CSV
raw_candiates = pd.read_csv(CANDIDATES)
# Go plus labs API config
client = Token(access_token=None)
CHAIN_ID = "1"

In [83]:
# sample
response = client.token_security(chain_id=CHAIN_ID, addresses=['0xdeff2caab5afd843b5be40358edc743571fb54e75ee6c25e2a050d47381bebb8'])
result_dict = response.result  # this MUST be a dict
# first_key = next(iter(result_dict))
# data = result_dict[first_key]
# print(response)
# creator_address
print(response)
# print(response)

{'code': 1, 'message': 'OK', 'result': None}


In [84]:
# Helper function that calls the token_security endpoint given an address

MAX_PER_MIN = 20        # rate limit
semaphore = asyncio.Semaphore(MAX_PER_MIN)

async def check_token(addr):
    async with semaphore:
        try:
            response = client.token_security(chain_id=CHAIN_ID, addresses=[addr])
            # access the result with the SDK wrapper
            info = response.result
            first_key = next(iter(info))
            data = info[first_key]
            if response.message != "OK" or data is None:
                print("-----failed to get data for", addr, "-------")
                print("first key", first_key)
                print("message", response.message)
                print(response)
                return None
            elif data.fake_token is None:
                print(addr,"was not labeled a scam")
                return None
            # scam case 
            print(addr, "was labeled a scam")
            row = {
                "token_address": addr,
                "token_symbol": data.token_symbol,
                "creator_address": data.creator_address,
                "is_airdrop_scam": data.is_airdrop_scam,
                "is_honeypot": data.is_honeypot,
            }

            return row

        except Exception as e:
            print(f"ERROR on {addr}: {e}")
            return {"token_address": addr, "error": str(e)}

In [85]:
addresses = raw_candiates["token_address"].astype(str).str.lower().unique().tolist()
tasks = [check_token(addr) for addr in addresses]

print(f"Checking {len(tasks)} tokens (30/min)...")

results = []
for i in range(0, len(tasks), MAX_PER_MIN):
    batch = tasks[i:i + MAX_PER_MIN]
    print(f"Processing {i//MAX_PER_MIN + 1}/{(len(tasks)-1)//MAX_PER_MIN + 1} batch...")
    results.extend(await asyncio.gather(*batch))
    if i + MAX_PER_MIN < len(tasks):
        print("Sleeping 60s for rate limit...")
        await asyncio.sleep(60)

# filter out None rows
results = [r for r in results if r is not None]

df = pd.DataFrame(results)

Checking 111 tokens (30/min)...
Processing 1/6 batch...
0xcd5fb50c82590b9d11474ff19d085eba3c483e57 was not labeled a scam
0xeea6a9166a382db43cf429e6b232a6666332ef4c was not labeled a scam
0x479763d77cb82155a27fbb94c11f055064473e17 was labeled a scam
0x64a308881990c70826676b908e54054fac0f1a96 was labeled a scam
0x76251603b6c7df8d06e7fade0c47ccc1d31d801c was labeled a scam
0xae2b2b6b879caf2cf9340e0b96901973590654b6 was labeled a scam
0xd6a94978a51796e0b570446f5fe6f616fe08a4f2 was labeled a scam
0x6b951017a3212edc6b275976bfdd50969f6e1c62 was not labeled a scam
0x48d984dad97e29590e131598d0b27d5d49eaaeac was not labeled a scam
0x3242aebcdcf8de491004b1c98e6595e9827f6c17 was not labeled a scam
0x4827e558e642861cd7a1c8f011b2b4661f8d51fa was not labeled a scam
0xa861518c1be7e7aae65f203d6cbe23e2612c0fc3 was not labeled a scam
0x21efeb62dc81659a09786a57e75aeee43aaf4889 was not labeled a scam
0xa33ace59e4b0d9a45cd4a3f0dbab86d87bdd67e2 was not labeled a scam
0xe37afa3aa95e153b8dd0fe8456cbf345cb4c51

In [87]:
df

Unnamed: 0,token_address,token_symbol,creator_address,is_airdrop_scam,is_honeypot
0,0x479763d77cb82155a27fbb94c11f055064473e17,B᠎N᠎B,0x756ea0e357c9dca2e8bbb7ac53bb7537a2861099,,0.0
1,0x64a308881990c70826676b908e54054fac0f1a96,B᠎Ν᠎B,0x756ea0e357c9dca2e8bbb7ac53bb7537a2861099,,0.0
2,0x76251603b6c7df8d06e7fade0c47ccc1d31d801c,B᠎Ν᠎B,0x78b7fa4b19268ee10e3a3bd7aada077ba7888888,,0.0
3,0xae2b2b6b879caf2cf9340e0b96901973590654b6,B᠎Ν᠎B,0x0daa4798ee696ffb75f660ec8c4d1a87d7666666,,0.0
4,0xd6a94978a51796e0b570446f5fe6f616fe08a4f2,B᠎Ν᠎B,0xccb9b9db0ca22ed3781f19a9d2583dfe7a000000,,0.0
5,0x0ee853550d97b42e5e832b2e216271fe69ddcd45,USDT­,0xbaf4fed85bbd89616973bc6ee3679640bbc4c4d4,,0.0
6,0xc6e9d9a2663879add56acef4dd2e04b2ce1c04f6,USDŦ,0x9ce533cab5d8428d777bb603d753be423396cbc1,,
7,0xf3135c779ce72c9b67afa853c3a8699242826ed7,USDŦ,0x9ce533cab5d8428d777bb603d753be423396cbc1,,0.0
8,0x3784b5d7b9256a8ec773d4bceab7b1abdc5c5aba,USDС,0x9872a42904f831e1b83f73815f11766ebf64c604,,
9,0xd6ba0c40f6e29747086a1edbb7299852fae40707,USDС,0x702659cdeca87552fb445b7ab9146c9568bae1d8,,


In [88]:
df.to_csv("data/counterfeit_tokens.csv", index=False)