In [2]:
# tronscan_usdt_downloader.py
import requests, time, math, json, os
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

In [3]:
# ---------- 설정값 (필수) ----------
TRONSCAN_URL = "https://apilist.tronscanapi.com/api/token_trc20/transfers"
USDT_CONTRACT = "TR7NHqjeKQxGTCi8q8ZY4pL8otSzgjLj6t"   # TRC20 USDT (공식)
OUT_DIR = "data_usdt"    # 결과 저장 폴더
os.makedirs(OUT_DIR, exist_ok=True)

In [4]:
# 페이징/레이트 관련 기본값 (환경·정책에 따라 조절)
PAGE_LIMIT = 200      # 한 페이지에 요청할 크기(최대 허용치는 변할 수 있으므로 실패시 20 등으로 낮춰 재시도)
SLEEP_BETWEEN_REQ = 0.25  # 초: 각 요청 사이 안전하게 쉬어주기
RETRY_TOTAL = 5

In [7]:
def dt_to_ms(dt: datetime) -> int:
    return int(dt.timestamp() * 1000)

def make_session():
    s = requests.Session()
    retry = Retry(total=RETRY_TOTAL, 
                  status_forcelist=[429,500,502,503,504],
                  backoff_factor=1,
                  allowed_methods=False)  # requests<2.26 compatibility
    s.mount("https://", HTTPAdapter(max_retries=retry))
    s.headers.update({"User-Agent": "tronscan-usdt-downloader/1.0 (+you@example.com)"})
    return s

def fetch_page(session, contract, start_ts_ms, end_ts_ms, start_idx=0, limit=200):
    params = {
        "contract_address": contract,
        "start_timestamp": start_ts_ms,
        "end_timestamp": end_ts_ms,
        "start": start_idx,
        "limit": limit,
        "confirm": "true"   # 확정된 거래만
    }
    r = session.get(TRONSCAN_URL, params=params, timeout=30)
    r.raise_for_status()
    data = r.json()
    # Tronscan 응답에서 실제 전송 배열 키는 "token_transfers"
    transfers = data.get("token_transfers", [])
    # rangeTotal(범위 내 전체)나 total 필드가 제공되는 경우가 있음(카운트 확인에 활용)
    total_est = data.get("rangeTotal") or data.get("total") or None
    return transfers, total_est

def fetch_window(session, contract, start_dt, end_dt, page_limit=PAGE_LIMIT, sleep_between=SLEEP_BETWEEN_REQ):
    start_ts = dt_to_ms(start_dt)
    end_ts = dt_to_ms(end_dt)
    out_rows = []
    idx = 0
    total_est = None
    while True:
        try:
            trfs, total_est = fetch_page(session, contract, start_ts, end_ts, start_idx=idx, limit=page_limit)
        except Exception as e:
            print("Request failed:", e)
            time.sleep(5)
            # 간단 재시도: 한 번 더
            trfs, total_est = fetch_page(session, contract, start_ts, end_ts, start_idx=idx, limit=page_limit)
        if not trfs:
            break
        for t in trfs:
            # 주요 필드 정리: (TX hash, from, to, amount(정수), tokenDecimal, block_ts, block)
            out_rows.append({
                "transaction_id": t.get("transaction_id") or t.get("transaction_hash") or t.get("txHash"),
                "from": t.get("from_address"),
                "to": t.get("to_address"),
                "quant": t.get("quant"),  # 정수(문자열)로 들어옴
                "block_ts": t.get("block_ts"),  # ms
                "block": t.get("block"),
                "contract_address": t.get("contract_address"),
                "token_decimal": (t.get("tokenInfo") or {}).get("tokenDecimal"),
                "token_symbol": (t.get("tokenInfo") or {}).get("tokenAbbr"),
            })
        idx += len(trfs)
        # 만약 API가 'rangeTotal'을 주면 빠르게 종료 판단 가능
        if total_est is not None and idx >= int(total_est):
            break
        # 페이지가 limit보다 적게 왔으면 마지막 페이지
        if len(trfs) < page_limit:
            break
        time.sleep(sleep_between)
    return out_rows, total_est

def sliding_window_download(start_dt, end_dt, window_td=timedelta(days=1)):
    """
    start_dt, end_dt: datetime 범위
    window_td: 각 청크(예: 하루, 1시간)
    """
    session = make_session()
    cur = start_dt
    part = 0
    summary = []
    while cur < end_dt:
        nxt = min(cur + window_td, end_dt)
        part += 1
        print(f"=== Window #{part}: {cur.isoformat()}  ~  {nxt.isoformat()} ===")
        rows, total_est = fetch_window(session, USDT_CONTRACT, cur, nxt)
        if not rows:
            print("  -> no transfers in window.")
            cur = nxt
            continue
        df = pd.DataFrame(rows)
        # quant(문자열) -> decimal 적용: amount = int(quant) / (10**token_decimal)
        def parse_amount(r):
            try:
                q = int(r["quant"])
                d = int(r["token_decimal"]) if r["token_decimal"] not in (None, "") else 6
                return q / (10**d)
            except Exception:
                return None
        df["amount"] = df.apply(parse_amount, axis=1)
        df["ts_iso"] = pd.to_datetime(df["block_ts"], unit="ms")
        # 저장 (파케이 권장(압축·열지향))
        out_path = os.path.join(OUT_DIR, f"usdt_{cur.strftime('%Y%m%dT%H%M')}_{nxt.strftime('%Y%m%dT%H%M')}.parquet")
        df.to_parquet(out_path, index=False)
        print(f"  -> saved {len(df)} rows to {out_path} (est total in window: {total_est})")
        summary.append((cur, nxt, len(df), total_est, out_path))
        cur = nxt
    return summary

In [12]:
# ---------- 예시 사용 ----------
if __name__ == "__main__":
    # 샘플: 1일치(테스트) - 실제는 범위를 더 크게 잡아 반복 실행
    start = datetime(2025, 7, 1, 0, 0)
    end   = datetime(2025, 7, 2, 0, 0)
    # window_td를 작게(예: timedelta(hours=1)) 하면 더 세밀히 쪼갭니다.
    summary = sliding_window_download(start, end, window_td=timedelta(hours=6))
    print("Done. windows:", summary)

=== Window #1: 2025-07-01T00:00:00  ~  2025-07-01T06:00:00 ===
  -> no transfers in window.
=== Window #2: 2025-07-01T06:00:00  ~  2025-07-01T12:00:00 ===
  -> no transfers in window.
=== Window #3: 2025-07-01T12:00:00  ~  2025-07-01T18:00:00 ===
  -> no transfers in window.
=== Window #4: 2025-07-01T18:00:00  ~  2025-07-02T00:00:00 ===
Request failed: HTTPSConnectionPool(host='apilist.tronscanapi.com', port=443): Max retries exceeded with url: /api/token_trc20/transfers?contract_address=TR7NHqjeKQxGTCi8q8ZY4pL8otSzgjLj6t&start_timestamp=1751360400000&end_timestamp=1751382000000&start=0&limit=200&confirm=true (Caused by ResponseError('too many 429 error responses'))


RetryError: HTTPSConnectionPool(host='apilist.tronscanapi.com', port=443): Max retries exceeded with url: /api/token_trc20/transfers?contract_address=TR7NHqjeKQxGTCi8q8ZY4pL8otSzgjLj6t&start_timestamp=1751360400000&end_timestamp=1751382000000&start=0&limit=200&confirm=true (Caused by ResponseError('too many 429 error responses'))

In [38]:
import requests
import pandas as pd
from datetime import datetime, timezone, timedelta

# -----------------------------
# 1. 시간 설정 (2025-07-01 00:00~01:00 KST)
# -----------------------------
kst = timezone(timedelta(hours=9))  # 한국 표준시
start_dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=kst)
end_dt   = datetime(2025, 1, 31, 0, 0, 0, tzinfo=kst)

# UTC 변환 (Tronscan API는 UTC 기준)
start_timestamp = int(start_dt.astimezone(timezone.utc).timestamp() * 1000)
end_timestamp   = int(end_dt.astimezone(timezone.utc).timestamp() * 1000)

print("조회 구간(UTC):", datetime.utcfromtimestamp(start_timestamp/1000), "~", datetime.utcfromtimestamp(end_timestamp/1000))

# -----------------------------
# 2. Tronscan API 요청
# -----------------------------
url = "https://apilist.tronscanapi.com/api/token_trc20/transfers"
params = {
    "contract_address": "TR7NHqjeKQxGTCi8q8ZY4pL8otSzgjLj6t",  # USDT (TRC20)
    "start_timestamp": start_timestamp,
    "end_timestamp": end_timestamp,
    "start": 0,
    "limit": 200,   # 한 번에 가져올 최대 개수
    "confirm": "true"
}

response = requests.get(url, params=params)
if response.status_code != 200:
    print("API 요청 실패:", response.status_code, response.text)
    exit()

data = response.json()

# -----------------------------
# 3. DataFrame으로 변환
# -----------------------------
records = data.get("token_transfers", [])
df = pd.DataFrame(records)

# 컬럼 정리
if not df.empty:
    df = df[["transaction_id", "block", "transferFromAddress", "transferToAddress", "amount_str", "block_ts"]]
    # timestamp → datetime 변환
    df["datetime"] = pd.to_datetime(df["block_ts"], unit="ms", utc=True).dt.tz_convert("Asia/Seoul")

# -----------------------------
# 4. CSV 저장
# -----------------------------
output_file = "usdt_trc20_2025-07-01_00-01.csv"
df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"총 {len(df)} 건 수집됨 → {output_file}")
print(df.head())


  print("조회 구간(UTC):", datetime.utcfromtimestamp(start_timestamp/1000), "~", datetime.utcfromtimestamp(end_timestamp/1000))


조회 구간(UTC): 2024-12-31 15:00:00 ~ 2025-01-30 15:00:00
총 0 건 수집됨 → usdt_trc20_2025-07-01_00-01.csv
Empty DataFrame
Columns: []
Index: []


In [44]:
start_timestamp = 1735647600000
end_timestamp = 1738239599000

url = "https://apilist.tronscanapi.com/api/token_trc20/transfers"
params = {
    "contract_address": "TR7NHqjeKQxGTCi8q8ZY4pL8otSzgjLj6t",  # USDT (TRC20)
    "start_timestamp": start_timestamp,
    "end_timestamp": end_timestamp,
    "start": 0,
    "limit": 200,   # 한 번에 가져올 최대 개수
    "confirm": "true"
}


response = requests.get(url, params=params)
data = response.json()
records = data.get("token_transfers", [])
df = pd.DataFrame(records)
df

In [43]:
records

[]

In [5]:
import requests
import pandas as pd
from datetime import datetime, timezone, timedelta
import time

# -----------------------------
# 1. 최근 1시간 UTC 타임스탬프
# -----------------------------
now_utc = datetime.now(timezone.utc)
one_hour_ago = now_utc - timedelta(hours=1)

start_timestamp = int(one_hour_ago.timestamp() * 1000)
end_timestamp   = int(now_utc.timestamp() * 1000)

print("조회 구간(UTC):", one_hour_ago, "~", now_utc)

# -----------------------------
# 2. API 정보
# -----------------------------
USDT_CONTRACT = "TR7NHqjeKQxGTCi8q8ZY4pL8otSzgjLj6t"
SAMPLE_ADDRESS = "TXYZ1234567890EXAMPLEADDR"  # 테스트용 예시 주소

url = "https://apilist.tronscanapi.com/api/token_trc20/transfers"

params = {
    "contract_address": USDT_CONTRACT,
    "address": SAMPLE_ADDRESS,  # 특정 지갑 기준
    "start_timestamp": start_timestamp,
    "end_timestamp": end_timestamp,
    "start": 0,
    "limit": 200,
    "confirm": "true"
}

# -----------------------------
# 3. API 호출
# -----------------------------
response = requests.get(url, params=params)
if response.status_code != 200:
    print("API 요청 실패:", response.status_code, response.text)
    exit()

data = response.json()
records = data.get("token_transfers", [])

# -----------------------------
# 4. DataFrame 변환
# -----------------------------
df = pd.DataFrame(records)

if not df.empty:
    df = df[["transaction_id", "block", "transferFromAddress", "transferToAddress", "amount_str", "block_ts"]]
    df["datetime"] = pd.to_datetime(df["block_ts"], unit="ms", utc=True).dt.tz_convert("Asia/Seoul")

# -----------------------------
# 5. CSV 저장
# -----------------------------
output_file = "usdt_trc20_recent_1h.csv"
df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"총 {len(df)} 건 수집됨 → {output_file}")
print(df.head())


조회 구간(UTC): 2025-08-18 03:49:11.633328+00:00 ~ 2025-08-18 04:49:11.633328+00:00
총 0 건 수집됨 → usdt_trc20_recent_1h.csv
Empty DataFrame
Columns: []
Index: []


In [12]:
import pandas as pd
import random
from datetime import datetime, timedelta

# -----------------------------
# 1. 가상 거래 데이터 생성
# -----------------------------
num_records = 100
base_time = datetime(2025, 8, 17, 0, 0, 0)

data = []
for i in range(num_records):
    tx_time = base_time + timedelta(minutes=random.randint(0, 60*24))  # 하루 안에서 랜덤 시간
    record = {
        "transaction_id": f"TX{i:05d}",
        "block": random.randint(1000000, 2000000),
        "transferFromAddress": f"TFrom{i:05d}ABC",
        "transferToAddress": f"TTo{i:05d}XYZ",
        "amount": round(random.uniform(1, 1000), 2),  # 1~1000 USDT
        "block_ts": int(tx_time.timestamp() * 1000),
        "datetime": tx_time.strftime("%Y-%m-%d %H:%M:%S")
    }
    data.append(record)

# -----------------------------
# 2. DataFrame으로 변환
# -----------------------------
df = pd.DataFrame(data)

# -----------------------------
# 3. CSV로 저장
# -----------------------------
output_file = "fake_usdt_trx_100.csv"
df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"총 {len(df)}건 생성됨 → {output_file}")
print(df.head())

총 100건 생성됨 → fake_usdt_trx_100.csv
  transaction_id    block transferFromAddress transferToAddress  amount  \
0        TX00000  1935816       TFrom00000ABC       TTo00000XYZ  446.88   
1        TX00001  1753188       TFrom00001ABC       TTo00001XYZ  987.90   
2        TX00002  1442580       TFrom00002ABC       TTo00002XYZ  799.58   
3        TX00003  1360531       TFrom00003ABC       TTo00003XYZ  803.36   
4        TX00004  1691689       TFrom00004ABC       TTo00004XYZ  147.44   

        block_ts             datetime  
0  1755405300000  2025-08-17 13:35:00  
1  1755411000000  2025-08-17 15:10:00  
2  1755359940000  2025-08-17 00:59:00  
3  1755430680000  2025-08-17 20:38:00  
4  1755402900000  2025-08-17 12:55:00  


In [14]:
df_raw = pd.read_csv("fake_usdt_trx_100.csv")
df = pd.DataFrame(df_raw)
df

Unnamed: 0,transaction_id,block,transferFromAddress,transferToAddress,amount,block_ts,datetime
0,TX00000,1935816,TFrom00000ABC,TTo00000XYZ,446.88,1755405300000,2025-08-17 13:35:00
1,TX00001,1753188,TFrom00001ABC,TTo00001XYZ,987.90,1755411000000,2025-08-17 15:10:00
2,TX00002,1442580,TFrom00002ABC,TTo00002XYZ,799.58,1755359940000,2025-08-17 00:59:00
3,TX00003,1360531,TFrom00003ABC,TTo00003XYZ,803.36,1755430680000,2025-08-17 20:38:00
4,TX00004,1691689,TFrom00004ABC,TTo00004XYZ,147.44,1755402900000,2025-08-17 12:55:00
...,...,...,...,...,...,...,...
95,TX00095,1353938,TFrom00095ABC,TTo00095XYZ,540.64,1755388620000,2025-08-17 08:57:00
96,TX00096,1948710,TFrom00096ABC,TTo00096XYZ,447.61,1755358740000,2025-08-17 00:39:00
97,TX00097,1428460,TFrom00097ABC,TTo00097XYZ,521.73,1755420960000,2025-08-17 17:56:00
98,TX00098,1611853,TFrom00098ABC,TTo00098XYZ,937.81,1755379440000,2025-08-17 06:24:00
