In [1]:
# ==============================================================================
# CELL 1
# ==============================================================================
# --- 1. SETUP & FUNCTION DEFINITIONS ---

# --- NEW: put the project root on sys.path so modules under src/ can be imported ---
import os
import sys

# The parent of the current working directory is treated as the project root.
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    # append() places the entry last, so paths already listed keep precedence;
    # the membership check keeps re-runs of this cell from adding duplicates.
    sys.path.append(project_root)

# Import libraries
import collections
import time
import warnings
from datetime import datetime, timedelta, timezone

import joblib
import numpy as np
import pandas as pd
import requests

# --- CHANGED: import the required helpers from utils.py ---
from src.utils import create_feature_dataframe, cyclical_encoder

# Suppress warnings for a clean output
warnings.filterwarnings('ignore', category=UserWarning)

# --- XÓA BỎ: Các hàm get_all_transactions, get_wallet_resources, create_wallet_data đã được chuyển vào utils.py ---
# Việc này đảm bảo code được nhất quán và dễ bảo trì.

print("✅ All necessary functions are defined and ready.")

✅ All necessary functions are defined and ready.


In [2]:

# ==============================================================================
# CELL 2
# ==============================================================================
# --- 2. LOAD THE PRE-TRAINED AI PIPELINE ---

# CHANGED: use a relative path (resolved against the notebook's working directory).
PIPELINE_PATH = '../models/aptos_pro_pipeline.joblib'

# Reset before loading: if this cell is re-run and the load fails for any
# reason, Cell 3 must not silently keep using a pipeline left in the kernel
# by an earlier run.
loaded_pipeline = None

try:
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load pipeline files produced by a trusted run of
    # the training notebook.
    loaded_pipeline = joblib.load(PIPELINE_PATH)
except FileNotFoundError:
    print(f"❌ ERROR: Pipeline file not found at '{PIPELINE_PATH}'.")
    print("   Please run the 'train_aptos.ipynb' notebook first to create the pipeline file.")
else:
    # Only reached when the load succeeded.
    print("✅ AI Pipeline loaded successfully.")
    print("   You can now proceed to Cell 3 to predict any wallet.")

✅ AI Pipeline loaded successfully.
   You can now proceed to Cell 3 to predict any wallet.


In [3]:
# ==============================================================================
# CELL 3
# ==============================================================================
# --- 3. LIVE WALLET PREDICTOR & LOGGER ---

# ==================================================================
# ==> INPUT THE WALLET ADDRESS YOU WANT TO ANALYZE HERE <==
# ==================================================================
# CHANGED: removed the stray trailing character from the wallet address.
wallet_to_predict = "0x983fe110ab9a83bd43da49260d064017af8527826957e7ebd057ba50a7b2458e"
# ==================================================================
# ==> DEFINE THE LOG FILE PATH <==
# ==================================================================
# CHANGED: relative path that points into the logs directory.
LOG_CSV_PATH = '../logs/prediction_log.csv'
# ==================================================================


# globals().get(...) is None both when Cell 2 was never run and when it
# failed to load the pipeline.
if globals().get('loaded_pipeline') is not None:
    print(f"--- 1. Fetching live data for wallet: {wallet_to_predict[:15]}... ---\n")

    # --- CHANGED: use create_feature_dataframe from utils.py ---
    # Per the original author's note, that helper handles wallets without
    # transactions (avoiding a KeyError).
    # The context manager closes the session's pooled connections as soon as
    # the fetch is finished, even if the helper raises.
    with requests.Session() as session:
        features_df = create_feature_dataframe(session, wallet_to_predict)

    print("\n--- 2. Running prediction ---")
    prediction = loaded_pipeline.predict(features_df)
    prediction_proba = loaded_pipeline.predict_proba(features_df)

    # --- 3. Display simple result on screen ---
    # NOTE(review): assumes label 1 means "Sybil" and that column 1 of
    # predict_proba belongs to that label -- confirm against the pipeline's classes_.
    result_label = "SYBIL (Suspicious)" if prediction[0] == 1 else "NORMAL (Likely a regular user)"
    print("\n" + "="*30)
    print("--- PREDICTION RESULT ---")
    print(f"  Wallet: {wallet_to_predict}")
    print(f"  Result: {result_label}")
    print(f"  Sybil Probability: {prediction_proba[0][1]:.2%}")
    print("="*30)

    # --- 4. Prepare and save detailed data to log file ---
    # Work on a copy so the frame that was fed to the model stays untouched.
    log_df = features_df.copy()
    log_df['label'] = prediction[0]
    log_df['wallet_address'] = wallet_to_predict

    # Derive the first-transaction date when the features do not carry it.
    if 'first_transaction_date' not in log_df.columns and 'wallet_age_days' in log_df.columns:
        # Estimate the creation date by subtracting the wallet age from "now" (UTC).
        wallet_age_days = log_df['wallet_age_days'].iloc[0]
        if wallet_age_days > 0:  # False for NaN as well, so a missing age falls through to None
            creation_dt = datetime.now(timezone.utc) - timedelta(days=int(wallet_age_days))
            log_df['first_transaction_date'] = creation_dt.strftime('%Y-%m-%d %H:%M:%S UTC')
        else:
            log_df['first_transaction_date'] = None

    # Derive the weekday name from the numeric day-of-week feature.
    if 'most_active_weekday' not in log_df.columns and 'tx_day_of_week' in log_df.columns:
        # NOTE(review): assumes 0 = Monday ... 6 = Sunday, with -1 meaning "unknown" -- confirm in utils.py.
        days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
        raw_day = log_df['tx_day_of_week'].iloc[0]
        # A missing value would make int() raise, and a negative index other
        # than -1 would silently wrap around the list; both are logged as "N/A".
        day_index = -1 if pd.isna(raw_day) else int(raw_day)
        log_df['most_active_weekday'] = days[day_index] if 0 <= day_index < len(days) else "N/A"


    columns_to_log = [
        'wallet_address', 'label', 'first_transaction_date', 'wallet_age_days',
        'apt_balance', 'other_token_count', 'total_transaction_count',
        'successful_transaction_count', 'failed_transaction_count',
        'unique_interacted_contracts', 'unique_interacted_addresses',
        'avg_time_between_tx_seconds', 'std_dev_time_between_tx_seconds',
        'most_active_hour', 'most_active_weekday', 'is_self_funded'
    ]

    # Make sure every logged column exists before writing.
    for col in columns_to_log:
        if col not in log_df.columns:
            log_df[col] = None  # add the column when it is missing

    try:
        # Create the log directory on first use; without it the append below fails.
        log_dir = os.path.dirname(LOG_CSV_PATH)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        file_exists = os.path.isfile(LOG_CSV_PATH)
        # An existing but empty file still needs the header row.
        write_header = (not file_exists) or os.path.getsize(LOG_CSV_PATH) == 0
        log_df[columns_to_log].to_csv(LOG_CSV_PATH, mode='a', header=write_header, index=False)
        print(f"\n✅ Successfully logged detailed features to: {LOG_CSV_PATH}")
    except Exception as e:
        # Logging is best-effort: the prediction was already shown above, so a
        # failed write is reported instead of raised.
        print(f"\n❌ ERROR: Could not write to log file. Details: {e}")

else:
    print("\n❌ Cannot proceed. Please run Cell 2 to load the AI pipeline first.")

--- 1. Fetching live data for wallet: 0x983fe110ab9a8... ---

  - Fetching transactions and resources for 0x983fe110...
  - Successfully created feature profile for 0x983fe110...

--- 2. Running prediction ---

--- PREDICTION RESULT ---
  Wallet: 0x983fe110ab9a83bd43da49260d064017af8527826957e7ebd057ba50a7b2458e
  Result: SYBIL (Suspicious)
  Sybil Probability: 95.30%

✅ Successfully logged detailed features to: ../logs/prediction_log.csv
