## AIOETHERSCAN

https://github.com/ape364/aioetherscan



API modules
Supports all API modules:

- Accounts
- Contracts
- Transactions
- Blocks
- Event logs
- GETH/Parity proxy
- Tokens
- Gas Tracker
- Stats   


In [1]:
!pip install -U aioetherscan

Collecting aioetherscan
  Downloading aioetherscan-0.9.4-py3-none-any.whl.metadata (670 bytes)
Collecting aiohttp-retry<3.0.0,>=2.8.3 (from aioetherscan)
  Downloading aiohttp_retry-2.8.3-py3-none-any.whl.metadata (8.9 kB)
Collecting asyncio_throttle<2.0.0,>=1.0.1 (from aioetherscan)
  Downloading asyncio_throttle-1.0.2-py3-none-any.whl.metadata (3.4 kB)
Downloading aioetherscan-0.9.4-py3-none-any.whl (20 kB)
Downloading aiohttp_retry-2.8.3-py3-none-any.whl (9.8 kB)
Downloading asyncio_throttle-1.0.2-py3-none-any.whl (4.1 kB)
Installing collected packages: asyncio_throttle, aiohttp-retry, aioetherscan
Successfully installed aioetherscan-0.9.4 aiohttp-retry-2.8.3 asyncio_throttle-1.0.2


In [1]:
import asyncio
import logging

from aiohttp_retry import ExponentialRetry
from asyncio_throttle import Throttler

from aioetherscan import Client

from dotenv import load_dotenv
import os

load_dotenv()

api_key = os.getenv('ETHERSCAN_API_KEY')
if not api_key:
    raise ValueError("ETHERSCAN_API_KEY environment variable is not set")


logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)


async def main():
    throttler = Throttler(rate_limit=4, period=1.0)
    retry_options = ExponentialRetry(attempts=2)

    c = Client(api_key, throttler=throttler, retry_options=retry_options)

    try:
        print(await c.stats.eth_price())
        print(await c.block.block_reward(123456))

        address = '0x9f8f72aa9304c8b593d555f12ef6589cc3a579a2'
        async for t in c.extra.generators.token_transfers(
                address=address,
                start_block=19921833,
                end_block=19960851
        ):
            print(t)
            print(c.extra.link.get_tx_link(t['hash']))

        print(c.extra.link.get_address_link(address))
    finally:
        await c.close()


if __name__ == '__main__':
    asyncio.run(main())


RuntimeError: This event loop is already running

In [1]:
import pandas as pd

In [2]:
data = {
    "id": [
        "0xf04403e162a8a988780de9a0416f8b87633e71435f03785f1c0c1291a6eb330b",
        "0x5fb4348e6ef84249f12ec5058e373e956d95913dcdecc036636cb2156763e25c",
        "0x1bf427e14853d650e332b6188962e1ea3b21c02492e2fb31b99a1fd3576c33d6",
        "0x737bb2b517312cd2166bacb41daf5c1a80e560d3f0c8589a40791c43ec7d6566",
        "0x94002f1eabc5b491f9f143bd761ff693362c25e2832fce8191bbc96f5825e97e",
        "0xe4ecd37edb93d22eef32f62dcb397f23f75b699a0c5391e81a81f5885020e07c",
        "0x0232c4dd9f4b88222ed95a25f6236d7dc25b9d3dccb9a596034d37ca26807e98",
        "0xc5061da6f7c89ab0fc20c912f058636a8c3f9d8b0456e15db30e4196d451cb60",
        "0x34484bf7d85bc584599957718112f0dc309a45208457a2be1aa82ec3b4387f37"
    ],
    "timestamp": [
        "Jul-30-2024 01:53:37 PM UTC",
        "Jul-30-2024 01:53:36 PM UTC",
        "Jul-30-2024 12:34:38 AM UTC",
        "Jul-30-2024 12:02:23 AM UTC",
        "Jul-29-2024 21:05:16 AM UTC",
        "Jul-29-2024 21:05:15 AM UTC",
        "Jul-29-2024 21:05:14 AM UTC",
        "Jul-29-2024 21:05:13 AM UTC",
        "Jul-30-2024 01:53:38 PM UTC"
    ],
    "from": [
        "0x000000000231c53e9dCbD5Ee410f065FBc170c29",
        "0x000000000231c53e9dCbD5Ee410f065FBc170c29",
        "0xdA07eFFA7aEe8CD85feb894e33Da9163f082D19e",
        "0x639D2a49bdD0CF37614F46a83e4AA49eAFF1e537",
        "0x00000000fc65059bDdA566b43E17c310B3076e33",
        "0x00000000fc65059bDdA566b43E17c310B3076e33",
        "0x00000000fc65059bDdA566b43E17c310B3076e33",
        "0x00000000fc65059bDdA566b43E17c310B3076e33",
        "0x000000000231c53e9dCbD5Ee410f065FBc170c29"
    ],
    "to": [
        "0x00000000041d945c46E073F0048cEf510D148dEA",
        "0x00000000041d945c46E073F0048cEf510D148dEA",
        "0x46B3fDF7b5CDe91Ac049936bF0bDb12c5d22202e",
        "0x4E3288c9ca110bCC82bf38F09A7b425c095d92Bf",
        "0x4E3288c9ca110bCC82bf38F09A7b425c095d92Bf",
        "0x4E3288c9ca110bCC82bf38F09A7b425c095d92Bf",
        "0x4E3288c9ca110bCC82bf38F09A7b425c095d92Bf",
        "0x4E3288c9ca110bCC82bf38F09A7b425c095d92Bf",
        "0x00000000041d945c46E073F0048cEf510D148dEA"
    ],
    "value": [
        "$0.12",
        "$0.02",
        "$2.40",
        "$5",
        "$3.50",
        "$3.50",
        "$3.50",
        "$3.50",
        "$2,110"
    ],
    "method": [
        "buy",
        "buy",
        "buy",
        "transfer",
        "swap",
        "swap",
        "swap",
        "swap",
        "printMoney"
    ],
    "tokenPrice": [
        "$2.38",
        "$2.39",
        "$2.40",
        "$2.39",
        "$2.26",
        "$2.26",
        "$2.22",
        "$2.20",
        "$2.21"
    ],
    "liquidity": [
        "$1,153,212.96",
        "$1,153,234.56",
        "$1,153,266.32",
        "$1,153,265.16",
        "$1,153,344.33",
        "$1,153,342.55",
        "$1,153,312.90",
        "$1,153,319.12",
        "$1,153,278.06"
    ],
    "marketCap": [
        "$3,234,533.45",
        "$3,235,002.04",
        "$3,234,113.31",
        "$3,234,323.80",
        "$3,235,421.46",
        "$3,235,532.40",
        "$3,236,754.55",
        "$3,236,751.09",
        "$3,235,980.11"
    ],
    "largeTransaction": [
        "FALSE",
        "FALSE",
        "FALSE",
        "FALSE",
        "FALSE",
        "FALSE",
        "FALSE",
        "FALSE",
        "TRUE"
    ],
    "rapidTransaction": [
        "TRUE",
        "FALSE",
        "FALSE",
        "FALSE",
        "TRUE",
        "TRUE",
        "FALSE",
        "FALSE",
        "TRUE"
    ],
    "fraudTransaction": [
        "FALSE",
        "FALSE",
        "FALSE",
        "FALSE",
        "TRUE",
        "FALSE",
        "FALSE",
        "FALSE",
        "TRUE"
    ]
}

df = pd.DataFrame(data)
print(df)


                                                  id  \
0  0xf04403e162a8a988780de9a0416f8b87633e71435f03...   
1  0x5fb4348e6ef84249f12ec5058e373e956d95913dcdec...   
2  0x1bf427e14853d650e332b6188962e1ea3b21c02492e2...   
3  0x737bb2b517312cd2166bacb41daf5c1a80e560d3f0c8...   
4  0x94002f1eabc5b491f9f143bd761ff693362c25e2832f...   
5  0xe4ecd37edb93d22eef32f62dcb397f23f75b699a0c53...   
6  0x0232c4dd9f4b88222ed95a25f6236d7dc25b9d3dccb9...   
7  0xc5061da6f7c89ab0fc20c912f058636a8c3f9d8b0456...   
8  0x34484bf7d85bc584599957718112f0dc309a45208457...   

                     timestamp                                        from  \
0  Jul-30-2024 01:53:37 PM UTC  0x000000000231c53e9dCbD5Ee410f065FBc170c29   
1  Jul-30-2024 01:53:36 PM UTC  0x000000000231c53e9dCbD5Ee410f065FBc170c29   
2  Jul-30-2024 12:34:38 AM UTC  0xdA07eFFA7aEe8CD85feb894e33Da9163f082D19e   
3  Jul-30-2024 12:02:23 AM UTC  0x639D2a49bdD0CF37614F46a83e4AA49eAFF1e537   
4  Jul-29-2024 21:05:16 AM UTC  0x00000000fc65059

In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

import json
import re
import time

# Set up the Chrome WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

all_transactions = []

# Set the number of pages to crawl
num_pages = 2

# Crawl the specified number of pages
for page_num in range(1, num_pages + 1):
    retries = 2
    while retries > 0:
        try:
            # Open the URL
            driver.get(f"https://etherscan.io/txs?p={page_num}")

            # Wait for the page to load completely
            driver.implicitly_wait(10)

            # Get the page content
            page_content = driver.page_source

            # Extract the quickExportTransactionListData from the page_content
            match = re.search(r"const quickExportTransactionListData = '(\[.*?\])';", page_content)
            if match:
                quick_export_data = match.group(1)
                # Convert the JSON string to a Python dictionary
                transactions = json.loads(quick_export_data)
                all_transactions.extend(transactions)
            else:
                print(f"quickExportTransactionListData not found on page {page_num}")
            break
        except Exception as e:
            print(f"Error on page {page_num}: {e}")
            retries -= 1
            if retries == 0:
                print(f"Failed to retrieve data from page {page_num} after 2 retries")
        time.sleep(1)

# Print all transactions
print(all_transactions)


import pandas as pd

# Convert the list of transactions to a DataFrame
df = pd.DataFrame(all_transactions)

# Rename the columns of the DataFrame
df.rename(columns={
    'Txhash': 'hash',
    'Datetime': 'block_timestamp',
    'Sender': 'from_address',
    'Receiver': 'to_address',
    'Amount': 'value',
    'Method': 'method',
    'Blockno': 'block_number',
    'TxnFee': 'gas_price'
}, inplace=True)


# Save the DataFrame to a CSV file
df.to_csv('transactions.csv', index=False)

# Save the DataFrame to a JSON file
df.to_json('transactions.json', orient='records')


print("Transactions have been saved to transactions.csv")


# Close the WebDriver
driver.quit()


KeyboardInterrupt: 

In [None]:
!pip install webdriver-manager

Collecting webdriver-manager
  Using cached webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Using cached webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver-manager
Successfully installed webdriver-manager-4.0.2


## Load Data

In [19]:
from pymongo import MongoClient
import pandas as pd
from dotenv import load_dotenv
import os

load_dotenv()

MONGO_DETAILS = os.getenv("MONGO_DB_URI", "mongodb://localhost:27017/")
client = MongoClient(MONGO_DETAILS)
database = client.transactions_db
transaction_collection = database.get_collection("transactions")

# Read the data from the MongoDB collection
transactions = list(transaction_collection.find())

# Convert the data to a DataFrame
df = pd.DataFrame(transactions)

## Rapid Transaction by grouping the transaction

In [20]:
import pandas as pd
TIMETHRESHOLD = 60

def detect_rapid_transactions(df, time_threshold):
    # Step 1: Convert 'timestamp' column to datetime if not already
    if not pd.api.types.is_datetime64_any_dtype(df['block_timestamp']):
        df['block_timestamp'] = pd.to_datetime(df['block_timestamp'])

    # Step 2: Sort DataFrame by 'block_timestamp'
    df = df.sort_values(by='block_timestamp')

    # group the df by the from_address
    grouped_df = df.groupby('from_address')

    for name, group in grouped_df:
        # print(group)
        rapid_transactions = []

        for i in range(len(group) - 1):
            current_tx = group.iloc[i]
            next_tx = group.iloc[i + 1]

            # Check if within time threshold
            if (next_tx['block_timestamp'] - current_tx['block_timestamp']).total_seconds() <= time_threshold:
                # rapid_transactions.append(current_tx)
                rapid_transactions.append(next_tx)

        # Mark the transactions as rapid in the original DataFrame
        for tx in rapid_transactions:
            df.loc[df['_id'] == tx['_id'], 'rapid_transaction'] = True

    # Mark the transactions that are not rapid as False
    df['rapid_transaction'] = df['rapid_transaction'].fillna(False)

    return df
     

df['block_timestamp'] = pd.to_datetime(df['block_timestamp'])

result = detect_rapid_transactions(df, time_threshold=TIMETHRESHOLD)

# result.to_json('../backend/data/rapid_transactions.json',orient='records', lines=False)

  df['rapid_transaction'] = df['rapid_transaction'].fillna(False)


In [21]:
AMOUNTTHRESHOLD = 1000
def detect_large_amount_transactions(df, amount_threshold):
    # Step 1: Convert 'value' column to numeric if not already
    if not pd.api.types.is_numeric_dtype(df['value']):
        df['value'] = pd.to_numeric(df['value'])

    # Step 2: Mark transactions with value greater than the threshold
    df['large_transaction'] = df['value'] >= amount_threshold

    return df



result = detect_large_amount_transactions(result, amount_threshold=AMOUNTTHRESHOLD)

result.to_json('../backend/data/rapid_transactions.json', orient='records', lines=False)


In [22]:
import pandas as pd 


address = pd.read_csv("/Users/hunglv/Downloads/address_data_k.csv")
address

Unnamed: 0.1,Unnamed: 0,Index,Address,FLAG,Avg min between sent tnx,Avg min between received tnx,Time Diff between first and last (Mins),Sent tnx,Received Tnx,Number of Created Contracts,...,ERC20 min val sent,ERC20 max val sent,ERC20 avg val sent,ERC20 min val sent contract,ERC20 max val sent contract,ERC20 avg val sent contract,ERC20 uniq sent token name,ERC20 uniq rec token name,ERC20 most sent token type,ERC20_most_rec_token_type
0,0,1,0x00009277775ac7d0d59eaad8fee3d10ac6c805e8,0,844.26,1093.71,704785.63,721,89,0,...,0.000000,1.683100e+07,271779.920000,0.0,0.0,0.0,39.0,57.0,Cofoundit,Numeraire
1,1,2,0x0002b44ddb1476db43c868bd494422ee4c136fed,0,12709.07,2958.44,1218216.73,94,8,0,...,2.260809,2.260809e+00,2.260809,0.0,0.0,0.0,1.0,7.0,Livepeer Token,Livepeer Token
2,2,3,0x0002bda54cb772d040f779e88eb453cac0daa244,0,246194.54,2434.02,516729.30,2,10,0,...,0.000000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,8.0,,XENON
3,3,4,0x00038e6ba2fd5c09aedb96697c8d7b8fa6632e5e,0,10219.60,15785.09,397555.90,25,9,0,...,100.000000,9.029231e+03,3804.076893,0.0,0.0,0.0,1.0,11.0,Raiden,XENON
4,4,5,0x00062d1dd1afb6fb02540ddad9cdebfe568e0d89,0,36.61,10707.77,382472.42,4598,20,1,...,0.000000,4.500000e+04,13726.659220,0.0,0.0,0.0,6.0,27.0,StatusNetwork,EOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9836,9836,2175,0xff481ca14e6c16b79fc8ab299b4d2387ec8ecdd2,1,12635.10,631.39,58748.48,4,13,0,...,0.000000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,2.0,,GSENetwork
9837,9837,2176,0xff718805bb9199ebf024ab6acd333e603ad77c85,1,0.00,0.00,0.00,0,0,0,...,0.000000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,1.0,,Blockwell say NOTSAFU
9838,9838,2177,0xff8e6af02d41a576a0c82f7835535193e1a6bccc,1,2499.44,2189.29,261601.88,67,43,0,...,0.000000,0.000000e+00,0.000000,0.0,0.0,0.0,0.0,5.0,,Free BOB Tokens - BobsRepair.com
9839,9839,2178,0xffde23396d57e10abf58bd929bb1e856c7718218,1,0.00,0.00,0.00,0,1,0,...,,,,,,,,,,


In [23]:
address.columns

Index(['Unnamed: 0', 'Index', 'Address', 'FLAG', 'Avg min between sent tnx',
       'Avg min between received tnx',
       'Time Diff between first and last (Mins)', 'Sent tnx', 'Received Tnx',
       'Number of Created Contracts', 'Unique Received From Addresses',
       'Unique Sent To Addresses', 'min value received', 'max value received ',
       'avg val received', 'min val sent', 'max val sent', 'avg val sent',
       'min value sent to contract', 'max val sent to contract',
       'avg value sent to contract',
       'total transactions (including tnx to create contract',
       'total Ether sent', 'total ether received',
       'total ether sent contracts', 'total ether balance',
       ' Total ERC20 tnxs', ' ERC20 total Ether received',
       ' ERC20 total ether sent', ' ERC20 total Ether sent contract',
       ' ERC20 uniq sent addr', ' ERC20 uniq rec addr',
       ' ERC20 uniq sent addr.1', ' ERC20 uniq rec contract addr',
       ' ERC20 avg time between sent tnx', ' ERC20 