In [1]:
import pandas as pd
import numpy as np 
from etherscan import Etherscan
import requests
import json
import time
from dotenv import load_dotenv
import os
from multiprocessing import Pool
import logging
from twilio.rest import Client
from datetime import datetime

In [17]:
import psutil

# Function to monitor memory usage
def monitor_memory_usage():
    """Print a snapshot of system memory as reported by ``psutil.virtual_memory()``.

    Total, available and used memory are shown in GB (bytes / 1024**3) with two
    decimals, followed by the overall usage percentage. Returns nothing.
    """
    bytes_per_gb = 1024 ** 3
    snapshot = psutil.virtual_memory()

    # Build every report line first, then emit them one per line.
    report_lines = [
        f"Total Memory: {snapshot.total / bytes_per_gb:.2f} GB",
        f"Available Memory: {snapshot.available / bytes_per_gb:.2f} GB",
        f"Used Memory: {snapshot.used / bytes_per_gb:.2f} GB",
        f"Memory Usage Percentage: {snapshot.percent:.2f}%",
    ]
    print(*report_lines, sep="\n")
# Example usage: print the current memory snapshot for this machine
monitor_memory_usage()

def memory_usage_pass(threshold=90):
    """Report whether system memory usage is within the allowed limit.

    Args:
        threshold: Maximum acceptable memory usage, in percent (default 90).

    Returns:
        True when the usage reported by ``psutil.virtual_memory().percent`` is
        at or below ``threshold``; False when it is above it.
    """
    # The check "passes" only while usage has not exceeded the threshold.
    return psutil.virtual_memory().percent <= threshold

# Show the result of the memory check for the current machine state
print(memory_usage_pass())

Total Memory: 3.49 GB
Available Memory: 1.41 GB
Used Memory: 1.93 GB
Memory Usage Percentage: 59.70%
True


## Load Environment Variables

In [15]:
# Load environment variables from .env so secrets stay out of the notebook
load_dotenv()

# API key sent as the `apikey` query parameter of the transaction requests below.
# os.getenv returns None when ETH_API_KEY is not set.
ETH_API_KEY = os.getenv('ETH_API_KEY')

## Set up Twilio for SMS

In [16]:
# Twilio Account SID and Auth Token, read from the environment
# (each is None when the variable is not set)
account_sid = os.getenv('ACC_SID')
auth_token = os.getenv('AUTH_TOKEN')

# Sender (Twilio) phone number and recipient (your own) phone number
from_number = os.getenv('FROM_NUM')
to_number = os.getenv('TO_NUM')

# Shared Twilio REST client used by send_sms()
tw_client = Client(account_sid, auth_token)

def send_sms(message):
    """Send ``message`` as an SMS through the module-level Twilio client.

    The SMS is only a progress notification, so delivery is best-effort: a
    Twilio/network failure is logged instead of being raised, so it cannot
    abort a long-running fetch that has already done its work.

    Args:
        message: Text body of the SMS.

    Returns:
        True if the message was handed to Twilio, False if sending failed.
    """
    try:
        # The created message resource is not needed, so it is not kept
        # (the original rebound the `message` argument to it).
        tw_client.messages.create(
            body=message,
            from_=from_number,
            to=to_number
        )
    except Exception as e:
        logging.error(f"Failed to send SMS: {e}")
        return False

    print("SMS sent successfully!")
    return True

## Set up Logging for Errors and Warnings

In [18]:
# Log INFO and above to a file, appending across runs.
log_format = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO,  # Set logging level to INFO
                    format=log_format,
                    filename='transaction_logs.log',  # Log file name
                    filemode='a')  # Append mode

# Create a console handler and set the level to INFO
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(logging.Formatter(log_format))

# A handler has no effect until it is attached to a logger. Attach it to the
# root logger, but only once: re-running this cell must not duplicate console
# output. FileHandler subclasses StreamHandler, hence the exact type check.
root_logger = logging.getLogger()
if not any(type(handler) is logging.StreamHandler for handler in root_logger.handlers):
    root_logger.addHandler(console)

## Fetch Illicit & Reputable Addresses

In [19]:
def split_array_to_rows(value):
    """Flatten a stringified address list into a clean list of addresses.

    Accepts either a single string such as ``"['0xabc', '0xdef']"`` or a list
    of such strings. Surrounding brackets and quotes are removed, the text is
    split on commas, and every piece is stripped of whitespace so that
    ``"['0xabc', '0xdef']"`` yields ``['0xabc', '0xdef']`` rather than
    ``['0xabc', ' 0xdef']``.

    Args:
        value: A string, a list of strings, or anything else (e.g. NaN from an
            empty CSV cell).

    Returns:
        A list of non-empty address strings. Empty for unsupported input, for
        ``"[]"`` and for empty strings.
    """
    if isinstance(value, str):
        raw_items = [value]
    elif isinstance(value, list):
        raw_items = value
    else:
        # e.g. float NaN coming from an empty CSV cell
        return []

    cleaned_values = []
    for item in raw_items:
        if not isinstance(item, str):
            # Non-string entries (NaN/None inside a list) carry no address.
            continue
        # Remove the square brackets and quotes left over from the stringified list
        unwrapped = item.strip().strip("[]").replace("'", "").replace('"', "")
        for piece in unwrapped.split(','):
            piece = piece.strip()
            if piece:  # drop blanks produced by "[]" or trailing commas
                cleaned_values.append(piece)
    return cleaned_values

In [20]:
def get_reputable_sc_addresses(blockchain_network):
    """Load the reputable smart-contract addresses stored under one CSV column.

    Args:
        blockchain_network: Name of the column to read from
            ``./fetch_reputable_accounts/ethereum_smart_contracts.csv``.

    Returns:
        A Series holding that column's non-null values, re-indexed from 0.
    """
    reputable_df = pd.read_csv('./fetch_reputable_accounts/ethereum_smart_contracts.csv')
    network_column = reputable_df[blockchain_network]
    non_null_addresses = network_column.dropna()
    return non_null_addresses.reset_index(drop=True)

def get_illicit_sc_addresses(blockchain_network):
    """Load the illicit smart-contract addresses stored under one CSV column.

    Each cell of the column is parsed with ``split_array_to_rows`` (cells may
    hold a stringified list of several addresses) and the resulting lists are
    expanded so that every address gets its own row.

    Args:
        blockchain_network: Name of the column to read from
            ``./fetch_illicit_accounts/illicit_smart_contract_addresses.csv``.

    Returns:
        A Series with one address per row, nulls removed, re-indexed from 0.
    """
    csv_file = './fetch_illicit_accounts/illicit_smart_contract_addresses.csv'
    df = pd.read_csv(csv_file)
    # Series.explode takes no column argument (that belongs to DataFrame.explode);
    # a positional value would be interpreted as `ignore_index`.
    addresses = df[blockchain_network].apply(split_array_to_rows).explode()
    # Cells that parsed to an empty list explode to NaN and are dropped here.
    return addresses.dropna().reset_index(drop=True)

In [23]:
# Load both address sets; the arguments are the CSV column names to read.
# Despite the `_df` suffix, both loaders return a pandas Series of addresses.
addr_rep_df = get_reputable_sc_addresses('ethereum')
addr_ill_df = get_illicit_sc_addresses('addresses.ETH')

In [8]:
def normal_transactions(address, blockchain_api_name):
    """Fetch the normal transactions of ``address`` as a trimmed DataFrame.

    Queries the explorer's ``account/txlist`` endpoint using the module-level
    ``ETH_API_KEY`` and removes the columns this analysis does not use.

    Args:
        address: Account or contract address to query.
        blockchain_api_name: Explorer host name inserted into
            ``https://api.<name>.io`` (e.g. ``'etherscan'``).

    Returns:
        A DataFrame with one row per transaction. An empty DataFrame when the
        API returns no rows (e.g. ``'No transactions found'``) or an error
        string in place of the result list.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
    """
    URL = f"https://api.{blockchain_api_name}.io/api?module=account&action=txlist&address={address}" \
          f"&startblock=0&endblock=99999999&page=1&offset=10000&sort=asc&apikey={ETH_API_KEY}"

    # A timeout prevents the notebook from hanging forever on a stalled connection.
    r = requests.get(url=URL, timeout=30)
    r.raise_for_status()
    data = r.json()

    result = data.get("result")
    if not isinstance(result, list) or not result:
        # Status '0' responses carry an empty list or an error string here;
        # building a frame from them and dropping columns would raise.
        logging.warning(f"No transactions returned for address {address}: {data.get('message')}")
        return pd.DataFrame()

    unused_columns = ['nonce', 'blockHash',
                      'transactionIndex', 'txreceipt_status', 'input',
                      'cumulativeGasUsed', 'gasUsed', 'confirmations',
                      'functionName', 'methodId']
    # errors='ignore': tolerate responses that lack some of these columns.
    return pd.DataFrame(result).drop(columns=unused_columns, errors='ignore')

In [183]:
# Sample call; the recorded output below reports 'No transactions found' for this address
normal_transactions('0x8aF1380c436fC019b8055C61d09e3Ee5F182278C', 'etherscan')

{'status': '0', 'message': 'No transactions found', 'result': []}

## Set up API for Fetching Transactions

In [22]:
def fetch_transactions(address, blockchain_api_name, api_key):
    """Fetch and clean the transactions of one address.

    Chains ``build_url`` -> ``send_request`` -> ``parse_response`` ->
    ``process_transactions``. Any failed step, or an exception raised by one,
    results in an empty DataFrame (exceptions are logged with the address).

    Args:
        address: Address whose transactions are requested.
        blockchain_api_name: Explorer host name used to build the URL.
        api_key: API key for the explorer.

    Returns:
        The processed transactions, or an empty DataFrame on failure.
    """
    request_url = build_url(address, blockchain_api_name, api_key)
    try:
        response = send_request(request_url)
        # Only parse when the request produced a response at all.
        payload = parse_response(response) if response is not None else None
        if payload is None:
            return pd.DataFrame()  # request failed or response could not be used
        return process_transactions(payload)
    except Exception as e:
        logging.error(f"Error fetching transactions for address {address}: {e}")
        return pd.DataFrame()  # never propagate; callers concatenate the results

def build_url(address, blockchain_api_name, api_key):
    """Build the ``account/txlist`` request URL for one address.

    The query asks for page 1 with up to 10000 transactions between blocks 0
    and 99999999, sorted ascending.

    Args:
        address: Address to query.
        blockchain_api_name: Explorer host name inserted into ``https://api.<name>.io``.
        api_key: API key appended as the ``apikey`` parameter.

    Returns:
        The complete URL as a string.
    """
    endpoint = f"https://api.{blockchain_api_name}.io/api?module=account&action=txlist&address={address}"
    paging_and_auth = f"&startblock=0&endblock=99999999&page=1&offset=10000&sort=asc&apikey={api_key}"
    return endpoint + paging_and_auth


def _is_rate_limited(response):
    """Return True when the JSON body of ``response`` reports a rate-limit error."""
    try:
        payload = response.json()
    except ValueError:
        # Not JSON: leave it to the response parser to report.
        return False
    if not isinstance(payload, dict) or payload.get('status') != '0':
        return False
    detail = f"{payload.get('message', '')} {payload.get('result', '')}"
    return 'rate limit' in detail.lower()


def send_request(url, max_retries=3, retry_delay=1.0, timeout=30):
    """GET ``url`` and return the response, retrying while the API is rate limited.

    The explorer reports rate limiting inside a normal HTTP response (status
    ``'0'`` with a "rate limit" message in the JSON body), so the body has to
    be inspected; an HTTP status code is never 0.

    Args:
        url: Fully built request URL.
        max_retries: How many times to retry after a rate-limit reply.
        retry_delay: Seconds to sleep before each retry.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` on success, or None on HTTP error, network
        error, or when the rate limit persists after all retries.
    """
    try:
        for attempt in range(max_retries + 1):
            r = requests.get(url, timeout=timeout)
            r.raise_for_status()  # Raise an error for HTTP errors
            if not _is_rate_limited(r):
                return r
            if attempt < max_retries:
                logging.warning(f"Rate limit exceeded. Sleeping for {retry_delay} second(s).")
                time.sleep(retry_delay)
        logging.error("Rate limit still exceeded after retrying; giving up on this request.")
        return None
    except requests.HTTPError as e:
        logging.error(f"HTTP error: {e}")
        return None
    except Exception as e:
        logging.error(f"Error sending request: {e}")
        return None

def parse_response(response):
    """Decode the JSON body of ``response`` and reject unusable replies.

    A status of ``'0'`` covers both "no transactions" and genuine API errors
    (the latter carry an explanatory ``result`` string). The two are logged
    differently so real failures are not mistaken for empty addresses.

    Args:
        response: Object exposing ``json()`` (a ``requests.Response``).

    Returns:
        The decoded dict when the status is not ``'0'``; otherwise None. None
        is also returned when the body cannot be decoded.
    """
    try:
        data = response.json()
        if data.get('status') == '0':
            message = data.get('message', '')
            result = data.get('result')
            if not result or 'no transactions' in str(message).lower():
                logging.warning("No transactions found.")
            else:
                logging.error(f"API error: {message} - {result}")
            return None
        return data
    except Exception as e:
        logging.error(f"Error parsing response: {e}")
        return None

def process_transactions(data):
    """Turn a decoded API payload into a DataFrame without the unused columns.

    Args:
        data: Decoded response dict; its ``'result'`` entry is expected to be
            a list of transaction dicts.

    Returns:
        A DataFrame with one row per transaction and the unused columns
        removed. Empty when ``'result'`` is missing, empty, not a list, or
        when processing fails.
    """
    unused_columns = ['nonce', 'blockHash', 'transactionIndex', 'txreceipt_status', 'input',
                      'cumulativeGasUsed', 'gasUsed', 'confirmations', 'functionName', 'methodId']
    try:
        result = data.get("result", [])
        if not isinstance(result, list):
            # Error replies put a message string where the list should be.
            logging.error(f"Unexpected 'result' payload: {result!r}")
            return pd.DataFrame()
        transactions = pd.DataFrame(result)
        # errors='ignore': a column missing from the response must not raise
        # KeyError and throw away every row that was fetched.
        return transactions.drop(columns=unused_columns, errors='ignore')
    except Exception as e:
        logging.error(f"Error processing transactions: {e}")
        return pd.DataFrame()

In [10]:
def fetch_transactions_parallel(addresses, blockchain_api_name, api_key, num_processes=7):
    """Fetch transactions for many addresses in parallel and combine them.

    Args:
        addresses: Iterable of addresses (list or pandas Series).
        blockchain_api_name: Explorer host name passed to ``fetch_transactions``.
        api_key: API key passed to ``fetch_transactions``.
        num_processes: Upper bound on the number of worker processes.

    Returns:
        One DataFrame holding the rows of every per-address result, with a
        fresh 0..n-1 index. Empty when ``addresses`` is empty.
    """
    tasks = [(addr, blockchain_api_name, api_key) for addr in addresses]
    if not tasks:
        # pd.concat raises on an empty list; there is also nothing to fetch.
        return pd.DataFrame()

    # Use multiprocessing Pool to fetch transactions in parallel, never
    # starting more workers than there are addresses.
    # NOTE(review): the workers share one API key, so the combined request rate
    # may exceed the provider's per-second limit - confirm against your plan.
    with Pool(processes=min(num_processes, len(tasks))) as pool:
        # Use pool.starmap to pass multiple arguments to the function
        results = pool.starmap(fetch_transactions, tasks)

    # Concatenate results into a single DataFrame
    return pd.concat(results, ignore_index=True)

## Fetch Illicit Transactions

In [216]:
# Fetch the transactions of every illicit address, label them with FLAG = 0
# and save the result to ill_transactions.csv
final_ill_transactions_df = fetch_transactions_parallel(addr_ill_df, 'etherscan', ETH_API_KEY)
final_ill_transactions_df['FLAG'] = 0
final_ill_transactions_df.to_csv('ill_transactions.csv', index=False)

# Get the current timestamp
current_time = datetime.now()

# Send an SMS notification containing the completion timestamp
send_sms(f"Finished Fetching Illicit transactions at {current_time}")

SMS sent successfully!


In [217]:
# Display the fetched illicit transactions (the frame is rendered truncated below)
final_ill_transactions_df

Unnamed: 0,blockNumber,timeStamp,hash,from,to,value,gas,gasPrice,isError,contractAddress,FLAG
0,4076477,1501076489,0x0b8f0ad2d8f85ce70b46a1498b52b56621fc62101ff7...,0xfdd3bfe92735e109726d397007464624a39334d7,0xd0cc2b24980cbcca47ef755da88b220a82291407,11493999000000000000,21000,21000000000,0,,0
1,4076495,1501076866,0x08fef225ec0eb7b4da6372935327256796227bec832e...,0xd0cc2b24980cbcca47ef755da88b220a82291407,0xd8711dba2db856eebd7f01db3d877e4c53d6dee2,100000000000000000,21000,21000000000,0,,0
2,4076502,1501077056,0xea426df711fc8ddebc1236449a4091baee58f2171985...,0x7e8def9b77417f60a5a6a420573f4580e14e17b1,0xd0cc2b24980cbcca47ef755da88b220a82291407,296901974000000000,21000,21000000000,0,,0
3,4076528,1501077776,0x757aba719f8d5ded4ff9d67f3ff7b0350ecda694971c...,0x984a01719fceeedd10c43afca9d4220508e54f1a,0xd0cc2b24980cbcca47ef755da88b220a82291407,361313983000011260,21000,21000000000,0,,0
4,4076652,1501080107,0x0091dcfbd215e6a4a126f9139ac204e522e62164319e...,0xd0cc2b24980cbcca47ef755da88b220a82291407,0x86849f631edd436794ec35982f4b5459056190d3,100000000000000000,21000,21000000000,0,,0
...,...,...,...,...,...,...,...,...,...,...,...
102774,18813209,1702906547,0x2643d4a32a746201e990596f16ca6c1e3d28ad9ff437...,0xb6af46be91b1ba4043c99f968c18871d3a763059,0x054d64b73d3d8a21af3d764efd76bcaa774f3bb2,0,42532,77822873118,0,,0
102775,18814490,1702922111,0x986d9759c170091f13220d4fd78f474cbef481bc36c4...,0xb6af46be91b1ba4043c99f968c18871d3a763059,0x4740735aa98dc8aa232bd049f8f0210458e7fca3,0,71478,87218808399,0,,0
102776,18986061,1705004951,0x0b723dfbd398799d94cd2d597b8da697e0d5514d0fd1...,0xb6af46be91b1ba4043c99f968c18871d3a763059,0xdac17f958d2ee523a2206206994597c13d831ec7,0,65606,29910880403,0,,0
102777,19064717,1705956335,0x205f5055d0eeab84d568dc1b53234760b6b6d175f884...,0xb6af46be91b1ba4043c99f968c18871d3a763059,0xf411903cbc70a74d22900a5de66a2dda66507255,0,82237,17490068736,0,,0


## Fetch Reputable Transactions

### Splitting the dataset into batches

In [19]:
# Function to split DataFrame into parts
def split_dataframe_into_parts(df, num_parts=10):
    """Split ``df`` into up to ``num_parts`` contiguous, near-equal parts.

    Every row ends up in exactly one part: when the length is not divisible by
    ``num_parts`` the first ``len(df) % num_parts`` parts get one extra row, so
    no trailing rows are lost. Parts that would be empty (fewer rows than
    parts) are omitted.

    Args:
        df: DataFrame or Series to split (anything supporting ``len`` and ``.iloc``).
        num_parts: Maximum number of parts to produce (default 10).

    Returns:
        A list of non-empty ``.iloc`` slices in original row order; an empty
        list when ``df`` has no rows.

    Raises:
        ValueError: If ``num_parts`` is less than 1.
    """
    if num_parts < 1:
        raise ValueError("num_parts must be at least 1")

    num_rows = len(df)
    base_size, remainder = divmod(num_rows, num_parts)

    parts = []
    start = 0
    for part_index in range(num_parts):
        # Spread the remainder over the leading parts, one extra row each.
        size = base_size + (1 if part_index < remainder else 0)
        if size == 0:
            break  # all remaining parts would be empty
        parts.append(df.iloc[start:start + size])
        start += size
    return parts

# Main function to process DataFrame in batches
def process_dataframe_in_batches(blockchain_api_name, api_key):
    """Fetch the reputable-address transactions batch by batch.

    The module-level ``addr_rep_df`` is split with ``split_dataframe_into_parts``.
    For each batch the transactions are fetched in parallel, labelled with
    ``FLAG = 1`` and written to ``rep_transactions_<n>.csv`` (n starts at 1).
    Progress is printed and sent by SMS after every batch.

    Args:
        blockchain_api_name: Explorer host name passed to the fetcher.
        api_key: API key passed to the fetcher.
    """
    address_batches = split_dataframe_into_parts(addr_rep_df)
    total_batches = len(address_batches)
    for batch_number, df_batch in enumerate(address_batches, start=1):
        if len(df_batch) == 0:
            # Nothing to fetch; skipping also avoids concatenating zero results.
            logging.warning(f"Batch {batch_number}/{total_batches} is empty; skipping.")
            continue

        output_file = f'rep_transactions_{batch_number}.csv'
        transactions_batch_df = fetch_transactions_parallel(df_batch, blockchain_api_name, api_key)
        transactions_batch_df['FLAG'] = 1
        transactions_batch_df.to_csv(output_file, index=False)

        current_time = datetime.now()
        # Report the file that was actually written and the real batch count.
        print(f'Batch {batch_number} processed. Transactions saved to {output_file}')
        send_sms(f'Batch {batch_number}/{total_batches} processed at {current_time}')

### Executing -- Fetching in Batches

In [24]:
# Fetch the reputable-address transactions batch by batch; each batch is
# written to its own rep_transactions_<n>.csv file
process_dataframe_in_batches('etherscan', ETH_API_KEY)
# Get the current timestamp
current_time = datetime.now()

# Send an SMS indicating that fetching the reputable transactions has finished
send_sms(f"Finished Fetching Reputable transactions at {current_time}")

: 