# Feature Engineering: Account-Based Dataset Creation

### Import Required Libraries

In [None]:
from IPython.core.interactiveshell import InteractiveShell
#Show the Output of Every Bare Expression in a Cell, Not Only the Last One
InteractiveShell.ast_node_interactivity = "all"

#Etherscan API Python Library: https://github.com/pcko1/etherscan-python/tree/master/logs/standard
from etherscan import Etherscan
#Import Custom Functions Created to Help Engineer Features
import custom_functions as cf
import pandas as pd

In [None]:
#API Key
#NOTE(review): 'API KEY' looks like a placeholder -- substitute a real Etherscan API key locally and avoid committing it
eth = Etherscan('API KEY')

### View Etherscan API Output Format 

In [None]:
#Block Range (First Block, Last Block) Passed to the Etherscan Queries
start_block, end_block = 1, 18_000_000

In [None]:
%%time
#Get Normal Transactions by Account
#Lowercase the address with .lower() before querying: the Etherscan explorer supports checksum addresses, but the API doesn't return checksummed addresses
#Results are requested between start_block and end_block with sort order 'asc'
txs = eth.get_normal_txs_by_address('0x0eD78CCA6cdfa68e08b85C463F29Dd8741Ff06ad'.lower(), start_block, end_block, 'asc')

In [None]:
#Get ERC20 Token Transactions by Account
#NOTE(review): this queries a different address than the normal-transaction example -- presumably one with ERC20 activity; confirm
erc20_txs = eth.get_erc20_token_transfer_events_by_address('0x002Bf459dC58584D58886169EA0E80f3Ca95FFAF'.lower(), start_block, end_block, 'asc')

# Feature Engineering on a Single Address

In [None]:
#Ordered Names of the Output Features Handed to the cf Feature-Generation Helpers
#Keep the Order Unchanged: It Is Passed Through As-Is to Those Helpers
feature_list = [
    "Sent_tnx",
    "Received_tnx",
    "Total_Ether_Balance",
    "Max_Val_Received",
    "Min_Val_Received",
    "Total_Ether_Received",
    "Time_Diff_between_first_and_last_(Mins)",
    "Total_Transactions_(Including_Tnx_to_Create_Contract)",
    "Avg_Val_Received",
    "Max_Val_Sent",
    "Min_Val_Sent",
    "Total_Ether_Sent",
    "Avg_Val_Sent",
    "Avg_min_between_sent_tnx",
    "Avg_min_between_received_tnx",
    "ERC20_Total_Ether_Received",
    "ERC20_Total_Ether_Sent",
    "ERC20_Min_Val_Rec",
    "ERC20_Max_Val_Rec",
    "ERC20_Min_Val_Sent",
    "ERC20_Max_Val_Sent",
    "Total_ERC20_Tnxs",
    "ERC20_Avg_Time_Between_Sent_Tnx",
    "ERC20_Avg_Time_Between_Rec_Tnx",
    "Unique_Received_From_Addresses",
    "Unique_Sent_To_Addresses",
    "ERC20_Uniq_Sent_Addr",
    "ERC20_Uniq_Rec_Addr",
    "ERC20_Avg_Val_Sent",
    "ERC20_Avg_Val_Rec",
]

In [None]:
%%time
#Automate Feature Generation
address = '0x0002b44ddb1476db43c868bd494422ee4c136fed'
df1 = cf.automate_feature_generation_single(address.lower(), feature_list, start_block, end_block)

#Display Output in Readable Manner
pd.options.display.float_format = '{:.3f}'.format
pd.set_option('display.max_columns', None)

df1

# Feature Engineering on Multiple Addresses

In [None]:
%%time
#Automate Feature Generation
addresses = ['0x0002b44ddb1476db43c868bd494422ee4c136fed','0x1d6065940bfd6ac7c4ec1e2a35e1420e0a9c1bff','0x1cdcec6630c99863fb1307c812691bc688f3e92e']
addresses_lower = [i.lower() for i in addresses]

df2 = cf.automate_feature_generation_multiple(addresses_lower, feature_list, start_block, end_block)
df2

# Batch Feature Engineering: Large Number of Addresses

In [None]:
#Create Batches to Query Etherscan
#NOTE(review): random_sample_accounts is not defined anywhere in the visible notebook -- it must be loaded/created before this cell runs
#NOTE(review): 200 is presumably the batch size -- confirm against cf.batch_creator
df_list = cf.batch_creator(random_sample_accounts,200)

In [None]:
%%time
#Query Etherscan Batch by Batch
#NOTE(review): missed_addresses presumably holds the addresses that could not be processed -- confirm in cf.batch_feature_engineering
result_dataset, missed_addresses = cf.batch_feature_engineering(df_list, feature_list, start_block, end_block)
#Both Values Are Displayed Because ast_node_interactivity Is Set to "all" in the Import Cell
result_dataset
missed_addresses