# Most Relevant Features

## Modules and functions

Import all the modules:

In [1]:
import pandas as pd

from notebooks import *

Define some functions that will be used afterwards **(you should skip this part for now)**:

In [2]:
def distribution_table(target, column):
    """
    Summarise a column separately for honeypots and non-honeypots.

    The rows of `target` are split using its `contract_is_honeypot` column as
    a mask, `describe()` is computed on `column` for each group, and every
    statistic is rendered as a string with two decimal places.

    Arguments:
    target -- the pandas dataframe holding `column` and `contract_is_honeypot`
    column -- the name of the column to summarise

    Returns:
    A dataframe indexed by the `describe()` statistics, with a "Yes" column
    (honeypots) and a "No" column (non-honeypots).
    """
    honeypot_mask = target.contract_is_honeypot
    two_decimals = "{:.2f}".format

    formatted = {}
    # "Yes" must come first so the resulting column order stays Yes, No.
    for heading, mask in (("Yes", honeypot_mask), ("No", ~honeypot_mask)):
        formatted[heading] = target[mask][column].describe().apply(two_decimals)
    return pd.DataFrame(formatted)

## Loading

In [3]:
# Load the two lookup objects through the project's `load_dictionary` helper.
# NOTE(review): the ".pickle" extension suggests pickled data - only load such
# files from a trusted source; confirm how `load_dictionary` deserialises them.
honey_badger_labels = load_dictionary("honey_badger_labels.pickle")
fund_flow_cases = load_dictionary("fund_flow_cases.pickle")

In [4]:
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids mixed-dtype columns.
df_file_path = "dataset-filtered.csv"
df = pd.read_csv(df_file_path, low_memory=False)

In [5]:
# Sanity check: report how many rows and columns were loaded.
print_dimensions(df)

The dataset has 158863 rows and 434 columns


## Number of source code lines

Honeypots seem to have a much lower maximum number of source code lines than non-honeypots:

In [6]:
distribution_table(target=df, column="contract_num_source_code_lines")

Unnamed: 0,Yes,No
count,295.0,158568.0
mean,57.0,279.7
std,22.8,278.42
min,19.0,1.0
25%,41.0,109.0
50%,54.0,177.0
75%,67.0,386.0
max,185.0,11409.0


Cross-tabulate the handcrafted rule against the binary honeypot label:

In [7]:
# Rule: strictly more than 15 and strictly fewer than 200 source code lines.
pd.crosstab(
    index=df["contract_num_source_code_lines"].gt(15) & df["contract_num_source_code_lines"].lt(200),
    columns=df["contract_is_honeypot"],
    rownames=["15 < contract_num_source_code_lines < 200"],
)

contract_is_honeypot,False,True
15 < contract_num_source_code_lines < 200,Unnamed: 1_level_1,Unnamed: 2_level_1
False,77641,0
True,80927,295


Extreme cases:

In [8]:
# Contracts with more than 10,000 source code lines, longest first.
df.loc[
    df["contract_num_source_code_lines"].gt(10000),
    ["contract_address", "contract_num_source_code_lines"],
].sort_values(by="contract_num_source_code_lines", ascending=False)

Unnamed: 0,contract_address,contract_num_source_code_lines
74269,0xcc6a0dd80ff3fa8c1b37011765ee19efc480f825,11409
85474,0x5f2539a5dc504493ba9ae1207803c08634cbb091,11409


## Number of normal transactions

Distribution:

In [9]:
distribution_table(target=df, column="normal_transaction_count")

Unnamed: 0,Yes,No
count,295.0,158568.0
mean,5.13,680.61
std,4.26,33198.52
min,1.0,1.0
25%,3.0,1.0
50%,4.0,2.0
75%,6.0,4.0
max,32.0,10412943.0


Cross-tabulate the handcrafted rule against the binary honeypot label:

In [10]:
# Rule: fewer than 40 normal transactions.
pd.crosstab(
    index=df["normal_transaction_count"].lt(40),
    columns=df["contract_is_honeypot"],
    rownames=["normal_transaction_count < 40"],
)

contract_is_honeypot,False,True
normal_transaction_count < 40,Unnamed: 1_level_1,Unnamed: 2_level_1
False,13562,0
True,145006,295


Extreme cases:

In [11]:
# Contracts with more than one million normal transactions, busiest first.
df.loc[
    df["normal_transaction_count"].gt(1000000),
    ["contract_address", "normal_transaction_count"],
].sort_values(by="normal_transaction_count", ascending=False)

Unnamed: 0,contract_address,normal_transaction_count
158856,0x8d12a197cb00d4747a1fe03395095ce2a5cc6819,10412943
158858,0x2a0c0dbecc7e4d658f48e01e3fa353f44050c208,4787242
158857,0x86fa049857e0209aa7d9e616f7eb3b3b78ecfdb0,2953516
158850,0x06012c8cf97bead5deae237070f9587f8e7a266d,2677549
158862,0xf230b790e05390fc8295f4d3f60332c93bed42e2,1974150
158854,0xe94b04a0fed112f3664e45adb2b8915693dd5ff3,1536870
158861,0xa3c1e324ca1ce40db73ed6026c4a177f099b5770,1501351
90216,0x03df4c372a29376d2c8df33a1b5f001cd8d68b0e,1451766
158852,0xd26114cd6ee289accf82350c8d8487fedb8a0c07,1363506
158851,0x6090a6e47849629b7245dfa1ca21d94cd15878ef,1234585


## Mean value of the normal transactions

First, convert the values from wei to ether for readability:

In [12]:
weis_1_ether = 10 ** 18  # number of wei in one ether

In [13]:
# Derived column: mean normal-transaction value expressed in ether instead of wei.
df["normal_transaction_value_mean_ether"] = df["normal_transaction_value_mean"].div(weis_1_ether)

Distribution:

In [14]:
distribution_table(target=df, column="normal_transaction_value_mean_ether")

Unnamed: 0,Yes,No
count,295.0,158568.0
mean,0.27,5.46
std,0.22,570.3
min,0.0,0.0
25%,0.1,0.0
50%,0.25,0.0
75%,0.38,0.0
max,1.67,204365.82


Cross-tabulate the handcrafted rule against the binary honeypot label:

In [15]:
# Rule: mean normal-transaction value below 2 ether.
pd.crosstab(
    index=df["normal_transaction_value_mean_ether"].lt(2),
    columns=df["contract_is_honeypot"],
    rownames=["normal_transaction_value_mean_ether < 2"],
)

contract_is_honeypot,False,True
normal_transaction_value_mean_ether < 2,Unnamed: 1_level_1,Unnamed: 2_level_1
False,3515,0
True,155053,295


Extreme cases:

In [16]:
# Contracts whose mean normal-transaction value exceeds 20,000 ether, largest first.
df.loc[
    df["normal_transaction_value_mean_ether"].gt(20000),
    ["contract_address", "normal_transaction_value_mean_ether"],
].sort_values(by="normal_transaction_value_mean_ether", ascending=False)

Unnamed: 0,contract_address,normal_transaction_value_mean_ether
6329,0x341e790174e3a4d35b65fdc067b6b5634a61caea,204365.823295
32979,0xcea2b9186ece677f9b8ff38dc8ff792e9a9e7f8a,54166.666668
6363,0x52965f9bd9d0f2bbea9b5a9c155a455d0e58fe25,34649.186554
13229,0x900d0881a2e85a8e4076412ad1cefbe2d39c566c,32142.860715
46920,0x69c6dcc8f83b196605fa1076897af0e7e2b6b044,27501.000001
3181,0x33d9b12b3b05927a1a00d5896017c5ff4967fca9,24305.55
3180,0xc207b597e1c0b1dc6d2d8ccbfde0a47633d8c9b7,24074.066667


## Contracts with deposits from others

In [17]:
# NOTE(review): judging by the keyword arguments, this selects error-free fund
# flow cases sent by a non-creator account in which the sender's balance drops
# and the contract's balance rises - confirm against the helper's definition.
deposit_other_frequency = fund_flow_case_columns_accumulated_frequency(
    fund_flow_cases,
    df,
    sender="other",
    error=False,
    balance_sender="negative",
    balance_contract="positive",
)

In [18]:
pd.crosstab(
    index=df["contract_is_honeypot"],
    columns=deposit_other_frequency > 0,
    colnames=["deposit_other_frequency > 0"],
)

deposit_other_frequency > 0,False,True
contract_is_honeypot,Unnamed: 1_level_1,Unnamed: 2_level_1
False,145816,12752
True,192,103


## Contracts with withdrawals from others

In [19]:
# NOTE(review): judging by the keyword arguments, this selects error-free fund
# flow cases sent by a non-creator account in which the sender's balance rises
# and the contract's balance drops - confirm against the helper's definition.
withdraw_other_frequency = fund_flow_case_columns_accumulated_frequency(
    fund_flow_cases,
    df,
    sender="other",
    error=False,
    balance_sender="positive",
    balance_contract="negative",
)

In [20]:
pd.crosstab(
    index=df["contract_is_honeypot"],
    columns=withdraw_other_frequency > 0,
    colnames=["withdraw_other_frequency > 0"],
)

withdraw_other_frequency > 0,False,True
contract_is_honeypot,Unnamed: 1_level_1,Unnamed: 2_level_1
False,155439,3129
True,245,50


## Contracts with deposits from the creator

In [21]:
# NOTE(review): judging by the keyword arguments, this selects error-free fund
# flow cases sent by the creator in which the creator's balance drops and the
# contract's balance rises - confirm against the helper's definition.
deposit_creator_frequency = fund_flow_case_columns_accumulated_frequency(
    fund_flow_cases,
    df,
    sender="creator",
    error=False,
    balance_creator="negative",
    balance_contract="positive",
)

In [22]:
pd.crosstab(
    index=df["contract_is_honeypot"],
    columns=deposit_creator_frequency > 0,
    colnames=["deposit_creator_frequency > 0"],
)

deposit_creator_frequency > 0,False,True
contract_is_honeypot,Unnamed: 1_level_1,Unnamed: 2_level_1
False,150212,8356
True,29,266


## Contracts with withdrawals from the creator

In [23]:
# NOTE(review): judging by the keyword arguments, this selects error-free fund
# flow cases sent by the creator in which the creator's balance rises and the
# contract's balance drops - confirm against the helper's definition.
withdraw_creator_frequency = fund_flow_case_columns_accumulated_frequency(
    fund_flow_cases,
    df,
    sender="creator",
    error=False,
    balance_creator="positive",
    balance_contract="negative",
)

In [24]:
pd.crosstab(
    index=df["contract_is_honeypot"],
    columns=withdraw_creator_frequency > 0,
    colnames=["withdraw_creator_frequency > 0"],
)

withdraw_creator_frequency > 0,False,True
contract_is_honeypot,Unnamed: 1_level_1,Unnamed: 2_level_1
False,152601,5967
True,187,108
