In [1]:
%%time
import pandas as pd
import numpy as np

CPU times: user 610 ms, sys: 177 ms, total: 786 ms
Wall time: 913 ms


In [2]:
%%time
print("#\tReading cleaned data...")
file_name = "midstages/data_cleaned_withfunctionnames.csv"
# Every column is read as text. `function_names` holds a comma-separated
# string of names (see the preview of `df` below), so it is declared as `str`;
# `list` is not a dtype that `read_csv` can parse into.
dtype = {"address": str, "solidity": str, "opcode": str, "function_names": str}
# The mapping was previously built but never handed to the reader.
df = pd.read_csv(file_name, sep='\t', dtype=dtype)

#	Reading cleaned data...
CPU times: user 1.43 s, sys: 168 ms, total: 1.6 s
Wall time: 1.64 s


In [9]:
# Preview the first five rows of the loaded frame to check the parsed columns.
df.head(5)

Unnamed: 0,address,solidity,opcode,function_names
0,0x3ccd3f8531f1cc5d9075e8a5bb2e09f688644300,pragma solidity ^0.4.18;\r\n\r\n///>[ Crypto B...,"PUSH1,PUSH1,MSTORE,PUSH1,DUP1,SLOAD,PUSH1,NOT,...","approve,balanceof,implementserc721,ownerof,tak..."
1,0xe8ef8d9d9ff515720a62d2e2f14f3b5b677c6670,pragma solidity ^0.4.16;\r\n\r\ninterface toke...,"PUSH1,PUSH1,MSTORE,PUSH1,PUSH1,PUSH1,PUSH2,EXP...","receiveapproval,tokenerc20,_transfer,transfer,..."
2,0xecbd0854075009d23360c62da07047efc2312144,pragma solidity ^0.4.16;\r\n\r\ninterface toke...,"PUSH1,PUSH1,MSTORE,PUSH1,PUSH1,PUSH1,PUSH2,EXP...","transfer,checkgoalreached,safewithdrawal"
3,0x92d3e963aa94d909869940a8d15fa16ccbc6655e,pragma solidity ^0.4.18;\r\n\r\ncontract Ownab...,"PUSH1,PUSH1,MSTORE,PUSH1,DUP1,SLOAD,PUSH1,PUSH...","ownable,transferownership,pause,unpause,kill,b..."
4,0x3e516824a408c7029c3f870510d59442143c2db9,pragma solidity ^0.4.13;\r\n\r\ninterface Fund...,"PUSH1,PUSH1,MSTORE,CALLVALUE,ISZERO,PUSH2,JUMP...","requestinvestment,requestredemption,executereq..."


In [3]:
%%time
def write_dataset_into_file(potential, potential_non, file_name_potential, file_name_non_potential):
    """Write two equally sized CSV files, one per input frame.

    Both frames are cut down to the row count of the smaller one, so the two
    output files contain the same number of rows. The rows kept are the
    leading rows (``head``), not a random sample.

    Parameters
    ----------
    potential : pandas.DataFrame
        Rows that matched the pattern being searched for.
    potential_non : pandas.DataFrame
        Rows that did not match the pattern.
    file_name_potential : str
        Destination path for the rows taken from ``potential``.
    file_name_non_potential : str
        Destination path for the rows taken from ``potential_non``.

    Returns
    -------
    None
        The function only writes the two files.
    """
    sample_size = min(len(potential_non), len(potential))

    # `header` takes a bool or a list of column aliases. The bare string
    # 'opcode' used before was only a truthy flag: it never limited the output
    # to the opcode column, so every column was (and still is) written.
    # NOTE(review): if only the opcode column is wanted in these files, pass
    # `columns=['opcode']` instead - confirm against the consumers of the CSVs.
    for frame, path in (
        (potential, file_name_potential),
        (potential_non, file_name_non_potential),
    ):
        frame.head(sample_size).to_csv(path, sep=',', index=False, header=True)


CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 7.15 µs


#### ERC20 Example

In [19]:
%%time
print("#\tCreating erc20-nonerc20 dataset.")

# Names looked for in each row's `function_names` value. The check is a plain
# substring test on the stringified cell, so e.g. 'transfer' is also satisfied
# by 'transferfrom'.
# NOTE(review): substring matching may be looser than intended - confirm.
erc20_keywords = (
    'balanceof',
    'totalsupply',
    'transfer',
    'transferfrom',
    'approve',
    'allowance',
)

# Candidates: rows whose function names mention every keyword.
mentions_every_keyword = df.function_names.apply(
    lambda names: all(keyword in str(names) for keyword in erc20_keywords)
)
potential_erc20 = df.loc[mentions_every_keyword]
print("#\tNumber of potential erc20 contracts found: {}".format(len(potential_erc20)))

# Counter-examples: rows whose function names mention none of the keywords.
mentions_no_keyword = df.function_names.apply(
    lambda names: not any(keyword in str(names) for keyword in erc20_keywords)
)
potential_non_erc20 = df.loc[mentions_no_keyword]
print("#\tNumber of potential non-erc20 contracts found: {}".format(len(potential_non_erc20)))

print("#\tWriting the potential erc20-nonerc20 contracts into file with an equal distribution...")
write_dataset_into_file(potential_erc20, potential_non_erc20, "erc20.csv", "non-erc20.csv")

#	Creating erc20-nonerc20 dataset.
#	Number of potential erc20 contracts found: 1684
#	Number of potential non-erc20 contracts found: 1601
#	Writing the potential erc20-nonerc20 contracts into file with an equal distribution...
CPU times: user 3.16 s, sys: 186 ms, total: 3.35 s
Wall time: 3.96 s


# Creating training data sets

# Application Patterns

## Voting

In [20]:
%%time
print("#\tCreating voting-nonvoting dataset.")

# A row is a voting candidate when its stringified `function_names` value
# contains at least one of these substrings.
voting_keywords = ('vote', 'ballot', 'gettotalvotes', 'sendvote', 'voter')
# Substrings that must all be absent for a row to count as non-voting.
# 'voter' is not listed here, but any text containing 'voter' also contains
# 'vote', which is listed.
non_voting_exclusions = ('vote', 'ballot', 'gettotalvotes', 'sendvote')

mentions_voting = df.function_names.apply(
    lambda names: any(keyword in str(names) for keyword in voting_keywords)
)
potential_voting = df.loc[mentions_voting]

print("#\tNumber of potential vote contracts found: {}".format(len(potential_voting)))

avoids_voting = df.function_names.apply(
    lambda names: not any(keyword in str(names) for keyword in non_voting_exclusions)
)
potential_non_voting = df.loc[avoids_voting]
print("#\tNumber of potential non-vote contracts found: {}".format(len(potential_non_voting)))

print("#\tWriting the potential voting-nonvoting contracts into file with an equal distribution...")
write_dataset_into_file(potential_voting, potential_non_voting, "voting_not_annotated.csv", "non-voting.csv")

#	Creating voting-nonvoting dataset.
#	Number of potential vote contracts found: 207
#	Number of potential non-vote contracts found: 7228
#	Writing the potential voting-nonvoting contracts into file with an equal distribution...
CPU times: user 706 ms, sys: 32.1 ms, total: 738 ms
Wall time: 825 ms


## Auction

In [4]:
%%time
print("#\tCreating auction-nonauciton dataset.")

# A row is an auction candidate when its stringified `function_names` value
# contains at least one of these substrings.
auction_keywords = ('auction', 'bid')
# The non-auction set additionally rejects rows mentioning 'offer'.
non_auction_exclusions = ('auction', 'bid', 'offer')

mentions_auction = df.function_names.apply(
    lambda names: any(keyword in str(names) for keyword in auction_keywords)
)
potential_auction = df.loc[mentions_auction]

print("#\tNumber of potential auction contracts found: {}".format(len(potential_auction)))

avoids_auction = df.function_names.apply(
    lambda names: not any(keyword in str(names) for keyword in non_auction_exclusions)
)
potential_non_auction = df.loc[avoids_auction]

print("#\tNumber of potential non-auction contracts found: {}".format(len(potential_non_auction)))

print("#\tWriting the potential auction-nonauction contracts into file with an equal distribution...")
write_dataset_into_file(potential_auction, potential_non_auction, "auction_not_annotated.csv", "non-auction.csv")

#	Creating auction-nonauciton dataset.
#	Number of potential auction contracts found: 66
#	Number of potential non-auction contracts found: 7322
#	Writing the potential auction-nonauction contracts into file with an equal distribution...
CPU times: user 156 ms, sys: 10.7 ms, total: 167 ms
Wall time: 175 ms


## Trading

In [37]:
%%time
print("#\tCreating trading-nontrading dataset.")

# Active heuristic: keep rows whose stringified `function_names` value
# contains 'transferownership'.
# Disabled alternatives, kept for reference:
#     or ('trade' in str(l) and 'seller' in str(l) and 'buyer' in str(l))
#     or 'asset' in str(l)
mentions_ownership_transfer = df.function_names.apply(
    lambda names: 'transferownership' in str(names)
)
potential_trading = df.loc[mentions_ownership_transfer]

print("#\tNumber of potential trading contracts found: {}".format(len(potential_trading)))

# Cell output: the Solidity source of the first matching contract.
potential_trading['solidity'].head(1).values[0]

# The non-trading counterpart and the file export are still disabled:
# potential_non_trading = df.loc[
#     lambda data: data.function_names.apply(
#         lambda l: 'ownership' not in str(l) and
#                   'transfer' not in str(l) and
#                   'trade' not in str(l) and
#                   'seller' not in str(l) and
#                   'buyer' not in str(l) and
#                   'asset' not in str(l)
#     )
# ]
# print("#\tNumber of potential non-trading contracts found: {}".format(potential_non_trading.shape[0]))

# print("#\tWriting the potential trading-nontrading contracts into file with an equal distribution...")
# write_dataset_into_file(potential_trading, potential_non_trading, "trading_not_annotated.csv", "non-trading.csv")

#	Creating trading-nontrading dataset.
#	Number of potential trading contracts found: 2447
CPU times: user 10.6 ms, sys: 1.58 ms, total: 12.2 ms
Wall time: 11.1 ms


'pragma solidity ^0.4.18;\r\n\r\ncontract Ownable {\r\naddress public owner;\r\n\r\n\r\nevent OwnershipTransferred(address indexed previousOwner, address indexed newOwner);\r\n\r\nfunction Ownable() public {\r\nowner = msg.sender;\r\n}\r\n\r\nmodifier onlyOwner() {\r\nrequire(msg.sender == owner);\r\n_;\r\n}\r\n\r\nfunction transferOwnership(address newOwner) public onlyOwner {\r\nrequire(newOwner != address(0));\r\nOwnershipTransferred(owner, newOwner);\r\nowner = newOwner;\r\n}\r\n}\r\n\r\ncontract Pausable is Ownable {\r\nevent Pause();\r\nevent Unpause();\r\n\r\nbool public paused = false;\r\n\r\nmodifier whenNotPaused() {\r\nrequire(!paused);\r\n_;\r\n}\r\n\r\nmodifier whenPaused() {\r\nrequire(paused);\r\n_;\r\n}\r\n\r\nfunction pause() onlyOwner whenNotPaused public {\r\npaused = true;\r\nPause();\r\n}\r\n\r\nfunction unpause() onlyOwner whenPaused public {\r\npaused = false;\r\nUnpause();\r\n}\r\n\r\nfunction kill() onlyOwner public {\r\n    if (msg.sender == owner) selfdestruc