### Evaluation of Tool Patch Generation for SmartBugs curated
Notebook to study the patches generated by APR tools on smart contracts.
Here we take into account whether the patch generated by a tool actually differs from the original file.

Note that some tools always modify the original file when generating a patch:
- Aroc, TIPS

The diff-analysis data for each tool is stored in
'../results/smartbugs/TOOL/patches_diff.csv'

By convention, the results of the experiments are stored in:
'../results/smartbugs/contracts.csv'

The results of this notebook are written to:
'../results/smartbugs/tool_patch_ability_DIFF_per_contract.csv'



In [378]:
import pandas as pd
# Example of the input data: the per-patch diff results of a single tool (sGuard)
tool_file='../results/smartbugs/sGuard/patches_diff.csv'
df=pd.read_csv(tool_file)

In [381]:
# Debugging aid, kept disabled: list the rows whose 'original' path contains a given contract address
#df[df['original'].str.contains("0x3a0e9acd953ffc0dd18d63603488846a6b8b2b01") ].values.tolist()
# Preview the first rows of the diff table loaded above
df.head()

Unnamed: 0,patch,original,diff
0,time_manipulation/lottopollo/lottopollo.sol,smartbugs-curated/dataset/time_manipulation/lo...,True
1,time_manipulation/ether_lotto/ether_lotto.sol,smartbugs-curated/dataset/time_manipulation/et...,True
2,time_manipulation/roulette/roulette.sol,smartbugs-curated/dataset/time_manipulation/ro...,False
3,time_manipulation/governmental_survey/governme...,smartbugs-curated/dataset/time_manipulation/go...,True
4,bad_randomness/old_blockhash/old_blockhash.sol,smartbugs-curated/dataset/bad_randomness/old_b...,True


In [382]:
import os

# Locations of the contract list (input) and of the directory this notebook writes to (output)
dataset='smartbugs'
results_path='../results'
contracts_path=os.path.join(results_path,dataset,'contracts.csv')
storage_path=os.path.join(results_path,dataset)

import pandas as pd
# Contract list; the later cells read its 'Name' and 'Category' columns
contracts_df = pd.read_csv(contracts_path)


In [384]:
# Preview the first rows of the contract list
contracts_df.head()

Unnamed: 0,Name,Category,Occurrences,Lines
0,FibonacciBalance.sol,access_control,2,3138
1,arbitrary_location_write_simple.sol,access_control,1,27
2,incorrect_constructor_name1.sol,access_control,1,20
3,incorrect_constructor_name2.sol,access_control,1,18
4,incorrect_constructor_name3.sol,access_control,1,17


In [385]:
# APR tools under evaluation. Each name is used both as the results sub-directory
# ('../results/smartbugs/<tool>/patches_diff.csv') and as a column of the result table.
tools = [
    "SmartShield",
    "sGuard",
    "Aroc",
    "Elysium",
    "TIPS",
    "SmartFix",
    "sGuardPlus"
]

In [386]:
def generate_result_dic():
    """Build the empty per-contract result table as a dict of row dicts.

    Keys are the row positions rendered as strings ("0", "1", ...), following
    the order of ``contracts_df``. Each row holds the contract name, its
    category, and one ``None`` placeholder per entry of ``tools``.
    """
    name_category_pairs = contracts_df[["Name","Category"]].values.tolist()
    return {
        str(position): {
            "Contract": name,
            "Category": kind,
            # one empty slot per tool, filled in later from the diff CSVs
            **{tool: None for tool in tools},
        }
        for position, (name, kind) in enumerate(name_category_pairs)
    }
        
# Empty result table, built once when this cell runs.
# NOTE(review): generate_df() builds its own fresh table under the same local name, so this
# module-level value appears unused in the visible cells — confirm before removing.
initialize_result_dic=generate_result_dic()
    

In [387]:
import re
def get_contract(input_string):
    """Return the last '/'-separated component of a path string.

    Returns ``None`` when nothing follows the final '/' (for example an empty
    string or a path that ends with '/').
    """
    found = re.search(r'[^/]+$', input_string)
    return found.group() if found is not None else None

def get_category(text):
    """Return the leading component of a path string (everything before the first '/').

    Returns ``None`` when the string is empty or starts with '/'.
    """
    hit = re.search(r'^([^/]+)', text)
    if hit is None:
        return None
    return hit.group(1)

def read_diff_csv(tool, result_df) :
    """Fill the ``tool`` column of ``result_df`` from that tool's patches_diff.csv.

    Every CSV row is matched to a result row by contract file name (last path
    component of the row's second field) and category (first path component of
    the row's first field). The row's last field is stored as the cell value.
    NOTE(review): this assumes the CSV loads with the columns
    (patch, original, diff) shown in the sGuard preview — confirm for the other tools.

    Parameters
    ----------
    tool : str
        Tool name; selects '../results/smartbugs/<tool>/patches_diff.csv' and
        the column of ``result_df`` to fill.
    result_df : pandas.DataFrame
        Table with 'Contract', 'Category' and one column per tool. It is
        modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``result_df`` object, for chaining.
    """
    tool_file='../results/smartbugs/'+tool+'/patches_diff.csv'
    diff_rows=pd.read_csv(tool_file).values.tolist()
    for line in diff_rows:
        contract = get_contract(line[1])
        category = get_category(line[0])
        if tool in ['SmartShield', 'Elysium']:
            # for these tools the file name ends in '.rt.hex'; map it to the '.sol' name used in result_df
            contract=contract.replace(".rt.hex", ".sol")
        row_mask = (result_df['Contract'] == contract) & (result_df['Category'] == category)
        # case duplicates: TIPS and Aroc can list several patches for one contract.
        # Once one of them differs from the original (True), keep that verdict instead of
        # letting a later duplicate overwrite it. The original `pass` made this check a no-op.
        if tool in ["TIPS", "Aroc"] and result_df.loc[row_mask, tool].values.tolist() == [True]:
            continue
        result_df.loc[row_mask, tool] = line[-1]
    return result_df
        
def feed_result_dic(initialize_dic, tool_list):
    """Populate the result table with the diff results of every tool in ``tool_list``.

    Parameters
    ----------
    initialize_dic :
        The table handed to ``read_diff_csv`` (a DataFrame when called from
        ``generate_df``); each call's return value is fed to the next call.
    tool_list : iterable of str
        Names of the tools to load, in order. An empty iterable returns
        ``initialize_dic`` untouched.

    Returns
    -------
    The table returned by the last ``read_diff_csv`` call.
    """
    # Iterate over the argument, not the global `tools`, so the caller decides what is loaded.
    for tool in tool_list:
        initialize_dic=read_diff_csv(tool,initialize_dic)
    return initialize_dic

def generate_df():
    """Create the per-contract result table and fill in every tool column.

    Builds an empty table (one row per contract, one ``None`` column per tool)
    as a DataFrame indexed by the row keys, then loads each tool's diff results
    into it.
    """
    empty_rows = generate_result_dic()
    column_order = ['Contract','Category'] + tools
    blank_table = pd.DataFrame.from_dict(empty_rows, orient='index', columns=column_order)
    return feed_result_dic(blank_table, tools)



In [388]:
# NOTE: despite its name, result_dic is the pandas DataFrame returned by generate_df()
result_dic=generate_df()

In [389]:
# Display the populated table (True/False = value of the tool's diff column, empty = no entry for that tool)
result_dic

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus
0,FibonacciBalance.sol,access_control,True,True,,True,True,True,
1,arbitrary_location_write_simple.sol,access_control,True,False,,True,True,True,
2,incorrect_constructor_name1.sol,access_control,True,False,,True,True,True,
3,incorrect_constructor_name2.sol,access_control,True,False,,True,True,True,
4,incorrect_constructor_name3.sol,access_control,True,False,,True,True,True,
...,...,...,...,...,...,...,...,...,...
138,etherpot_lotto.sol,unchecked_low_level_calls,True,True,,True,True,True,
139,king_of_the_ether_throne.sol,unchecked_low_level_calls,True,,True,True,True,True,
140,lotto.sol,unchecked_low_level_calls,True,True,True,True,True,True,True
141,mishandled.sol,unchecked_low_level_calls,True,False,True,True,True,,True


In [390]:
#Sanity Check
#If a cell holds either True or False the tool generated a patch for that contract,
#so the per-tool count needs to match the number from GEN

df=result_dic
# Expected number of contracts with a generated patch, per tool
expected_patch_counts = {
    #'SmartShield': 134,  # check disabled in the original notebook
    'sGuard': 109,
    'Aroc': 90,
    'Elysium': 126,
    'TIPS': 141,
    'SmartFix': 86,
    'sGuardPlus': 81,
}
for tool_name, expected in expected_patch_counts.items():
    # empty cells (no entry for the tool) compare unequal to both True and False
    has_patch = (df[tool_name] == True) | (df[tool_name] == False)
    found = int(has_patch.sum())
    assert found == expected, f"{tool_name}: {found} contracts with a patch, expected {expected}"


In [391]:
#Sanity Check
#Compare with the number on the repository markdown
# Expected number of contracts whose patch differs from the original (cell == True), per tool
expected_diff_counts = {
    'SmartShield': 131,
    'sGuard': 62,
    #'Aroc': 96,  # Can not double check with Monica's result because it has duplicates
    'Elysium': 101,
    #'TIPS': 231 / 141,  # Can not double check with Monica's result because it has duplicates
    'SmartFix': 86,
    'sGuardPlus': 81,
}
for tool_name, expected in expected_diff_counts.items():
    found = int((df[tool_name] == True).sum())
    assert found == expected, f"{tool_name}: {found} contracts with a differing patch, expected {expected}"

In [392]:
#Number of contracts that have a patch with a diff file
# (counts the rows whose TIPS cell equals True)
int((df['TIPS'] == True).sum())

141

In [349]:
# Rebuild the table from scratch so this cell does not depend on earlier mutations of `df`
df=generate_df()

# Columns to consider: every tool column. Derived from `tools` (the list generate_df() uses
# to create the columns) so the two can never drift apart.
columns_to_consider = list(tools)

# Summing up the number of True values in each row
df['PatchCount'] = df[columns_to_consider].sum(axis=1)

# Sorting the DataFrame based on the PatchCount column in ascending order
df_sorted = df.sort_values(by='PatchCount', ascending=True)



In [350]:
# Display the table ordered from the fewest to the most True cells per contract
df_sorted

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
8,parity_wallet_bug_1.sol,access_control,,,,,,,,0
43,dos_number.sol,denial_of_service,,,,False,True,,,1
46,send_loop.sol,denial_of_service,True,False,,False,True,,,2
90,timed_crowdsale.sol,time_manipulation,True,,,False,True,,,2
39,random_number_generator.sol,bad_randomness,True,,,False,True,,,2
...,...,...,...,...,...,...,...,...,...,...
32,tokensalechallenge.sol,arithmetic,True,True,True,True,True,True,True,7
124,0xb7c5c5aa4d42967efe906e1b66cb8df9cebf04f7.sol,unchecked_low_level_calls,True,True,True,True,True,True,True,7
30,timelock.sol,arithmetic,True,True,True,True,True,True,True,7
75,etherstore.sol,reentrancy,True,True,True,True,True,True,True,7


In [351]:
# Persist the sorted per-contract table in the dataset's results directory
file_name='tool_patch_ability_DIFF_per_contract.csv'
output_path = os.path.join(storage_path, file_name)
df_sorted.to_csv(output_path, index=True)

## Comparison per category
The following DataFrames are grouped by category.

What to look for here:

- Which contracts have the lowest PatchCount
- How many tools overlap
- How well each tool performs on the category

In [352]:
# Rows of the sorted table that belong to the access_control category
df_sorted.loc[df_sorted['Category'].eq('access_control')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
8,parity_wallet_bug_1.sol,access_control,,,,,,,,0
14,unprotected0.sol,access_control,True,,,False,True,,,2
6,multiowned_vulnerable.sol,access_control,True,,,False,True,,,2
11,proxy.sol,access_control,True,False,,True,True,,,3
9,parity_wallet_bug_2.sol,access_control,,,,True,True,True,,3
13,simple_suicide.sol,access_control,False,False,,True,True,True,True,4
12,rubixi.sol,access_control,,,,True,True,True,True,4
3,incorrect_constructor_name2.sol,access_control,True,False,,True,True,True,,4
2,incorrect_constructor_name1.sol,access_control,True,False,,True,True,True,,4
5,mapping_write.sol,access_control,True,False,,True,True,,True,4


In [353]:
# Rows of the sorted table that belong to the arithmetic category
df_sorted.loc[df_sorted['Category'].eq('arithmetic')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
22,integer_overflow_benign_1.sol,arithmetic,True,,True,False,True,True,,4
25,integer_overflow_mul.sol,arithmetic,True,,True,True,True,,True,5
26,integer_overflow_multitx_multifunc_feasible.sol,arithmetic,True,,True,True,True,True,,5
20,integer_overflow_1.sol,arithmetic,True,,True,True,True,True,,5
18,BECToken.sol,arithmetic,True,,True,True,True,True,,5
19,insecure_transfer.sol,arithmetic,True,,True,True,True,,True,5
31,token.sol,arithmetic,True,,True,True,True,True,True,6
29,overflow_single_tx.sol,arithmetic,True,,True,True,True,True,True,6
27,integer_overflow_multitx_onefunc_feasible.sol,arithmetic,True,,True,True,True,True,True,6
24,integer_overflow_minimal.sol,arithmetic,True,,True,True,True,True,True,6


In [354]:
# Rows of the sorted table that belong to the bad_randomness category
df_sorted.loc[df_sorted['Category'].eq('bad_randomness')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
39,random_number_generator.sol,bad_randomness,True,,,False,True,,,2
36,lottery.sol,bad_randomness,False,False,,False,True,,True,2
40,smart_billions.sol,bad_randomness,True,,,True,,True,,3
33,blackjack.sol,bad_randomness,True,,,True,True,,,3
35,guess_the_random_number.sol,bad_randomness,True,False,,True,True,True,,4
34,etheraffle.sol,bad_randomness,True,,,True,True,True,,4
37,lucky_doubler.sol,bad_randomness,True,True,,True,True,True,,5
38,old_blockhash.sol,bad_randomness,True,True,,True,True,True,,5


In [355]:
# Rows of the sorted table that belong to the denial_of_service category
df_sorted.loc[df_sorted['Category'].eq('denial_of_service')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
43,dos_number.sol,denial_of_service,,,,False,True,,,1
46,send_loop.sol,denial_of_service,True,False,,False,True,,,2
44,dos_simple.sol,denial_of_service,,,,False,True,,True,2
42,dos_address.sol,denial_of_service,,,,True,True,,,2
41,auction.sol,denial_of_service,True,True,,False,True,True,,4
45,list_dos.sol,denial_of_service,True,,,True,True,True,True,5


In [356]:
# Rows of the sorted table that belong to the front_running category
df_sorted.loc[df_sorted['Category'].eq('front_running')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
47,ERC20.sol,front_running,True,,,False,True,,,2
48,FindThisHash.sol,front_running,True,False,,False,True,True,,3
49,eth_tx_order_dependence_minimal.sol,front_running,True,True,,False,True,True,,4
50,odds_and_evens.sol,front_running,True,True,,True,True,True,True,6


In [357]:
# Rows of the sorted table that belong to the other category
df_sorted.loc[df_sorted['Category'].eq('other')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
52,name_registrar.sol,other,True,,,False,True,,,2
51,crypto_roulette.sol,other,True,True,,,True,True,,4
53,open_address_lottery.sol,other,True,True,,,True,True,,4


In [358]:
# Rows of the sorted table that belong to the reentrancy category
df_sorted.loc[df_sorted['Category'].eq('reentrancy')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
84,spank_chain_payment.sol,reentrancy,,True,True,,True,,,3
76,modifier_reentrancy.sol,reentrancy,True,True,True,False,True,True,,5
54,0x01f8c4e3fa3edeb29e514cba738d87ce8c091d3f.sol,reentrancy,True,True,True,,True,True,True,6
55,0x23a91059fdc9579a9fbd0edc5f2ea0bfdb70deb4.sol,reentrancy,True,True,True,,True,True,True,6
56,0x4320e6f8c05b27ab4707cd1f6d5ce6f3e4b3a5a1.sol,reentrancy,True,True,True,,True,True,True,6
71,0xbe4041d55db380c5ae9d4a9b9703f1ed4e7e3888.sol,reentrancy,True,True,,True,True,True,True,6
58,0x561eac93c92360949ab1f1403323e6db345cbf31.sol,reentrancy,True,True,True,,True,True,True,6
79,reentrancy_cross_function.sol,reentrancy,True,True,True,True,True,,True,6
81,reentrancy_insecure.sol,reentrancy,True,True,True,True,True,,True,6
73,0xf015c35649c82f5467c9c74b7f28ee67665aad68.sol,reentrancy,True,True,,True,True,True,True,6


In [359]:
# Rows of the sorted table that belong to the short_addresses category
df_sorted.loc[df_sorted['Category'].eq('short_addresses')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
85,short_address_example.sol,short_addresses,True,,,True,True,,,3


In [360]:
# NOTE(review): this cell repeats the short_addresses filter of the cell before it and
# produces the same table — likely a copy-paste leftover; confirm and remove.
df_sorted[df_sorted['Category']=='short_addresses']

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
85,short_address_example.sol,short_addresses,True,,,True,True,,,3


In [361]:
# Rows of the sorted table that belong to the time_manipulation category
df_sorted.loc[df_sorted['Category'].eq('time_manipulation')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
90,timed_crowdsale.sol,time_manipulation,True,,,False,True,,,2
89,roulette.sol,time_manipulation,True,False,,True,True,True,,4
88,lottopollo.sol,time_manipulation,False,True,,False,True,True,True,4
86,ether_lotto.sol,time_manipulation,True,True,,True,True,True,,5
87,governmental_survey.sol,time_manipulation,True,True,,True,True,,True,5


In [362]:
# Rows of the sorted table that belong to the unchecked_low_level_calls category
df_sorted.loc[df_sorted['Category'].eq('unchecked_low_level_calls')]

Unnamed: 0,Contract,Category,SmartShield,sGuard,Aroc,Elysium,TIPS,SmartFix,sGuardPlus,PatchCount
115,0x8fd1e427396ddb511533cf9abdbebd0a7e08da35.sol,unchecked_low_level_calls,True,False,True,False,True,,,3
96,0x3a0e9acd953ffc0dd18d63603488846a6b8b2b01.sol,unchecked_low_level_calls,True,False,True,False,True,,,3
95,0x39cfd754c85023648bf003bea2dd498c5612abfa.sol,unchecked_low_level_calls,True,False,True,False,True,,,3
135,0xf2570186500a46986f3139f65afedc2afe4f445d.sol,unchecked_low_level_calls,True,False,True,False,True,,,3
113,0x84d9ec85c9c568eb332b7226a8f826d897e0a4a8.sol,unchecked_low_level_calls,True,False,True,False,True,,True,4
101,0x4b71ad9c1a84b9b643aa54fdd66e2dec96e8b152.sol,unchecked_low_level_calls,True,False,True,True,True,,,4
102,0x524960d55174d912768678d8c606b4d50b79d7b1.sol,unchecked_low_level_calls,True,False,True,True,True,,,4
99,0x4051334adc52057aca763453820cb0e045076ef3.sol,unchecked_low_level_calls,True,False,True,True,True,,,4
94,0x2972d548497286d18e92b5fa1f8f9139e5653fd2.sol,unchecked_low_level_calls,True,False,True,True,True,,,4
107,0x663e4229142a27f00bafb5d087e1e730648314c3.sol,unchecked_low_level_calls,,,True,True,True,True,,4
