# Filter sqlite database

- TODO: execute this from worker to populate small tables for each commit
- TODO: Better grouping for large exception texts that only slightly differ

In [1]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load every recorded decompilation run from the `dewolf` table into memory.
con = sqlite3.connect("samples.sqlite3")
try:
    df = pd.read_sql_query("SELECT * from dewolf", con)
finally:
    # Release the database handle even when the query raises
    # (e.g. the table is missing), instead of leaking the connection.
    con.close()
print("#records:", len(df.index))

#records: 642998


In [3]:
# Keep only the first occurrence of rows that are identical in every column,
# then print the schema / non-null summary of the remaining frame.
is_repeated_row = df.duplicated(keep="first")
df = df[~is_repeated_row]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 642998 entries, 0 to 642997
Data columns (total 16 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   id                          642998 non-null  int64  
 1   function_name               642998 non-null  object 
 2   function_basic_block_count  642998 non-null  int64  
 3   function_size               642998 non-null  int64  
 4   function_arch               642998 non-null  object 
 5   function_platform           642998 non-null  object 
 6   sample_hash                 642998 non-null  object 
 7   sample_name                 642998 non-null  object 
 8   dewolf_options              642998 non-null  object 
 9   dewolf_current_commit       642998 non-null  object 
 10  binaryninja_version         642998 non-null  object 
 11  dewolf_exception            231507 non-null  object 
 12  dewolf_traceback            231507 non-null  object 
 13  dewolf_decompi

# Failed decompilations (functions)

In [4]:
# A run counts as failed when its `is_successful` flag is 0.
is_failed = df["is_successful"] == 0
failed_runs = df[is_failed]
print("# failed runs:", len(failed_runs))

# failed runs: 231507


## Exceptions

In [5]:
# Number of distinct exception messages among the failed runs.
distinct_exceptions = failed_runs["dewolf_exception"].unique()
print("Unique exceptions:", len(distinct_exceptions))

Unique exceptions: 80


In [6]:
# List each exception message with its number of occurrences, most frequent first
# (value_counts() sorts by count in descending order).
exception_counts = failed_runs["dewolf_exception"].value_counts()
for exception_text, occurrences in exception_counts.items():
    print(f"{occurrences}:", exception_text.strip())

224517: RuntimeError: 'NoneType' object has no attribute 'getInstructions'
4727: AssertionError: Can not remove the root node!
668: ValueError: The given set of variables is not an independent set. At least two variables interfere!
610: RuntimeError: did not expect to reach canary check this way
604: ValueError: Couldn't find target for type
221: RuntimeError: argument of type 'NoneType' is not iterable
18: AttributeError: 'MediumLevelILSyscallSsa' object has no attribute 'dest'
14: ValueError: No switch variable candidate found.
9: KeyError: 512
8: OverflowError: cannot convert float infinity to integer
6: KeyError: 224
6: KeyError: 48
5: AttributeError: 'MediumLevelILFunction' object has no attribute '_arch'
5: KeyError: 24
4: KeyError: 80
4: RuntimeError
4: KeyError: 96
4: KeyError: 8
4: ValueError: The children of Sequence can not be sorted due to circular reachability
3: TypeError: 'NoneType' object is not iterable
3: AttributeError: 'Branch' object has no attribute 'destination'


## Tracebacks

In [7]:
# Number of distinct tracebacks among the failed runs.
distinct_tracebacks = failed_runs["dewolf_traceback"].unique()
print("Unique tracebacks:", len(distinct_tracebacks))

Unique tracebacks: 35


In [8]:
# For each distinct traceback, show how often it occurred together with its
# last two lines (the raising location and the raising statement).
traceback_counts = failed_runs["dewolf_traceback"].value_counts()
for traceback_text, occurrences in traceback_counts.items():
    final_lines = traceback_text.strip().split("\n")[-2:]
    print(f"{occurrences}:", "\n".join(final_lines))

224742:   File "/opt/dewolf/decompiler/frontend/binaryninja/tagging.py", line 30, in run
    raise RuntimeError(e)
4727:   File "/opt/dewolf/decompiler/structures/graphs/rootedgraph.py", line 87, in remove_node
    assert node is not self.root, "Can not remove the root node!"
668:   File "/opt/dewolf/decompiler/structures/interferencegraph.py", line 62, in contract_independent_set
    raise ValueError(f"The given set of variables is not an independent set. At least two variables interfere!")
610:   File "/opt/dewolf/decompiler/pipeline/preprocessing/remove_stack_canary.py", line 73, in _patch_branch_condition
    raise RuntimeError("did not expect to reach canary check this way")
339:   File "/opt/binaryninja/python/binaryninja/binaryview.py", line 8463, in _value_helper
    raise ValueError("Couldn't find target for type")
265:   File "/opt/binaryninja/python/binaryninja/binaryview.py", line 8463, in _value_helper
    raise ValueError("Couldn't find target for type")
37:   File "/opt/

## Find the 10 smallest cases (by basic block count) per unique exception

In [7]:
# Number of failing functions to keep for every distinct exception message.
SMALLEST_CASES_PER_EXCEPTION = 10

# For each exception, keep the failing functions with the fewest basic blocks.
# The former leading `sort_values(...).drop_duplicates(...)` expression was
# removed: its result was never assigned or displayed.
#
# Stable sort + groupby.head replaces groupby.apply(nsmallest): it selects the
# same rows in the same order (ties keep their original row order) without
# relying on groupby.apply operating on the grouping column, which newer
# pandas releases deprecate.
filtered_df = (
    failed_runs
    .sort_values("function_basic_block_count", kind="stable")
    .groupby("dewolf_exception")
    .head(SMALLEST_CASES_PER_EXCEPTION)
    # Group the kept rows by exception again; the stable sort preserves the
    # ascending basic-block order inside each exception.
    .sort_values("dewolf_exception", kind="stable")
    .reset_index(drop=True)
)
filtered_df


Unnamed: 0,id,function_name,function_basic_block_count,function_size,function_arch,function_platform,sample_hash,sample_name,dewolf_options,dewolf_current_commit,binaryninja_version,dewolf_exception,dewolf_traceback,dewolf_decompilation_time,dewolf_undecorated_code,is_successful
0,8,__stack_chk_fail,1,10,x86_64,linux-x86_64,7690a63b03116527b8f3aa95625d36c44250bcbc3eb1d2...,7690a63b03116527b8f3aa95625d36c44250bcbc3eb1d2...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,AssertionError: Can not remove the root node!\n,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
1,79,__stack_chk_fail,1,10,x86_64,linux-x86_64,00d64368416328f6891235d725e605021d26eedbbf4f18...,00d64368416328f6891235d725e605021d26eedbbf4f18...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,AssertionError: Can not remove the root node!\n,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
2,234,__stack_chk_fail,1,10,x86_64,linux-x86_64,48ae6c2ee4c1fe9a072fe376bc66824eda5be072524793...,48ae6c2ee4c1fe9a072fe376bc66824eda5be072524793...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,AssertionError: Can not remove the root node!\n,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
3,328,__stack_chk_fail,1,10,x86_64,linux-x86_64,a46a30a17a093415db567c15b497d48ed9a4e56c41885f...,a46a30a17a093415db567c15b497d48ed9a4e56c41885f...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,AssertionError: Can not remove the root node!\n,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
4,417,__stack_chk_fail,1,10,x86_64,linux-x86_64,6cbd692bcbc3d8d4b7092bc1bd7d14a13df4f29e18402f...,6cbd692bcbc3d8d4b7092bc1bd7d14a13df4f29e18402f...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,AssertionError: Can not remove the root node!\n,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,252511,ftdi_usb_open_string,39,728,x86_64,linux-x86_64,121dfa46b9da0745086ac13444228802b83756741d3682...,121dfa46b9da0745086ac13444228802b83756741d3682...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,networkx.exception.NetworkXError: The node cas...,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
276,47095,sub_3da0,42,840,x86_64,linux-x86_64,2cc4148bcdd7907a4aa9c8fa9a1b653cb6fe0c987f40fb...,2cc4148bcdd7907a4aa9c8fa9a1b653cb6fe0c987f40fb...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,networkx.exception.NetworkXError: The node cas...,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
277,54497,sub_6160,38,1016,x86_64,linux-x86_64,96c1ecedc2e8ec7671b005b5c64e345604b1d39363383b...,96c1ecedc2e8ec7671b005b5c64e345604b1d39363383b...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,networkx.exception.NetworkXError: The node cas...,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0
278,191785,sub_43b0,18,376,x86_64,linux-x86_64,6207f05f2949ce3b51fce179729988b704de606045664b...,6207f05f2949ce3b51fce179729988b704de606045664b...,"{\n ""expression-propagation.maximum_instruc...",b978d098f2a829650fb82e984bc7fd611c3190ea,3.1.3469,networkx.exception.NetworkXError: The node cas...,"File ""/opt/dewolf/decompiler/util/bugfinder/...",,,0


In [10]:
import pandas as pd
import sqlite3

database_path = 'filtered.db'

# Persist the reduced sample set, replacing any existing `dewolf` table.
conn = sqlite3.connect(database_path)
try:
    # `with conn:` only commits (or rolls back on error); it does not close
    # the connection, hence the explicit close() below.
    with conn:
        # TODO: write to table 'commit'
        filtered_df.to_sql('dewolf', conn, index=False, if_exists='replace')
finally:
    conn.close()
