In [1]:
# Environment setup: install git, clone the ConsenSys python-solidity-parser
# repository and install it from the local checkout.
!apt-get install -y -qq git
!git clone https://github.com/ConsenSys/python-solidity-parser.git
# Show the working directory contents before entering the clone.
%ls
# NOTE: %cd changes the kernel's working directory for all later cells.
%cd python-solidity-parser
%ls
# Install the package from the current directory (the cloned repository).
!pip install .
from solidity_parser import parser

import sys
import pprint

def parse_solidity_code(source_code):
    """Parse Solidity source text into an AST, tolerating parser failures.

    Args:
        source_code: Solidity source handed unchanged to ``parser.parse``.

    Returns:
        Whatever ``parser.parse`` returns, or ``None`` when it raised. In the
        failure case the error is printed rather than propagated.
    """
    try:
        parsed = parser.parse(source_code)
    except Exception as e:
        # Best-effort: report the failure and signal it to the caller with None.
        print(f"Error parsing Solidity code: {e}")
        return None
    else:
        return parsed

import pandas as pd
from google.colab import drive

# Expose Google Drive to the runtime under /content/drive.
drive.mount('/content/drive')

# Location of the training CSV on the mounted Drive.
TRAIN_CSV_PATH = '/content/drive/MyDrive/Practicum/SascDatasets/train_data_source_codes.csv'
train_df = pd.read_csv(TRAIN_CSV_PATH)

# List of Solidity keywords / built-in names that must survive anonymisation:
# identifiers found in this list are kept verbatim, all others become 'XX'.
# Every group below must end with a trailing comma; without it Python silently
# concatenates adjacent string literals across lines (e.g. 'anonymous' 'this'
# would become the single entry 'anonymousthis').
solidity_keywords = [
    # Variable Types
    'address', 'bool', 'int', 'int8', 'int16', 'int32', 'int64', 'int128', 'int256', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256',
    'float', 'double', 'fixed', 'ufixed', 'byte', 'bytes', 'bytes1', 'bytes2', 'bytes4', 'bytes8', 'bytes16', 'bytes32', 'string', 'mapping', 'struct', 'enum',

    # Control Structures
    'if', 'else', 'while', 'do', 'for', 'switch', 'case', 'default', 'break', 'continue', 'return',
    'throw', 'require', 'revert', 'modifier',

    # Visibility Specifiers
    'public', 'external', 'internal', 'private',

    # Function Modifiers
    'pure', 'view', 'payable', 'constant', 'anonymous',

    # Special Keywords
    'this', 'super', 'selfdestruct', 'assembly',

    # Events
    'event', 'indexed',

    # Built-in Functions
    'msg', 'msg.sender', 'msg.value', 'sender', 'value', 'now', 'block', 'tx', 'origin', 'gasleft', 'assert', 'require', 'revert',
    'keccak256', 'sha256', 'ecrecover', 'addmod', 'mulmod', 'create', 'call', 'delegatecall', 'callcode', 'send',
    'staticcall', 'selfdestruct', 'balance', 'div', 'mod', 'exp', 'sqrt', 'gas', 'this', 'abs', 'min', 'max',
    'balance', 'transfer', 'block', 'number', 'blockHash', 'timestamp',

    # Other
    'constructor', 'fallback', 'receive', 'pragma', 'after', 'alias', 'apply', 'auto', 'case', 'copyof', 'default', 'defined', 'final', 'implements', 'in', 'inline', 'let',
    'macro', 'match', 'mutable', 'null', 'of', 'partial', 'promise', 'reference', 'relocatable', 'sealed', 'sizeof', 'static', 'supports', 'switch', 'typedef', 'typeof', 'var',
]

def get_function_nodes(ast):
    """Collect every function and modifier definition node in an AST.

    The AST is walked depth-first in pre-order. Only dict nodes are inspected;
    a dict is collected when its ``'type'`` is ``'FunctionDefinition'`` or
    ``'ModifierDefinition'``. Children are the dict values that are dicts, and
    the dict items of values that are lists. Lists nested directly inside
    lists, and a non-dict root, are not descended into.

    Args:
        ast: Root of the AST (nested dicts/lists); anything else yields ``[]``.

    Returns:
        List of the matching node dicts, in traversal order.
    """
    wanted_types = ('FunctionDefinition', 'ModifierDefinition')
    collected = []
    pending = [ast]  # explicit stack; the next node to visit is at the end

    while pending:
        current = pending.pop()
        if not isinstance(current, dict):
            continue
        if 'type' in current and current['type'] in wanted_types:
            collected.append(current)

        children = []
        for child in current.values():
            if isinstance(child, list):
                children.extend(child)
            elif isinstance(child, dict):
                children.append(child)
        # Push in reverse so the first child is popped (visited) first.
        pending.extend(reversed(children))

    return collected

def serialize_function_node(node, keywords):
    """Flatten one AST node into a space-separated ``key value`` token string.

    Keys are visited in the node's own order and some values are normalised
    before being rendered:

    * ``name`` / ``namePath`` / ``memberName`` become ``'XX'`` unless the value
      is found in ``keywords``;
    * ``decl`` becomes ``'parser_error'`` and ``number`` becomes
      ``'num_literal'``;
    * when ``type`` is one of the literal types listed below, a ``value`` key
      that follows it in the same node is replaced by the matching placeholder.

    Values are then rendered recursively: dicts via this function, lists as
    the space-joined rendering of their dict items (non-dict items are
    dropped; an empty list renders as ``'empty_list'``), everything else via
    ``str``.

    Args:
        node: Mapping representing an AST node.
        keywords: Container of identifiers that must be kept verbatim.

    Returns:
        The serialised node as a single string.
    """
    # (literal node type, placeholder emitted for that node's 'value').
    literal_placeholders = (
        ('stringLiteral', 'string_literal'),
        ('hexLiteral', 'hex_literal'),
        ('HexNumber', 'hex_value'),
        ('DecimalNumber', 'dec_value'),
    )
    identifier_keys = ('name', 'namePath', 'memberName')

    # Placeholder waiting for this node's 'value' key; set when a literal
    # 'type' has been seen and cleared once it is consumed.
    pending_placeholder = None
    tokens = []

    for key, value in node.items():
        if key in identifier_keys:
            if value not in keywords:
                value = 'XX'
        elif key == 'decl':
            value = 'parser_error'
        elif key == 'number':
            value = 'num_literal'
        elif key == 'type':
            for literal_type, placeholder in literal_placeholders:
                if value == literal_type:
                    pending_placeholder = placeholder
                    break
        elif key == 'value' and pending_placeholder is not None:
            value = pending_placeholder
            pending_placeholder = None

        if isinstance(value, list):
            if value:
                rendered = ' '.join(
                    serialize_function_node(child, keywords)
                    for child in value
                    if isinstance(child, dict)
                )
            else:
                rendered = 'empty_list'
        elif isinstance(value, dict):
            rendered = serialize_function_node(value, keywords)
        else:
            rendered = str(value)

        tokens.append(f'{key} {rendered}')

    return ' '.join(tokens)

def serialize_ast(ast, solidity_keywords):
    """Serialise every function/modifier definition of an AST into one string.

    Args:
        ast: Parsed AST passed to ``get_function_nodes``.
        solidity_keywords: Identifiers forwarded to ``serialize_function_node``
            so they are kept verbatim.

    Returns:
        The per-node serialisations joined by single spaces (an empty string
        when no function or modifier nodes are found).
    """
    return ' '.join(
        serialize_function_node(function_node, solidity_keywords)
        for function_node in get_function_nodes(ast)
    )


def preprocessDataframe(df, startIndex, rowCount, output_path):
    """Serialise a window of contracts to AST token sequences and save as CSV.

    For each row in ``df[startIndex : startIndex + rowCount]`` the
    ``source_code`` column is parsed and serialised. The result is written to
    ``output_path`` with the columns ``processed_sequence`` and ``labels``
    (the latter copied from the ``slither`` column).

    Args:
        df: DataFrame with ``source_code`` and ``slither`` columns.
        startIndex: Positional index of the first row to process.
        rowCount: Maximum number of rows to process. The window is clamped to
            the end of ``df``, so a final, shorter chunk is handled instead of
            raising ``IndexError``.
        output_path: Destination CSV path.

    Notes:
        A source that fails to parse yields ``None`` from
        ``parse_solidity_code`` and therefore an empty sequence, which reads
        back from the CSV as NaN. Serialisation errors are recorded as the
        literal ``"unserializable"``.
    """
    # Use one clamped window for both sources and labels so the two columns
    # always have the same length.
    end_index = min(startIndex + rowCount, len(df))
    source_codes = df['source_code'].iloc[startIndex:end_index]
    labels = df['slither'].iloc[startIndex:end_index]

    processed_sequences = []
    for rows_done, source_code in enumerate(source_codes, start=1):
        # Step 1: Parse the source code (None on parser failure)
        ast = parse_solidity_code(source_code)

        # Step 2: Serialize the AST
        try:
            serialized_ast = serialize_ast(ast, solidity_keywords)
        except Exception as e:
            print("Error serializing AST: ", e)
            serialized_ast = "unserializable"

        processed_sequences.append(serialized_ast)

        # Print progress after every 500 rows
        if rows_done % 500 == 0:
            print(f"{rows_done} rows processed")

    # Create a new DataFrame with the processed sequences and their labels
    processed_df = pd.DataFrame({'processed_sequence': processed_sequences,
                                 'labels': labels})

    # Save the new DataFrame to a CSV file
    processed_df.to_csv(output_path, index=False)

# Chunk selection: which rows of train_df to process and which numbered
# output file receives them.
rowCount = 15000
startIndex = 15000
csvIndex = 1
training_output_path = (
    '/content/drive/MyDrive/Practicum/ASTDataChunked/'
    f'training_ast_sequences_{csvIndex}.csv'
)
preprocessDataframe(
    train_df,
    startIndex=startIndex,
    rowCount=rowCount,
    output_path=training_output_path,
)

Cloning into 'python-solidity-parser'...
remote: Enumerating objects: 198, done.[K
remote: Counting objects: 100% (60/60), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 198 (delta 55), reused 49 (delta 49), pack-reused 138[K
Receiving objects: 100% (198/198), 270.51 KiB | 1.45 MiB/s, done.
Resolving deltas: 100% (103/103), done.
[0m[01;34mpython-solidity-parser[0m/  [01;34msample_data[0m/
/content/python-solidity-parser
README.md         [0m[01;34msamples[0m/  setup.py          [01;34msolidity_parser[0m/
requirements.txt  [01;34mscripts[0m/  [01;34msolidity-antlr4[0m/
Processing /content/python-solidity-parser
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting antlr4-python3-runtime==4.9.3 (from solidity-parser==0.1.1)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup

line 1788:14 extraneous input ',' expecting {'from', '{', '}', '(', 'error', 'for', 'function', 'address', 'calldata', 'if', 'assembly', 'return', 'revert', 'byte', 'let', '=:', 'switch', 'callback', DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'constructor', 'receive', Identifier, StringLiteralFragment}
line 1788:18 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive

1500 rows processed
Error parsing Solidity code: 'list' object has no attribute 'getText'
2000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
2500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'


line 857:14 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier, StringLiteralFragment}
line 857:18 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'd

3000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'list' object has no attribute 'getText'


line 2326:14 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier, StringLiteralFragment}
line 2326:18 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 

3500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'


line 1424:14 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier, StringLiteralFragment}
line 1424:18 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 

Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'


line 9:13 no viable alternative at input 'functionfallback'
line 43:13 no viable alternative at input 'functionfallback'
line 77:24 mismatched input 'fallback' expecting {'from', 'error', 'calldata', 'revert', 'callback', 'leave', 'payable', 'constructor', 'receive', Identifier}
line 79:8 extraneous input 'if' expecting {'from', '}', 'error', 'using', 'struct', 'modifier', 'function', 'event', 'enum', 'address', 'mapping', 'calldata', 'revert', 'var', 'bool', 'string', 'byte', 'callback', Int, Uint, Byte, Fixed, Ufixed, 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier}
line 79:16 mismatched input '==' expecting {'from', 'error', 'calldata', 'revert', 'callback', 'override', 'constant', 'immutable', 'leave', 'internal', 'payable', 'private', 'public', 'constructor', 'receive', Identifier}
line 79:27 mismatched input '=' expecting {'from', 'error', 'calldata', 'revert', 'callback', 'override', 'constant', 'immutable', 'leave', 'internal', 'payable', 'private',

Error parsing Solidity code: 'NoneType' object is not subscriptable
4000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'list' object has no attribute 'getText'
Error parsing Solidity code: 'NoneType' object has no attribute 'assemblyIdentifierList'
4500 rows processed
5000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
5500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'list' object has no attribute 'getText'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
6000 rows processed


line 71:12 extraneous input '=' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier, StringLiteralFragment}
line 149:12 extraneous input '=' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do

Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
6500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'list' object has no attribute 'getText'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'list' object has no attribute 'getText'
7000 rows processed
Error parsing Solidity code: 'list' object has no attribute 'getText'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
7500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneTyp

line 857:14 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier, StringLiteralFragment}
line 857:18 extraneous input ',' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'd

Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'list' object has no attribute 'getText'
10000 rows processed


line 488:11 no viable alternative at input 'functionfallback'
line 496:4 extraneous input 'fallback' expecting {'~', 'from', '{', '}', '(', 'error', 'for', 'function', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'receive', Identifier, StringLiteralFragment}
line 782:11 no viable alternative at input 'functionfallback'


Error parsing Solidity code: 'NoneType' object is not subscriptable
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'


line 107:12 extraneous input '=' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'do', 'return', 'throw', 'emit', 'revert', 'var', 'bool', 'string', 'byte', '++', '--', 'new', '+', '-', 'after', 'delete', '!', 'let', '=:', 'switch', 'callback', Int, Uint, Byte, Fixed, Ufixed, BooleanLiteral, DecimalNumber, HexNumber, HexLiteralFragment, 'break', 'continue', 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier, StringLiteralFragment}
line 185:12 extraneous input '=' expecting {<EOF>, 'pragma', '~', 'import', 'from', '{', '}', 'abstract', 'contract', 'interface', 'library', '(', 'error', 'using', 'for', 'struct', 'modifier', 'function', 'event', 'enum', '[', 'address', 'mapping', 'calldata', 'if', 'try', 'while', 'unchecked', 'assembly', 'd

10500 rows processed
11000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
11500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
12000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
12500 rows processed


line 5:13 no viable alternative at input 'functionemit'
line 5:24 mismatched input ')' expecting {';', '='}
line 6:12 extraneous input '(' expecting {'from', 'error', 'calldata', 'revert', 'callback', 'override', 'constant', 'immutable', 'leave', 'internal', 'payable', 'private', 'public', 'constructor', 'receive', Identifier}
line 6:14 extraneous input ')' expecting {';', '='}
line 8:0 extraneous input '}' expecting {<EOF>, 'pragma', 'import', 'from', 'abstract', 'contract', 'interface', 'library', 'error', 'struct', 'function', 'enum', 'address', 'mapping', 'calldata', 'revert', 'var', 'bool', 'string', 'byte', 'callback', Int, Uint, Byte, Fixed, Ufixed, 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier}
line 18:25 mismatched input 'emit' expecting {'from', 'error', 'calldata', 'revert', 'callback', 'leave', 'payable', 'constructor', 'receive', Identifier}
line 18:32 mismatched input ';' expecting '('


Error parsing Solidity code: 'NoneType' object is not subscriptable
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
13000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
13500 rows processed


line 523:0 extraneous input '/' expecting {<EOF>, 'pragma', 'import', 'from', 'abstract', 'contract', 'interface', 'library', 'error', 'struct', 'function', 'enum', 'address', 'mapping', 'calldata', 'revert', 'var', 'bool', 'string', 'byte', 'callback', Int, Uint, Byte, Fixed, Ufixed, 'leave', 'payable', 'type', 'constructor', 'fallback', 'receive', Identifier}
line 524:12 mismatched input '(' expecting 'constant'
line 524:14 mismatched input ')' expecting 'constant'
line 525:1 mismatched input '*' expecting 'constant'
line 525:12 mismatched input '(' expecting 'constant'
line 525:14 mismatched input ')' expecting 'constant'
line 525:24 mismatched input '-' expecting 'constant'
line 526:1 mismatched input '*' expecting 'constant'
line 526:11 missing 'constant' at 'under'
line 526:17 missing '=' at 'the'
line 526:21 missing ';' at 'MIT'
line 526:25 missing 'constant' at 'license'
line 527:1 mismatched input '*' expecting '='
line 527:7 mismatched input ':' expecting 'constant'


Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'


line 86:6 mismatched input 'true' expecting {DecimalNumber, HexNumber, HexLiteralFragment, StringLiteralFragment}
line 108:6 mismatched input 'false' expecting {DecimalNumber, HexNumber, HexLiteralFragment, StringLiteralFragment}
line 112:8 mismatched input 'true' expecting {DecimalNumber, HexNumber, HexLiteralFragment, StringLiteralFragment}
line 115:8 mismatched input 'false' expecting {DecimalNumber, HexNumber, HexLiteralFragment, StringLiteralFragment}


Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
14000 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
14500 rows processed
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'NoneType' object has no attribute 'identifier'
Error parsing Solidity code: 'None

In [15]:
# Sanity check of the written chunk: list the rows (among the first
# MAX_ROWS_TO_CHECK) whose serialised sequence is suspiciously short.
MAX_ROWS_TO_CHECK = 4000
MIN_SEQUENCE_LENGTH = 1000

tdf = pd.read_csv(training_output_path)

# Slice-based selection never raises when the file has fewer rows, and
# .iloc[:, 0] avoids the deprecated positional Series lookup (row[0]).
# Each cell is converted with str() first, so missing values (NaN) count as
# the 3-character string 'nan' and are reported as short.
sequence_lengths = tdf.iloc[:MAX_ROWS_TO_CHECK, 0].map(lambda cell: len(str(cell)))
short_rows = sequence_lengths[sequence_lengths < MIN_SEQUENCE_LENGTH].index

for j in short_rows:
    print(j)

count = len(short_rows)
print(f"count = {count}")

24
117
222
237
266
309
386
410
463
525
568
592
805
829
875
941
981
1262
1288
1305
1442
1539
1570
1585
1605
1635
1654
1682
1909
2033
2301
2397
2415
2523
2740
2758
2768
2842
2869
2899
2966
2971
3120
3165
3350
3584
3670
3690
3704
3989
count = 50


In [16]:
# Inspect the first column of row 24 (one of the rows flagged as short).
# Use a single positional .iloc lookup instead of the deprecated row[0].
print(tdf.iloc[24, 0])

nan


In [None]:
# Print the dtype pandas assigns to each row of tdf when it is extracted as a
# Series (one output line per row).
for j in range(len(tdf)):
    print(f"Row {j} data type: {tdf.iloc[j].dtypes}")

In [13]:
# Preview the serialised sequence (first column) of the first rows of tdf.
# Slicing with .iloc copes with frames shorter than 10 rows and avoids the
# deprecated positional Series lookup (row[0]).
for j, sequence in enumerate(tdf.iloc[:10, 0]):
    print(f"Row {j}: {sequence}")

Row 0: type FunctionDefinition name XX parameters type ParameterList parameters empty_list returnParameters empty_list body empty_list visibility external modifiers empty_list isConstructor False isFallback False isReceive False stateMutability None type FunctionDefinition name XX parameters type ParameterList parameters type Parameter typeName type ElementaryTypeName name address name XX storageLocation None isStateVar False isIndexed False returnParameters type ParameterList parameters type Parameter typeName type ElementaryTypeName name bool name XX storageLocation None isStateVar False isIndexed False type Parameter typeName type ArrayTypeName baseTypeName type ElementaryTypeName name uint256 length type NumberLiteral number 12 subdenomination None name XX storageLocation memory isStateVar False isIndexed False body empty_list visibility external modifiers empty_list isConstructor False isFallback False isReceive False stateMutability view type FunctionDefinition name XX parameters

In [None]:
# install datasets
!pip install datasets

# Let's import the library. We typically only need at most two methods:
from datasets import list_datasets, load_dataset

from pprint import pprint

# Download (or reuse the cached copy of) the dataset in its
# 'big-multilabel' configuration.
dataset = load_dataset('mwritescode/slither-audited-smart-contracts', 'big-multilabel')

# Pull out the three splits under their own names.
train_data, validation_data, test_data = (
    dataset[split_key] for split_key in ("train", "validation", "test")
)

# Report the shape of every split.
for split_label, split_data in (
    ("train_data", train_data),
    ("validation_data", validation_data),
    ("test_data", test_data),
):
    print(f"Dimensions of {split_label}:", split_data.shape)

In [3]:
import pandas as pd
# Build a DataFrame from the "train" split (train_data). NOTE: this rebinds
# train_df, which the preprocessing cell earlier in this notebook loads from
# a CSV file.
train_df = pd.DataFrame(train_data)

In [4]:
# Show every field of the row at position 14.
print(train_df.iloc[14])

address               0x0728e0023699186D5693Bb8e7e762B9972B3852E
source_code    pragma solidity 0.6.12;\npragma experimental A...
bytecode       0x6080604052600436106100345760003560e01c80630b...
slither                                                   [5, 1]
Name: 14, dtype: object


In [5]:
# Show every field of the row at position 34.
print(train_df.iloc[34])

address               0x13B70f58f8e7Fce3811401fF65D2dF33AD5DC61D
source_code    pragma solidity 0.6.12;\npragma experimental A...
bytecode       0x6080604052600436106100295760003560e01c80638b...
slither                                                   [5, 1]
Name: 34, dtype: object
