In [1]:
import os
import requests
import pprint
import pandas as pd
from solidity_parser import parser

In [2]:
if not os.path.isdir('/tmp'):
    os.mkdir('/tmp')

# Construct governance surface of a Solidity smart contract
- [ ] Parse structure of the smart contract, extracting all functions/modifiers/events, their parameters, and other relevant properties (e.g., visibility)
- [ ] Get comments corresponding to function/parameter definitions to contextualize the structure
- [ ] Select subset of functions/parameters relevant to governance, preserving their structural relationships

The following example tests this out with [Gnosis Safe contract](https://github.com/gnosis/safe-contracts/blob/main/contracts/GnosisSafe.sol).

## Parse contract
Use the `solidity_parser` library to parse the contract as an [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST). This gets us the full structure of the contract, including functions, their parameters, and which functions call which other functions. 

Since this library does not preserve comments, add location information to the AST nodes so that later we can go back to the text and extract the relevant function/parameter descriptions.

Note that it's also possible to create abject from the OrderedList using `parser.objectify`, but this turns out not to actually be that useful, since only some of the information in the AST is presered.

In [3]:
def get_parameter_type(param):
    """Get parameter dtype"""
    typeDict = param['typeName']
    paramType = typeDict.get('name', typeDict.get('namePath', None))
    
    return paramType

In [4]:
def extract_functions_and_parameters(contracts):
    """Collect information on functions/modifiers/events and their corresponding parameters
    Returns two DataFrames, one for functions, the other for their parameters
    """
    df_items = pd.DataFrame()
    df_parameters = pd.DataFrame()
    # Iterate through contracts (there may be multiple in a solidity file)
    for c in contracts:
        contractName = c['name']
        baseContracts = [b['baseName']['namePath'] for b in c.get('baseContracts', [])]
        print(f"{contractName} inherits from {baseContracts}")
    
        # Append item and parameters for contract itself
        itemDict = {'contract': contractName,
                    'name': None,
                    'type': 'ContractDefinition',
                    #'modifiers': item['modifiers'] # TODO: find example to parse this correctly
                    #'visibility': item['visibility] # TODO: find example to parse this correctly
                    'line_numbers': (c['loc']['start']['line'], c['loc']['end']['line'])}
        df_items = df_items.append(pd.Series(itemDict), ignore_index=True)

        # Iterate through subnotes (functions, event definitions, etc.) in contract
        for item in c.get('subNodes', []):
            itemType = item['type']
            
            if itemType in ['StateVariableDeclaration']:
                # Append contract variables to DataFrame
                for param in item.get('variables', {}):
                    paramDict = {'parameter_name': param['name'],
                                 'contract': contractName,
                                 'type': get_parameter_type(param),
                                 'line_number': param['loc']['start']['line'],
                                 'description': '',
                                 'initialValue': item['initialValue'],
                                 'visibility': param.get('visibility')}
                    df_parameters = df_parameters.append(pd.Series(paramDict), ignore_index=True)

            if itemType in ['EventDefinition', 'ModifierDefinition', 'FunctionDefinition']:
                itemName = item['name']

                # Append function/event/modifier to DataFrame
                itemDict = {'contract': contractName,
                            'name': itemName,
                            'type': itemType,
                            #'modifiers': item['modifiers'] # TODO: find example to parse this correctly
                            'visibility': item.get('visibility'), # TODO: find example to parse this correctly
                            'line_numbers': (item['loc']['start']['line'], item['loc']['end']['line'])}
                df_items = df_items.append(pd.Series(itemDict), ignore_index=True)

                # Append each parameter to DataFrame
                for param in item.get('parameters', {}).get('parameters', []):
                    paramDict = {'parameter_name': param['name'],
                                 'function_name': itemName,
                                 'contract': contractName,
                                 'type': get_parameter_type(param),
                                 'line_number': param['loc']['start']['line'],
                                 'description': ''}
                    df_parameters = df_parameters.append(pd.Series(paramDict), ignore_index=True)

    return df_items, df_parameters

## Parse comments
Extract two kinds of comments:
- Docstrings, or any other set of comments right before a function
- Inline comments on parameter definitions

In [5]:
def clean_comment_lines(lines):
    lines_new = [s.strip() for s in lines if s]
    lines_new = [s.split('//')[-1] for s in lines]
    lines_new = [s.strip('/* ').strip() for s in lines_new]
    lines_new = [s for s in lines_new if s]

    return lines_new

In [6]:
def clean_parameter_lines(lines):
    lines_new = [s.strip() for s in lines if s]
    if len(lines_new) > 0:
        prevLines = [s for s in lines_new[:-1] if s.startswith('//')]
        tmp = lines_new[-1].split('//')
        inLine = [tmp[-1]] if len(tmp) > 1 else ['']
        lines_new = prevLines + inLine
        lines_new = [s.split('//')[-1] for s in lines_new if len(lines_new) > 1]
        lines_new = [s.strip('/* ').strip() for s in lines_new]
        lines_new = [s for s in lines_new if s]

    return lines_new

In [7]:
def parse_function_description(lines):
    """Parse comment before a function definition.
    May be a block comment or individual line comments."""
    commentDict = {}

    # Parse dev line(s); keep just the first one
    devLines = [s.split('@dev')[-1].strip() for s in lines if s.startswith('@dev')]
    try: 
        commentDict['dev'] = devLines[0]
    except IndexError:
        commentDict['dev'] = None

    # Parse notice line(s); keep just the first one
    noticeLines = [s.split('@notice')[-1].strip() for s in lines if s.startswith('@notice')]
    try: 
        commentDict['notice'] = noticeLines[0]
    except IndexError:
        commentDict['notice'] = None

    # Parse parameter lines; create dict of parameter:description pairs
    paramLines = [s.split('@param')[-1].strip().split(' ', 1) for s in lines if s.startswith('@param')]
    commentDict['param'] = [{'parameter': p[0], 'description': p[1]} for p in paramLines]

    return commentDict

In [8]:
def parse_parameter_description(lines, parameterName):
    """Parse comment relating to parameter, either inline or right before the paramter"""

    description = ''

    # Try to get notice first
    noticeLines = [s.split('@notice')[-1].strip() for s in lines if s.startswith('@notice')]
    if len(noticeLines) > 0:
        description = noticeLines[-1]

    # Parse parameter lines; create dict of parameter:description pairs
    paramLines = [s.split('@param')[-1].strip().split(' ', 1) for s in lines if '@param' in s]
    paramDict = {p[0]: p[1] for p in paramLines}
    description = paramDict.get(parameterName, description)

    # If above two methods failed
    if description == '' and len(lines) > 0:
        description = lines[-1]

    return description

In [9]:
def add_docstring_comments(lines, df_items, df_parameters):
    """Parse comments and add them to the relevant rows in the function and parameter DataFrames"""

    df_i = df_items.copy()
    df_p = df_parameters.copy()

    df_i['full_comment'] = ''
    df_i['dev'] = ''
    df_i['notice'] = ''
    df_i['description'] = ''

    commentStart = 0
    for i, row in df_i.iterrows():

        #if row['type'] == 'FunctionDefinition':
        functionName = row['name']

        # Grab and clean comment lines
        commentEnd = row['line_numbers'][0] - 1
        commentLines = clean_comment_lines(lines[commentStart:commentEnd])
        commentStr = '\n'.join(commentLines)

        # Parse comment lines
        commentDict = parse_function_description(commentLines)

        # Add function descriptions to function
        df_i.iat[i, df_i.columns.get_loc('full_comment')] = commentStr
        for key, value in commentDict.items():
            if key in df_i.columns:
                df_i.iat[i, df_i.columns.get_loc(key)] = value

        # Add parameter descriptions to parameters
        for item in commentDict['param']:
            index = df_p.loc[(df_p['function_name']==functionName) &
                             (df_p['parameter_name']==item['parameter'])].index[0]
            df_p.iat[index, df_p.columns.get_loc('description')] = item['description']

        commentStart = row['line_numbers'][1]

    return df_i, df_p

In [10]:
def add_inline_comments(lines, df_parameters):
    """Parse comments and add them to the relevant rows in the parameter DataFrame"""

    df_p = df_parameters.copy()
    df_p['full_comment'] = ''
    df_p['description'] = ''

    commentStart = 0
    for i, row in df_p.iterrows():   
        # Grab and clean comment lines
        commentEnd = int(row['line_number'])
        commentLines = clean_parameter_lines(lines[min(commentStart, commentEnd - 2):commentEnd + 1])

        # Parse comment and add to dict
        description = parse_parameter_description(commentLines, row['parameter_name'])
        if df_p.iat[i, df_p.columns.get_loc('description')] == '':
            df_p.iat[i, df_p.columns.get_loc('description')] = description
            df_p.iat[i, df_p.columns.get_loc('full_comment')] = '\n'.join(commentLines)

        commentStart = commentEnd

    return df_p

# Parse Compound Governor Bravo

In [11]:
urls = ['https://raw.githubusercontent.com/notchia/metagov/main/contracts/GovernorBravoDelegator.sol',
        'https://raw.githubusercontent.com/notchia/metagov/main/contracts/GovernorBravoInterfaces.sol'] 
fpath = '/tmp/solidity.txt'

In [12]:
df_cgb_items = pd.DataFrame()
df_cgb_parameters = pd.DataFrame()
for url in urls:
    # Get content of Gnosis Safe contract and save to temporary file
    content = requests.get(url).text
    with open(fpath, 'w') as f:
        f.write(content)
    lines = content.split('\n')

    # Get contract structure as OrderedList
    sourceUnit = parser.parse_file(fpath, loc=True)

    contracts = [c for c in sourceUnit['children'] if c['type'] == 'ContractDefinition']

    # Save to file
    savename = url.split('/')[-1].split('.sol')[0]
    with open(savename + '.txt', 'w') as f:
        pprint.pprint(sourceUnit, stream=f)

    # Get  
    df_items, df_parameters = extract_functions_and_parameters(contracts)
    df_items, df_parameters = add_docstring_comments(lines, df_items, df_parameters)
    df_parameters = add_inline_comments(lines, df_parameters)
    df_cgb_items = df_cgb_items.append(df_items)
    df_cgb_parameters = df_cgb_parameters.append(df_parameters)


GovernorBravoDelegator inherits from ['GovernorBravoDelegatorStorage', 'GovernorBravoEvents']
GovernorBravoEvents inherits from []
GovernorBravoDelegatorStorage inherits from []
GovernorBravoDelegateStorageV1 inherits from ['GovernorBravoDelegatorStorage']
TimelockInterface inherits from []
CompInterface inherits from []
GovernorAlpha inherits from []


In [13]:
df_cgb_items.to_csv('tmp/items.csv')
df_cgb_items.drop(columns=['line_numbers', 'full_comment', 'description'])

Unnamed: 0,contract,name,type,visibility,dev,notice
0,GovernorBravoDelegator,,ContractDefinition,,,
1,GovernorBravoDelegator,constructor,FunctionDefinition,public,,
2,GovernorBravoDelegator,_setImplementation,FunctionDefinition,public,,Called by the admin to update the implementati...
3,GovernorBravoDelegator,delegateTo,FunctionDefinition,internal,It returns to the external caller whatever the...,Internal method to delegate execution to anoth...
4,GovernorBravoDelegator,"function()externalpayable{(boolsuccess,)=imple...",FunctionDefinition,external,Delegates execution to an implementation contr...,
0,GovernorBravoEvents,,ContractDefinition,,,
1,GovernorBravoEvents,ProposalCreated,EventDefinition,,,
2,GovernorBravoEvents,VoteCast,EventDefinition,,,An event emitted when a vote has been cast on ...
3,GovernorBravoEvents,ProposalCanceled,EventDefinition,,,An event emitted when a proposal has been canc...
4,GovernorBravoEvents,ProposalQueued,EventDefinition,,,An event emitted when a proposal has been queu...


In [14]:
df_cgb_parameters.to_csv('tmp/parameters.csv')
df_cgb_parameters.drop(columns=['line_number', 'type', 'initialValue', 'full_comment'])

Unnamed: 0,parameter_name,function_name,contract,description,visibility
0,timelock_,constructor,GovernorBravoDelegator,,
1,comp_,constructor,GovernorBravoDelegator,,
2,admin_,constructor,GovernorBravoDelegator,,
3,implementation_,constructor,GovernorBravoDelegator,,
4,votingPeriod_,constructor,GovernorBravoDelegator,,
...,...,...,...,...,...
55,signature,executeTransaction,TimelockInterface,,
56,data,executeTransaction,TimelockInterface,,
57,eta,executeTransaction,TimelockInterface,,
58,account,getPriorVotes,CompInterface,,
