In [1]:
import argparse
import logging
import os
import sys

from redbaron import RedBaron

from toolz.curried import assoc, concatv, keyfilter, map, merge
from toolz.curried.operator import attrgetter


# Loading

In [2]:
# Root of the model package. The trailing slash matters: relative paths are
# obtained below by slicing this exact prefix off the joined path.
source_dir = '../../openfisca-france/openfisca_france/model/'
prefix_length = len(source_dir)

# Every file found under source_dir, as a path relative to source_dir.
filenames = []
for root, directories, files in os.walk(source_dir):
    for filename in files:
        absolute_path = os.path.join(root, filename)
        assert absolute_path.startswith(source_dir)
        complete_filename = absolute_path[prefix_length:]
        filenames.append(complete_filename)
filenames

['__init__.py',
 'datatrees.py',
 'mesures.py',
 'base.py',
 'patrimoine/__init__.py',
 'revenus/__init__.py',
 'revenus/autres.py',
 'revenus/capital/financier.py',
 'revenus/capital/__init__.py',
 'revenus/capital/plus_value.py',
 'revenus/capital/foncier.py',
 'revenus/activite/non_salarie.py',
 'revenus/activite/__init__.py',
 'revenus/activite/salarie.py',
 'revenus/remplacement/retraite.py',
 'revenus/remplacement/__init__.py',
 'revenus/remplacement/chomage.py',
 'revenus/remplacement/indemnites_journalieres_securite_sociale.py',
 'prestations/__init__.py',
 'prestations/aides_logement.py',
 'prestations/education.py',
 'prestations/minima_sociaux/aah.py',
 'prestations/minima_sociaux/ppa.py',
 'prestations/minima_sociaux/ass.py',
 'prestations/minima_sociaux/rsa.py',
 'prestations/minima_sociaux/asi_aspa.py',
 'prestations/minima_sociaux/__init__.py',
 'prestations/minima_sociaux/cmu.py',
 'prestations/prestations_familiales/aeeh.py',
 'prestations/prestations_familiales/__init

In [3]:
# Files left out of the parsing step. list.remove raises ValueError when an
# entry is absent, so this cell cannot be run twice on the same list.
for excluded_filename in (
        'base.py',
        'datatrees.py',
        'prelevements_obligatoires/prelevements_sociaux/cotisations_sociales/preprocessing.py',
        ):
    filenames.remove(excluded_filename)

In [4]:
# Parse each remaining file with RedBaron; trees are keyed by relative filename.
redbaron_trees = {}
for filename in filenames:
    source_path = source_dir + filename
    with open(source_path) as source_file:
        source_code = source_file.read()
    red = redbaron_trees[filename] = RedBaron(source_code)
    print('{} parsed'.format(filename))

__init__.py parsed
mesures.py parsed
patrimoine/__init__.py parsed
revenus/__init__.py parsed
revenus/autres.py parsed
revenus/capital/financier.py parsed
revenus/capital/__init__.py parsed
revenus/capital/plus_value.py parsed
revenus/capital/foncier.py parsed
revenus/activite/non_salarie.py parsed
revenus/activite/__init__.py parsed
revenus/activite/salarie.py parsed
revenus/remplacement/retraite.py parsed
revenus/remplacement/__init__.py parsed
revenus/remplacement/chomage.py parsed
revenus/remplacement/indemnites_journalieres_securite_sociale.py parsed
prestations/__init__.py parsed
prestations/aides_logement.py parsed
prestations/education.py parsed
prestations/minima_sociaux/aah.py parsed
prestations/minima_sociaux/ppa.py parsed
prestations/minima_sociaux/ass.py parsed
prestations/minima_sociaux/rsa.py parsed
prestations/minima_sociaux/asi_aspa.py parsed
prestations/minima_sociaux/__init__.py parsed
prestations/minima_sociaux/cmu.py parsed
prestations/prestations_familiales/aeeh.p

# Custom exceptions

In [5]:
# Debugging slots: ParsingException.__init__ overwrites them with the node and
# contexts of the most recent parsing failure.
angry_rbnode = angry_global_context = angry_local_context = None

In [6]:
class ParsingException(Exception):
    '''Error raised while traversing a RedBaron tree.

    The offending node and both contexts are kept on the exception instance
    (``rbnode``, ``global_context``, ``local_context``) and are also copied to
    the module-level ``angry_rbnode``, ``angry_global_context`` and
    ``angry_local_context`` variables, so the latest failure can be inspected
    from a later notebook cell.
    '''

    def __init__(self, message, rbnode, global_context, local_context):
        global angry_rbnode
        global angry_global_context
        global angry_local_context

        # Per-instance copy: unlike the globals below, it is not overwritten
        # when another ParsingException is created afterwards.
        self.rbnode = rbnode
        self.global_context = global_context
        self.local_context = local_context

        angry_rbnode = rbnode
        angry_global_context = global_context
        angry_local_context = local_context

        super(ParsingException, self).__init__(message)

In [7]:
class NotImplementedParsingError(ParsingException):
    '''Raised when the traversal meets something it has no handling for yet.'''

In [8]:
class AssertionParsingError(ParsingException):
    '''Raised by parsing_assert when an expected condition does not hold.'''

In [9]:
def parsing_assert(cond, rbnode, global_context, local_context):
    '''Raise AssertionParsingError (with an empty message) unless cond is truthy.

    rbnode, global_context and local_context are only forwarded to the
    exception; nothing is returned when the condition holds.
    '''
    if not cond:
        raise AssertionParsingError('', rbnode, global_context, local_context)

# Helpers

In [10]:
import unicodedata

def rbnode_to_exception(rbnode):
    '''Return the source code of rbnode as an ASCII-only native string.

    Written because exception messages are ASCII only in Python 2. Accented
    characters are reduced to their base letter; anything else that is not
    ASCII is dropped.

    rbnode.dumps() may return either a UTF-8 byte string or a text string;
    both are accepted.
    '''
    source = rbnode.dumps()
    if isinstance(source, bytes):
        # Byte string (this is `str` on Python 2): decode before normalizing.
        source = source.decode('utf-8')

    # NFKD splits an accented character into base letter + combining mark, so
    # encoding with 'ignore' keeps the letter and discards the mark.
    ascii_source = unicodedata.normalize('NFKD', source).encode('ascii', 'ignore')

    if not isinstance(ascii_source, str):
        # Python 3: encode() produced bytes, convert back to native text.
        ascii_source = ascii_source.decode('ascii')

    return ascii_source

In [11]:
def parse_date(atomtrailer, local_context, global_context):
    '''Extract the three arguments of a `date(<int>, <int>, <int>)` node.

    Returns a dict with keys 'year', 'month' and 'day', whose values are taken
    as-is from the `.value` of the three int nodes. Any deviation from the
    expected shape goes through parsing_assert.

    The contexts are forwarded to parsing_assert in the order they are declared
    here, whereas parsing_assert declares (rbnode, global_context,
    local_context). Callers in this file pass (node, global_context,
    local_context) positionally, so the two inversions cancel out.
    '''
    def check(cond):
        parsing_assert(cond, atomtrailer, local_context, global_context)

    check(atomtrailer.type == 'atomtrailers')
    check(len(atomtrailer.value) == 2)

    callee = atomtrailer.value[0]
    check(callee.type == 'name')
    check(callee.value == 'date')

    call_node = atomtrailer.value[1]
    check(call_node.type == 'call')
    check(len(call_node.value) == 3)

    parsed = {}
    for position, field in enumerate(('year', 'month', 'day')):
        argument = call_node.value[position]
        check(argument.type == 'call_argument')
        check(not argument.target)  # keyword arguments are rejected
        check(argument.value.type == 'int')
        parsed[field] = argument.value.value

    return parsed

In [12]:
def parse_enum(atomtrailers, local_context, global_context):
    '''Extract the items of an `Enum([<unicode string>, ...])` node.

    Returns the list of the `.value` of each element of the list literal, in
    order. Any deviation from the expected shape goes through parsing_assert,
    which is given `atomtrailers` as the offending node.

    The contexts are forwarded to parsing_assert in the order they are declared
    here (same convention as parse_date). The caller in this file passes
    (node, global_context, local_context) positionally, so each context reaches
    parsing_assert in the slot it belongs to.
    '''
    def check(cond):
        # Uses the function's own argument: no `rbnode` variable exists in this
        # scope, relying on one would pick up whatever global happens to exist.
        parsing_assert(cond, atomtrailers, local_context, global_context)

    check(atomtrailers.type == 'atomtrailers')

    check(len(atomtrailers.value) == 2)
    check(atomtrailers.value[0].type == 'name')
    check(atomtrailers.value[0].value == 'Enum')

    call_node = atomtrailers.value[1]
    check(call_node.type == 'call')
    check(len(call_node.value) == 1)
    check(call_node.value[0].type == 'call_argument')
    check(not call_node.value[0].target)  # keyword argument is rejected

    enum_list_node = call_node.value[0].value
    check(enum_list_node.type == 'list')

    enum_list = []
    for element in enum_list_node.value:
        check(element.type == 'unicode_string')
        enum_list.append(element.value)

    return enum_list

# Module traversal functions

In [13]:
def visit_module_rbnode(rbnode, global_context, local_context):
    '''Dispatch a module-level node to the `visit_module_<type>` function.

    The visitor is looked up by name among the module globals, using
    rbnode.type as suffix, and its return value is returned.

    Raises NotImplementedParsingError when no such visitor is defined.
    '''
    # Direct lookup of the full name: the key always starts with the
    # 'visit_module_' prefix, so pre-filtering globals() on that prefix (and
    # copying it for every node) is unnecessary.
    visitor = globals().get('visit_module_' + rbnode.type)
    if visitor is None:
        raise NotImplementedParsingError(
            'Module visitor not declared for type="{type}"'.format(
                type=rbnode.type,
                ), rbnode, global_context, local_context)
    return visitor(rbnode, global_context, local_context)


In [14]:
def visit_module_endl(rbnode, global_context, local_context):
    '''Ignore a module-level node of type 'endl': nothing is recorded.'''
    return None

In [15]:
def visit_module_from_import(rbnode, global_context, local_context):
    '''Keep a module-level node of type 'from_import' in local_context['imports'].

    The node is stored as-is (TODO: not transformed yet). The visitor must be
    called with a module context.
    '''
    in_module = local_context['keyword'] == 'module'
    parsing_assert(in_module, rbnode, global_context, local_context)

    recorded_imports = local_context['imports']
    recorded_imports.append(rbnode)

In [16]:
def visit_module_import(rbnode, global_context, local_context):
    '''Keep a module-level node of type 'import' in local_context['imports'].

    The node is stored as-is (TODO: not transformed yet). The visitor must be
    called with a module context.
    '''
    in_module = local_context['keyword'] == 'module'
    parsing_assert(in_module, rbnode, global_context, local_context)

    recorded_imports = local_context['imports']
    recorded_imports.append(rbnode)

In [17]:
def visit_module_comment(rbnode, global_context, local_context):
    '''Ignore a module-level comment node (TODO: comments are discarded for now).'''
    return None

In [18]:
def visit_module_class(rbnode, global_context, local_context):
    '''Record a module-level class in local_context['classes'].

    The class must have no decorators and every base must be a plain name.
    The recorded dict holds the class name, the list of base names and the
    unvisited class body (rbnode.value) under 'content'.
    '''
    def check(cond):
        parsing_assert(cond, rbnode, global_context, local_context)

    check(local_context['keyword'] == 'module')

    class_name = rbnode.name

    check(not rbnode.decorators)

    parent_names = []
    for parent in rbnode.inherit_from:
        check(parent.type == 'name')
        parent_names.append(parent.value)

    local_context['classes'].append({
        'type': 'class',
        'name': class_name,
        'upper_classes': parent_names,
        'content': rbnode.value,
        })

In [19]:
def visit_module_def(rbnode, global_context, local_context):
    '''Record a module-level function in local_context['auxiliary_functions'].

    Functions named '_revprim' or 'preload_zone_apl' are skipped. The node is
    stored as-is (TODO: not transformed yet).
    '''
    in_module = local_context['keyword'] == 'module'
    parsing_assert(in_module, rbnode, global_context, local_context)

    if rbnode.name not in ('_revprim', 'preload_zone_apl'):
        local_context['auxiliary_functions'].append(rbnode)

In [39]:
def visit_module_assignment(rbnode, global_context, local_context):
    '''Record a module-level `name = value` assignment.

    Depending on the type of the assigned value:
    - 'int': appended to local_context['constants'] with type 'int';
    - 'name': must be `None`, appended to local_context['constants'];
    - 'atomtrailers' starting with `Enum`: parsed with parse_enum and appended
      to local_context['enums'];
    - 'atomtrailers' starting with `logging`: ignored.
    The assignment to 'zone_apl_by_depcom' is skipped altogether.

    Raises ParsingException for any other kind of assigned value.
    '''
    def check(cond):
        parsing_assert(cond, rbnode, global_context, local_context)

    check(local_context['keyword'] == 'module')
    check(rbnode.operator == '')  # plain `=`, no augmented assignment
    check(rbnode.target.type == 'name')
    name = rbnode.target.value

    if name in ('zone_apl_by_depcom',):
        return

    assigned = rbnode.value

    if assigned.type == 'int':
        local_context['constants'].append({
            'name': name,
            'type': 'int',
            'value': assigned.value,
            })
    elif assigned.type == 'name':
        check(assigned.value == 'None')
        local_context['constants'].append({
            'name': name,
            'type': 'None',
            'value': None,
            })
    elif assigned.type == 'atomtrailers':
        check(assigned.value[0].type == 'name')
        function_name = assigned.value[0].value

        if function_name == 'Enum':
            enum_list = parse_enum(assigned, global_context, local_context)
            local_context['enums'].append({
                'name': name,
                'enum_list': enum_list,
                })
        elif function_name != 'logging':
            # `logging...` assignments are deliberately ignored
            raise ParsingException('Unknown atomtrailers', rbnode, global_context, local_context)
    else:
        raise ParsingException('Unknown type', rbnode, global_context, local_context)


# Module parsing

In [40]:
# Visit the top-level nodes of every parsed file and keep, per module name,
# what the module visitors collected.
global_context = {}
collected_keys = ('imports', 'classes', 'enums', 'auxiliary_functions', 'constants')

for name in filenames:
    print('Visiting ' + name)
    red = redbaron_trees[name]

    # Fresh context for each module: one empty list per collected category.
    local_context = {'keyword': 'module', 'module_name': name}
    local_context.update((key, []) for key in collected_keys)

    for rbnode in red:
        visit_module_rbnode(rbnode, global_context, local_context)

    global_context[name] = {key: local_context[key] for key in collected_keys}

parsed_modules = global_context

Visiting __init__.py
Visiting mesures.py
Visiting patrimoine/__init__.py
Visiting revenus/__init__.py
Visiting revenus/autres.py
Visiting revenus/capital/financier.py
Visiting revenus/capital/__init__.py
Visiting revenus/capital/plus_value.py
Visiting revenus/capital/foncier.py
Visiting revenus/activite/non_salarie.py
Visiting revenus/activite/__init__.py
Visiting revenus/activite/salarie.py
Visiting revenus/remplacement/retraite.py
Visiting revenus/remplacement/__init__.py
Visiting revenus/remplacement/chomage.py
Visiting revenus/remplacement/indemnites_journalieres_securite_sociale.py
Visiting prestations/__init__.py
Visiting prestations/aides_logement.py
Visiting prestations/education.py
Visiting prestations/minima_sociaux/aah.py
Visiting prestations/minima_sociaux/ppa.py
Visiting prestations/minima_sociaux/ass.py
Visiting prestations/minima_sociaux/rsa.py
Visiting prestations/minima_sociaux/asi_aspa.py
Visiting prestations/minima_sociaux/__init__.py
Visiting prestations/minima_soci

# Class traversal functions

In [61]:
def visit_class_rbnode(rbnode, global_context, local_context):
    '''Dispatch a class-body node to the `visit_class_<type>` function.

    The visitor is looked up by name among the module globals, using
    rbnode.type as suffix, and its return value is returned.

    Raises NotImplementedParsingError when no such visitor is defined.
    '''
    # Direct lookup of the full name: the key always starts with the
    # 'visit_class_' prefix, so pre-filtering globals() on that prefix (and
    # copying it for every node) is unnecessary.
    visitor = globals().get('visit_class_' + rbnode.type)
    if visitor is None:
        raise NotImplementedParsingError(
            'Class visitor not declared for type="{type}"'.format(
                type=rbnode.type,
                ), rbnode, global_context, local_context)
    return visitor(rbnode, global_context, local_context)


In [62]:
def visit_class_endl(rbnode, global_context, local_context):
    '''Ignore a class-body node of type 'endl': nothing is recorded.'''
    return None

In [68]:
def visit_class_assignment(rbnode, global_context, local_context):
    '''Record a class-level `name = value` assignment in local_context.

    Handled targets:
    - 'column': either `Name(keyword=value, ...)` or a bare name. The column
      name goes to local_context['class_variables']['column'] and, for the call
      form, the evaluated keyword arguments to
      local_context['class_variables']['column_args'].
    - 'entity_class', 'label', 'url', 'operation', 'variable': the `.value` of
      the assigned node is stored under local_context[<target>], after checking
      the node type.
    - 'start_date', 'stop_date': parsed with parse_date and stored under
      local_context[<target>].
    Each target may be assigned only once per class.

    Raises NotImplementedParsingError for any other target, or for a 'column'
    value that is neither a call nor a name.

    NOTE(review): only 'column' is stored under 'class_variables'; the other
    targets land directly in local_context. Confirm this is intended.
    '''
    def check(cond):
        parsing_assert(cond, rbnode, global_context, local_context)

    # Targets copied verbatim: target name -> node type required for the value.
    plain_attribute_types = {
        'entity_class': 'name',
        'label': 'unicode_string',
        'url': 'string',
        'operation': 'string',
        'variable': 'name',
        }

    check(local_context['keyword'] == 'class')
    check(rbnode.operator == '')  # plain `=`, no augmented assignment
    check(rbnode.target.type == 'name')
    target = rbnode.target.value
    value_node = rbnode.value

    if target == 'column':
        if value_node.type == 'atomtrailers':
            check(len(value_node.value) == 2)

            check(value_node.value[0].type == 'name')
            column_name = value_node.value[0].value

            call_node = value_node.value[1]
            check(call_node.type == 'call')
            column_args = {}
            for arg in call_node.value:
                # An argument without a target (e.g. a positional one) is
                # reported as a parsing failure instead of crashing with an
                # AttributeError on the missing target.
                check(getattr(arg.target, 'type', None) == 'name')
                column_args[arg.target.value] = arg.value.to_python()

            check('column' not in local_context['class_variables'])
            local_context['class_variables']['column'] = column_name
            local_context['class_variables']['column_args'] = column_args

        elif value_node.type == 'name':
            check('column' not in local_context['class_variables'])
            local_context['class_variables']['column'] = value_node.value

        else:
            raise NotImplementedParsingError('Unknown type', rbnode, global_context, local_context)

    elif target in plain_attribute_types:
        check(value_node.type == plain_attribute_types[target])
        check(target not in local_context)
        local_context[target] = value_node.value

    elif target in ('start_date', 'stop_date'):
        date = parse_date(value_node, global_context, local_context)
        check(target not in local_context)
        local_context[target] = date

    else:
        raise NotImplementedParsingError('Unknown class variable {}'.format(target), rbnode, global_context, local_context)
            


In [64]:
def visit_class_def(rbnode, global_context, local_context):
    '''Record a method of the visited class in local_context['class_functions'].

    The method must take exactly the arguments (self, simulation, period), each
    a plain name with an empty `.value`, and its name must not already be
    recorded. The entry stored under the method name holds its decorators and
    its body (rbnode.value), both unmodified.
    '''
    def check(cond):
        parsing_assert(cond, rbnode, global_context, local_context)

    check(local_context['keyword'] == 'class')
    name = rbnode.name

    decorators = rbnode.decorators

    expected_arguments = ('self', 'simulation', 'period')
    check(len(rbnode.arguments) == len(expected_arguments))
    for position, expected_name in enumerate(expected_arguments):
        argument = rbnode.arguments[position]
        check(argument.type == 'def_argument')
        check(argument.target.type == 'name')
        check(argument.target.value == expected_name)
        check(not argument.value)  # must be empty (presumably: no default value)

    instructions = rbnode.value  # unmodified (TODO)

    check(name not in local_context['class_functions'])
    local_context['class_functions'][name] = {
        'decorators': decorators,
        # The body read just above; there is no `value` variable in this scope.
        'instructions': instructions,
    }

In [65]:
def visit_class_comment(rbnode, global_context, local_context):
    '''Ignore a comment node found in a class body (TODO: comments are dropped).'''
    return None

# Class parsing

In [70]:
# Visit the body of every class recorded during module parsing and keep, per
# module and per class name, what the class visitors collected.
global_context = {}

for module_name, module in parsed_modules.items():
    # Report the module being visited (not the stale `name` left over from the
    # module-parsing loop).
    print('Visiting module {} to parse its classes.'.format(module_name))

    global_context[module_name] = {
        'parsed_classes': {},
    }

    for cl in module['classes']:
        class_name = cl['name']
        print('Visiting class {}'.format(class_name))

        local_context = {
            'keyword': 'class',
            'class_name': class_name,
            'class_variables': {},
            'class_functions': {},
            }

        for rbnode in cl['content']:
            visit_class_rbnode(rbnode, global_context, local_context)

        # Keyed by the class's own name, with the collected results.
        # NOTE(review): visit_class_assignment also writes some attributes
        # (e.g. 'label', 'entity_class') directly into local_context; they are
        # not copied here. Confirm whether they should be.
        global_context[module_name]['parsed_classes'][class_name] = {
            'class_variables': local_context['class_variables'],
            'class_functions': local_context['class_functions'],
            }


Visiting module prelevements_obligatoires/prelevements_sociaux/contributions_sociales/base.py to parse its classes.
Visiting class jour_xyz
Visiting class rfr_n_1
Visiting class rfr_n_2
Visiting class nbptr_n_2
Visiting class age


NotImplementedParsingError: Unknown class variable base_function

In [67]:
# Node stored by the most recent ParsingException (None if none was raised yet).
angry_rbnode

In [None]:
# Describe the offending node. NOTE(review): presumably RedBaron's help()
# prints the node structure; this fails if angry_rbnode is still None.
angry_rbnode.help()

In [None]:
# Local context stored by the most recent ParsingException (None if none was raised yet).
angry_local_context