In [35]:
%load_ext autoreload
%autoreload 2

import ast
from pathlib import Path
from pprint import pprint
from docstring_format import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
# Alternative input, kept for quick switching:
# file = Path('equation_parser/functions.py')
file = Path('./tests/dummy_tests_functions.py')

# Decode explicitly as UTF-8 (the default encoding of Python source files)
# instead of relying on the platform's locale encoding.
raw_text = file.read_text(encoding='utf-8')
dirty_lines = raw_text.splitlines()  # source lines before any correction
tree = ast.parse(raw_text)

# Top-level classes, the functions defined directly in those class bodies, and
# the top-level functions. `async def` nodes (ast.AsyncFunctionDef) are not collected.
classes = [item for item in tree.body if isinstance(item, ast.ClassDef)]
class_methods = [func for item in classes for func in item.body if isinstance(func, ast.FunctionDef)]
functions = [item for item in tree.body if isinstance(item, ast.FunctionDef)]

# File annotations

In [144]:
from docstring_format.base import get_docstring_sections

In [145]:
# Fetch the docstring of every top-level function and split it into sections.
# `docstring` and `sections` are overwritten on each pass, so once the loop is
# done they (and `func`) refer to the last function only.
for func in functions:
    docstring = get_docstring(func, dirty_lines)
    sections = get_docstring_sections(docstring)

In [151]:
sections.parameters

'    values: values to compute from\n    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid\n     index from values.\n\n    '

In [142]:
# Prepend the value of parse_returns(func) to the Returns section.
# NOTE(review): the assignment reads its own target, so re-running this cell
# prepends the annotation again; the rendered docstring below also shows the
# inserted line without the 4-space offset carried by the other lines.
sections.returns =  parse_returns(func) + sections.returns

In [143]:
pprint(sections.to_string())

('    """Compute the difference of values with respect to ref_id.\n'
 '\n'
 '    Parameters\n'
 '    ----------\n'
 '    values: values to compute from\n'
 '    ref_id: delta values are computed with respect to that reference. It '
 'should be valid index or a list of valid\n'
 '     index from values.\n'
 '\n'
 '    Returns\n'
 '    -------\n'
 'Union[pd.DataFrame, pd.Series]\n'
 '    """')


In [124]:
docstring

'    """Compute the difference of values with respect to ref_id.\n\n    Parameters\n    ----------\n    values: values to compute from\n    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid\n     index from values.\n\n    Returns\n    -------\n\n    """'

In [125]:
get_docstring_sections(docstring).to_string()

'    """Compute the difference of values with respect to ref_id.\n\n    Parameters\n    ----------\n    values: values to compute from\n    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid\n     index from values.\n\n    Returns\n    -------\n\n    """'

In [37]:
# Start from a copy of dirty_lines.
lines = dirty_lines.copy()
# The result of each call is passed to the next call as `lines`.
for func in functions:
    lines = annotate_function(func, lines)
    
# Same pass for the functions collected from the top-level class bodies.
for method in class_methods:
    lines = annotate_function(method, lines)

In [38]:
docstring = get_docstring(func, dirty_lines)

In [39]:
import re

In [104]:
# Regex fragments for the section headers of a docstring. Raw strings keep
# `\s` as a regex escape instead of an invalid string escape sequence.
# Header word, newline, optional indentation, then the dashed underline.
param_token = r'Parameters\n\s*-+\n'
# Here `\s*` spans the newline and the indentation before the dashed underline.
return_token = r'Returns\s*-+\n'

In [162]:
docstring = '    """Compute the difference of values with respect to ref_id.\n\n    Parameters\n    ----------\n    values: values to compute from\n    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid\n     index from values.\n\n    Returns\n    -------\n\n    """'

In [163]:
docstring = '    """Compute the difference of values with respect to ref_id.\n\n    Parameters\n    ----------\n    values: values to compute from\n    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid\n     index from values."""'

In [164]:
pprint(docstring)

('    """Compute the difference of values with respect to ref_id.\n'
 '\n'
 '    Parameters\n'
 '    ----------\n'
 '    values: values to compute from\n'
 '    ref_id: delta values are computed with respect to that reference. It '
 'should be valid index or a list of valid\n'
 '     index from values."""')


In [169]:
# One pass over the docstring: leading offset, summary, the "Parameters"
# header, the parameters body, then an optional "Returns" header and body.
# With re.S, `.` also matches newlines, so every section may span several lines.
# All pieces are raw strings so that `\s` reaches the regex engine untouched.
pattern = re.compile(r'(?P<offset>\s*)'
                     r'(?P<summary>.*)'
                     rf'(?P<param_delimiter>{param_token})'
                     rf'(?P<parameters>(?:(?!{return_token}).)*)'  # negative lookahead of return token
                     rf'(?P<return_delimiter>{return_token})?'
                     rf'(?P<returns>.*)?',
                     flags=re.S)

# The "Parameters" header is mandatory in the pattern: without it re.search
# returns None and .groupdict() raises AttributeError.
sections = re.search(pattern, docstring).groupdict()
sections

{'offset': '    ',
 'summary': '"""Compute the difference of values with respect to ref_id.\n\n    ',
 'param_delimiter': 'Parameters\n    ----------\n',
 'parameters': '    values: values to compute from\n    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid\n     index from values."""',
 'return_delimiter': None,
 'returns': ''}

In [237]:
# One record per positional argument of `func`: its name and parsed annotation.
params = [{'name': item.arg, 'annotation': parse_annotation(item)} for item in func.args.args]
param_lines = sections['parameters'].splitlines()
total_lines = len(param_lines)

# Find the first line on which each parameter's entry starts. The pattern is
# anchored at the start of the line and ends on a word boundary, so a name that
# merely appears inside another entry's description (or inside a longer name,
# e.g. `id` in `ref_id`) is not mistaken for the entry itself. The name is
# escaped so it is always matched literally.
# Parameters without an entry in the docstring are left out.
params_start_line = []
for param in params:
    name = param['name']
    entry_pattern = re.compile(rf'\s*{re.escape(name)}\b')

    for n, line in enumerate(param_lines):
        if entry_pattern.match(line):
            param['start'] = n
            params_start_line.append(param)
            break

params_start_line.sort(key=lambda x: x['start'])

# An entry runs until the next entry starts; the last one runs to the end of
# the parameters section.
entry_ends = [item['start'] for item in params_start_line[1:]] + [total_lines]
for item, end in zip(params_start_line, entry_ends):
    item['length'] = end - item['start']
    item['lines'] = param_lines[item['start']:end]

In [238]:
params_start_line

[{'name': 'values',
  'annotation': 'Union[pd.DataFrame, pd.Series]',
  'start': 0,
  'length': 1,
  'lines': ['    values: values to compute from']},
 {'name': 'ref_id',
  'annotation': None,
  'start': 1,
  'length': 2,
  'lines': ['    ref_id: delta values are computed with respect to that reference. It should be valid index or a list of valid',
   '     index from values."""']}]

In [166]:
parse_returns(func)

'Union[pd.DataFrame, pd.Series]'

In [None]:
dirty_lines

In [None]:
lines

In [None]:
# First line index and line count of func's docstring within dirty_lines.
start, length = get_docstring_lines(func, dirty_lines)
# NOTE(review): the result is used as a single string here (correct_lines calls
# .splitlines() on it), whereas the earlier annotation loop passes the result
# back in as `lines` — confirm which contract annotate_function currently has.
corrected_docstring = annotate_function(func, dirty_lines)

In [None]:
def correct_lines(dirty_lines, start, length, corrected_docstring):
    """Return a copy of ``dirty_lines`` with one span replaced by a new docstring.

    Parameters
    ----------
    dirty_lines: list of source lines; it is left unmodified.
    start: index of the first line to replace.
    length: number of lines to replace.
    corrected_docstring: replacement text; it is split into lines before insertion.

    Returns
    -------
    list
        New list in which ``dirty_lines[start:start + length]`` has been replaced by
        the lines of ``corrected_docstring``.
    """
    corrected_lines = dirty_lines.copy()
    # One slice assignment swaps the whole span, instead of popping and
    # re-inserting line by line through throw-away list comprehensions.
    corrected_lines[start:start + length] = corrected_docstring.splitlines()

    return corrected_lines

In [None]:
corrected_lines = correct_lines(dirty_lines, start, length, corrected_docstring)

In [None]:
corrected_lines[start:start+length]

# Unit tests

In [None]:
from docstring_format.base import *
import json

In [None]:
file = Path('./tests/dummy_tests_functions.py')

# Decode explicitly as UTF-8 (the default encoding of Python source files)
# instead of relying on the platform's locale encoding.
raw_text = file.read_text(encoding='utf-8')
dirty_lines = raw_text.splitlines()
tree = ast.parse(raw_text)

classes = [item for item in tree.body if isinstance(item, ast.ClassDef)]
class_methods = [func for item in classes for func in item.body if isinstance(func, ast.FunctionDef)]

functions = [item for item in tree.body if isinstance(item, ast.FunctionDef)]

# For every top-level function record, keyed by function name: where its
# docstring sits in the file, the sections parsed from it, and the output of
# annotate_function.
results = {}
for func in functions:
    start, length = get_docstring_lines(func, dirty_lines)
    docstring = '\n'.join(dirty_lines[start:start+length])

    results[func.name] = {
        'start': start,
        'length': length,
        'sections': get_docstring_sections(docstring).to_dict(),
        'docstring': annotate_function(func, dirty_lines),
    }

In [None]:
# Write `results` to ./tests/dummy_results.json as JSON indented by 4 spaces.
with open('./tests/dummy_results.json', mode='w') as f:
    json.dump(results, f, indent=4)

# Dev 

In [None]:
from docstring_format.base import *
from docstring_format.constants import *
import json

In [None]:
file = Path('equation_parser/constants.py')
# Alternative input, kept for quick switching:
# file = Path('./tests/dummy_tests_functions.py')

# Decode explicitly as UTF-8 (the default encoding of Python source files)
# instead of relying on the platform's locale encoding.
raw_text = file.read_text(encoding='utf-8')
dirty_lines = raw_text.splitlines()
tree = ast.parse(raw_text)

# Top-level classes of the module.
classes = [item for item in tree.body if isinstance(item, ast.ClassDef)]

# Functions defined directly in the bodies of those classes.
class_methods = [func for item in classes for func in item.body if isinstance(func, ast.FunctionDef)]

# Top-level functions of the module.
functions = [item for item in tree.body if isinstance(item, ast.FunctionDef)]

In [None]:
# Look at the first top-level function only.
# NOTE(review): functions[0] raises IndexError when the parsed module has no
# top-level functions.
func = functions[0]

docstring = get_docstring(func, dirty_lines)

# The docstring as returned by get_docstring, line by line.
pprint(docstring.splitlines())

# The output of annotate_function for the same function, line by line.
pprint(annotate_function(func, dirty_lines).splitlines())