In [1]:
import re
import subprocess

## Pylint messages

In [2]:
print(subprocess.check_output(['pylint', '--version']).decode())

pylint 1.7.2, 
astroid 1.5.3
Python 3.6.2 | packaged by conda-forge | (default, Jul 23 2017, 22:59:30) 
[GCC 4.8.2 20140120 (Red Hat 4.8.2-15)]



In [3]:
output = subprocess.check_output(
    "pylint --list-msgs | grep '^:' | awk '{print $2}' | cut -c 2-6", 
    shell=True, universal_newlines=True)
pylint_messages = output.split()

In [4]:
len(pylint_messages)

221

## Ignored (.pylinrc) messages

In [5]:
import configparser
config = configparser.ConfigParser()
config.read('../pyta/python_ta/.pylintrc')
pylintrc_messages = [m.strip(",") for m in config['MESSAGES CONTROL']['disable'].split()]

In [6]:
len(pylintrc_messages)

113

## Covered messages

In [18]:
LINK_RE = re.compile('###.*\{#([A-Z][0-9]+)\}.*')  # looks for '{#E0601}'
HEADER_RE = re.compile('###.*\(([A-Z][0-9]+)\).*')  # looks for '### E0601'

In [8]:
link_messages = []
header_messages = []

with open('../website/index.md') as ifh:
    for line in ifh:
        link_messages += LINK_RE.findall(line)
        header_messages += HEADER_RE.findall(line)

In [9]:
assert link_messages == header_messages

In [10]:
covered_messages = link_messages

In [11]:
len(covered_messages)

93

## Missing messages

In [12]:
# Messages produced by pylint that are not covered by .pylintrc or index.md
missing_messages = set(pylint_messages) - set(pylintrc_messages) - set(covered_messages)

In [35]:
len(missing_messages)

38

## Sanity checks

In [13]:
# Messages in index.md but not in pylint
set(covered_messages) - set(pylint_messages)

{'E0002', 'E9991', 'E9996', 'E9998', 'E9999', 'R0101', 'R0102'}

In [14]:
# Messages in index.md and in .pylintrc
set(covered_messages) & set(pylintrc_messages)

{'E0202', 'E0241', 'E0704', 'W0211'}

## Generate CSV file for existing errors

In [25]:
ENTIRE_HEADER_RE = re.compile('### (.*) \(([A-Z][0-9]+)\).*')  # looks for '### E0601'

In [27]:
ENTIRE_HEADER_RE.findall("### Used before assignment (E0601) {#E0601}")

[('Used before assignment', 'E0601')]

In [30]:
csv_data = [("error_code", "error_summary", "error_description")]

with open('../website/index.md') as ifh:
    for line in ifh:
        if line.startswith('## '):
            csv_data.append((line[3:].strip(), None, None,))
        elif line.startswith('### '):
            error_summary, error_code = ENTIRE_HEADER_RE.findall(line)[0]
            csv_data.append((error_code, error_summary, None))

In [31]:
csv_data[:10]

[('error_code', 'error_summary', 'error_description'),
 ('Improper Python usage', None, None),
 ('E0601', 'Used before assignment', None),
 ('E0602', 'Undefined variable', None),
 ('W0631', 'Undefined loop variable', None),
 ('E0103', 'Not in loop', None),
 ('E0104', 'Return outside function', None),
 ('W0101', 'Unreachable', None),
 ('W0109', 'Duplicate key', None),
 ('Type errors', None, None)]

## Generate CSV file for missing errors

In [32]:
def parse_help_msg(help_msg):
    data = help_msg.split('\n')
    name = data[0].strip()
    description = ' '.join(l.strip() for l in data[1:]).strip()
    return name, description

In [33]:
parse_help_msg("""\
:method-check-failed (F0202): *Unable to check methods signature (%s / %s)*
  Used when Pylint has been unable to check methods signature compatibility for
  an unexpected reason. Please report this kind if you don't make sense of it.
  This message belongs to the classes checker.

""")

(':method-check-failed (F0202): *Unable to check methods signature (%s / %s)*',
 "Used when Pylint has been unable to check methods signature compatibility for an unexpected reason. Please report this kind if you don't make sense of it. This message belongs to the classes checker.")

In [45]:
csv_data_for_missing = [("Missing", None, None)]

whitelisted_messages = {
    # Keep and document
    'C0305', 'W0301', 'C0201', 'E0107', 'W0221', 'E0303', 'C0321', 'C0301', 'C0326', 'W0312', 'W0311', 
    'C0330', 'E1133', 'W0223', 'C0304', 'C0325', 'E1123',
    # Keep, don't document
    'F0001', 'I0001', 'F0010', 'F0002', 'F0202',
    # Investigate further
    'W0106',
}

for code in missing_messages - whitelisted_messages:
    print(code)
    stdout = subprocess.check_output("pylint --help-msg={}".format(code), universal_newlines=True, shell=True)
    csv_data_for_missing.append((code, ) + parse_help_msg(stdout))

E1137
R1703
R1704
R1707
R1702
R0123
R1701
E1138
W0235
R1706
E1139
E0118
C0205
W0111
E0402


In [47]:
assert len(csv_data_for_missing) == (len(missing_messages - whitelisted_messages) + 1)

## Write CSV file

In [48]:
import pandas as pd

In [49]:
df = pd.DataFrame(csv_data[1:] + csv_data_for_missing, columns=csv_data[0])

In [50]:
df.tail()

Unnamed: 0,error_code,error_summary,error_description
114,E1139,:invalid-metaclass (E1139): *Invalid metaclass...,Emitted whenever we can detect that a class is...
115,E0118,:used-prior-global-declaration (E0118): *Name ...,Emitted when a name is used prior a global dec...
116,C0205,:single-string-used-for-slots (C0205): *Class ...,Used when a class __slots__ is a simple string...
117,W0111,:assign-to-new-keyword (W0111): *Name %s will ...,Used when assignment will become invalid in fu...
118,E0402,:relative-beyond-top-level (E0402): *Attempted...,Used when a relative import tries to access to...


In [51]:
df.to_csv('error_list.csv', index=False)