In [1]:
import re
import subprocess

## Pylint messages

In [2]:
output = subprocess.check_output(
    "pylint --list-msgs | grep '^:' | awk '{print $2}' | cut -c 2-6", 
    shell=True, universal_newlines=True)
pylint_messages = output.split()

In [3]:
len(pylint_messages)

207

## Ignored (.pylinrc) messages

In [4]:
import configparser
config = configparser.ConfigParser()
config.read('../pyta/python_ta/.pylintrc')
pylintrc_messages = [m.strip(",") for m in config['MESSAGES CONTROL']['disable'].split()]

In [5]:
len(pylintrc_messages)

85

## Covered messages

In [6]:
LINK_RE = re.compile('.*\{#([A-Z][0-9]+)\}.*')  # looks for '{#E0601}'
HEADER_RE = re.compile('### ([A-Z][0-9]+):.*')  # looks for '### E0601'

In [7]:
link_messages = []
header_messages = []

with open('../website/index.md') as ifh:
    for line in ifh:
        link_messages += LINK_RE.findall(line)
        header_messages += HEADER_RE.findall(line)

In [8]:
assert link_messages == header_messages

In [9]:
covered_messages = link_messages

In [10]:
len(covered_messages)

92

## Missing messages

In [11]:
# Messages produced by pylint that are not covered by .pylintrc or index.md
missing_messages = set(pylint_messages) - set(pylintrc_messages) - set(covered_messages)

## Sanity checks

In [12]:
# Messages in index.md but not in pylint
set(covered_messages) - set(pylint_messages)

{'E9991', 'E9996', 'E9998', 'E9999'}

In [13]:
# Messages in index.md and in .pylintrc
set(covered_messages) & set(pylintrc_messages)

{'E0202', 'E0241', 'E0704', 'W0211'}

## Generate CSV file for existing errors

In [14]:
HEADER_RE = re.compile('### ([A-Z][0-9]+): ([a-zA-Z0-9 \-\_\`]+) \{')

In [15]:
HEADER_RE.findall("### E0601: Used before assignment {#E0601}")

[('E0601', 'Used before assignment')]

In [16]:
csv_data = [("error_code", "error_summary", "error_description")]

with open('../website/index.md') as ifh:
    for line in ifh:
        if line.startswith('## '):
            csv_data.append((line[3:].strip(), None, None,))
        elif line.startswith('### '):
            csv_data.append(tuple(HEADER_RE.findall(line)[0]) + (None, ))

In [17]:
csv_data[:10]

[('error_code', 'error_summary', 'error_description'),
 ('Improper Python usage', None, None),
 ('E0601', 'Used before assignment', None),
 ('E0602', 'Undefined variable', None),
 ('W0631', 'Undefined loop variable', None),
 ('E0103', 'Not in loop', None),
 ('E0104', 'Return outside function', None),
 ('W0101', 'Unreachable', None),
 ('W0109', 'Duplicate key', None),
 ('Type errors', None, None)]

## Generate CSV file for missing errors

In [18]:
def parse_help_msg(help_msg):
    data = help_msg.split('\n')
    name = data[0].strip()
    description = ' '.join(l.strip() for l in data[1:]).strip()
    return name, description

In [19]:
parse_help_msg("""\
:method-check-failed (F0202): *Unable to check methods signature (%s / %s)*
  Used when Pylint has been unable to check methods signature compatibility for
  an unexpected reason. Please report this kind if you don't make sense of it.
  This message belongs to the classes checker.

""")

(':method-check-failed (F0202): *Unable to check methods signature (%s / %s)*',
 "Used when Pylint has been unable to check methods signature compatibility for an unexpected reason. Please report this kind if you don't make sense of it. This message belongs to the classes checker.")

In [20]:
csv_data_for_missing = [("Missing", None, None)]

for code in missing_messages:
    print(code)
    stdout = subprocess.check_output("pylint --help-msg={}".format(code), universal_newlines=True, shell=True)
    csv_data_for_missing.append((code, ) + parse_help_msg(stdout))

C0305
F0001
I0001
W0106
W0301
E0236
I0021
E0110
F0010
E0238
C0201
E0107
W0221
C0204
E0303
C0321
C0301
E0703
R0801
C0203
C0326
E1133
I0013
R0203
W0223
E1134
R0202
W1202
I0012
I0022
C0302
C0304
I0011
F0002
C0202
I0010
E0012
I0020
C0325
F0202
E0237
W0312
W0311
E0301
E0011
E0240
E1003
C0330
E1123


In [21]:
assert len(csv_data_for_missing) == (len(missing_messages) + 1)

## Write CSV file

In [22]:
import pandas as pd

In [23]:
df = pd.DataFrame(csv_data[1:] + csv_data_for_missing, columns=csv_data[0])

In [25]:
df.tail()

Unnamed: 0,error_code,error_summary,error_description
147,E0011,:unrecognized-inline-option (E0011): *Unrecogn...,Used when an unknown inline option is encounte...
148,E0240,:inconsistent-mro (E0240): *Inconsistent metho...,Used when a class has an inconsistent method r...
149,E1003,:bad-super-call (E1003): *Bad first argument %...,Used when another argument than the current cl...
150,C0330,:bad-continuation (C0330): *Wrong %s indentati...,TODO This message belongs to the format checker.
151,E1123,:unexpected-keyword-arg (E1123): *Unexpected k...,Used when a function call passes a keyword arg...


In [26]:
df.to_csv('error_list.csv', index=False)