-
Notifications
You must be signed in to change notification settings - Fork 171
/
autodetect.py
145 lines (113 loc) · 4.68 KB
/
autodetect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import codecs
import warnings
import os
import re
from requirements_detector import find_requirements
from requirements_detector.detect import RequirementsNotFound
# Names of the libraries this module tries to detect in a project.
POSSIBLE_LIBRARIES = ('django', 'celery')
# The patterns below are applied to one source line at a time.
# For the characters allowed in identifiers, see
# http://docs.python.org/2/reference/lexical_analysis.html#identifiers
# ``from <dotted.name> import ...``; group 1 is the dotted module name.
_FROM_IMPORT_REGEX = re.compile(r'^\s*from ([\._a-zA-Z0-9]+) import .*$')
# ``import <dotted.name>`` with nothing following the name; group 1 is the
# dotted module name.
_IMPORT_REGEX = re.compile(r'^\s*import ([\._a-zA-Z0-9]+)$')
# ``import <name>, ...``: one or more names, each followed by ", ".
# NOTE: group 1 is a repeated group, so after a match it holds only the
# last "<name>, " that was consumed, not the first.
_IMPORT_MULTIPLE_REGEX = re.compile(r'^\s*import ([\._a-zA-Z0-9]+(, ){1})+')
# ``coding: <encoding>`` or ``coding=<encoding>`` anywhere in a string;
# group 1 is the encoding name.
_CODING_REGEX = re.compile(r'coding[:=]\s*([-\w.]+)')
def detect_by_bom(path):
    """
    Determine and return file encoding using the BOM (byte-order mark).

    This is necessary for files such as setuptools' tests/script-with-bom.py

    :param path: path of the file to inspect; only its first 4 bytes are read.
    :returns: bom-detected encoding only if discovered, otherwise ``None``.
    """
    with open(path, 'rb') as fin:
        raw = fin.read(4)

    # Order matters: the UTF-32 little-endian BOM (FF FE 00 00) begins with
    # the UTF-16 little-endian BOM (FF FE), so UTF-32 has to be tested before
    # UTF-16 or UTF-32 files would be reported as 'utf-16'.
    for enc, boms in (
            ('utf-8-sig', (codecs.BOM_UTF8,)),
            ('utf-32', (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)),
            ('utf-16', (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE))):
        # bytes.startswith accepts a tuple of candidate prefixes
        if raw.startswith(boms):
            return enc
    return None
def determine_pyfile_encoding(path, default='utf8'):
    """
    Determine and return file encoding of a python source file.

    https://www.python.org/dev/peps/pep-0263/

    A byte-order mark, when present, takes precedence. Otherwise the first
    two lines are searched for a ``coding: <encoding>`` declaration inside
    a comment.

    :param path: path of the python source file to inspect.
    :param default: encoding returned when nothing is detected; it is also
        the codec used to decode the header lines for the comment test.
    :returns: file encoding if determined, otherwise ``default``.
    """
    encoding = detect_by_bom(path)
    if encoding:
        # trust the byte-order mark
        return encoding

    with open(path, 'rb') as fip:
        # look for string of form '# coding: <encoding>'
        # in the first two lines of python source file.
        header = (fip.readline(), fip.readline())

    for raw_line in header:
        # Decode leniently: a header comment may legitimately contain bytes
        # that are not valid in ``default`` (or not ASCII), and that must not
        # abort detection with a UnicodeDecodeError.
        if not raw_line.decode(default, 'replace').startswith('#'):
            continue
        match = _CODING_REGEX.search(raw_line.decode('ascii', 'replace'))
        if match:
            return match.group(1)
    return default
def find_from_imports(file_contents):
    """
    Find which of POSSIBLE_LIBRARIES are imported by a piece of source code.

    The text is examined line by line for ``import x``, ``import x, y`` and
    ``from x import y`` statements. Every dotted component of an imported
    module name is compared against POSSIBLE_LIBRARIES.

    :param file_contents: decoded text of a python source file.
    :returns: set of the POSSIBLE_LIBRARIES names that are imported.
    """
    names = set()
    # splitlines() also removes '\r', so files with Windows line endings are
    # not missed by the '$'-anchored single-import pattern.
    for line in file_contents.splitlines():
        if _IMPORT_MULTIPLE_REGEX.match(line):
            # ``import a, b.c as d, e``: the pattern is anchored on the
            # ``import`` keyword, so everything after its first occurrence
            # is the comma separated module list. The regex group cannot be
            # used here because a repeated group keeps only its last match.
            import_names = []
            for module in line.partition('import')[2].split(','):
                words = module.split()
                if words:
                    # first word is the module; drops any ``as alias`` part
                    import_names.extend(words[0].split('.'))
        else:
            match = _IMPORT_REGEX.match(line) or _FROM_IMPORT_REGEX.match(line)
            if match is None:
                continue
            import_names = match.group(1).split('.')

        names.update(
            name for name in import_names if name in POSSIBLE_LIBRARIES)
    return names
def find_from_path(path):
    """
    Recursively scan a directory for python files importing known libraries.

    Sub-directories are descended into. Entries ending in ``.py`` that are
    not symbolic links are decoded and searched for imports; a file that
    cannot be decoded is reported with an ``ImportWarning`` and skipped.

    :param path: directory to scan.
    :returns: set of the POSSIBLE_LIBRARIES names found beneath ``path``.
    """
    found = set()
    wanted = len(POSSIBLE_LIBRARIES)

    for entry in os.listdir(path):
        full_path = os.path.abspath(os.path.join(path, entry))

        if os.path.isdir(full_path):
            found.update(find_from_path(full_path))
        elif full_path.endswith('.py') and not os.path.islink(full_path):
            try:
                encoding = determine_pyfile_encoding(full_path, default='utf8')
                with codecs.open(full_path, encoding=encoding) as source:
                    found.update(find_from_imports(source.read()))
            except (UnicodeDecodeError, LookupError) as err:
                # UnicodeDecodeError: the file could not be decoded, either
                # while its header was inspected for a coding declaration or
                # while its contents were read with the chosen encoding (a
                # wrong declared coding, or the 'utf8' default not fitting).
                # LookupError: an invalid 'coding' statement specified in
                # the target source file.
                warnings.warn('{0}: {1}'.format(full_path, err), ImportWarning)

        if len(found) == wanted:
            # every known library has been seen; scanning further is pointless
            break

    return found
def find_from_requirements(path):
    """
    Find which of POSSIBLE_LIBRARIES are listed in a project's requirements.

    :param path: location handed to ``find_requirements``.
    :returns: list of lower-cased library names, each appearing at most
        once, in the order they were first encountered.

    Exceptions raised by ``find_requirements`` are not handled here and
    propagate to the caller.
    """
    names = []
    for requirement in find_requirements(path):
        if requirement.name is None:
            continue
        name = requirement.name.lower()
        # Record each library only once: callers compare the length of this
        # list with len(POSSIBLE_LIBRARIES), so a requirement listed twice
        # must not make the result look complete.
        if name in POSSIBLE_LIBRARIES and name not in names:
            names.append(name)
    return names
def autodetect_libraries(path):
    """
    Detect which of POSSIBLE_LIBRARIES a project uses.

    The project's requirements are consulted first. Unless they already name
    every known library, the source tree is scanned for imports as well and
    both findings are combined.

    :param path: a directory, or a file whose containing directory is used.
    :returns: the detected library names; a list when the requirements alone
        were sufficient, otherwise a set.
    """
    if os.path.isfile(path):
        path = os.path.dirname(path)
    if path == '':
        # a bare filename has no directory part: use the current directory
        path = '.'

    libraries = []
    try:
        libraries = find_from_requirements(path)
    # pylint: disable=W0704
    except RequirementsNotFound:
        pass

    # Compare distinct names so that a library listed twice in the
    # requirements is not mistaken for full coverage.
    if len(set(libraries)) < len(POSSIBLE_LIBRARIES):
        # keep what the requirements declared instead of discarding it
        libraries = set(libraries) | find_from_path(path)

    return libraries