Skip to content

Commit

Permalink
Big parser refactor
Browse files Browse the repository at this point in the history
* broke the parser into a Requirement class
* refactored the unit test suite
* better handling of VCS
  • Loading branch information
davidfischer committed Aug 16, 2013
1 parent a12ffae commit a7a4910
Show file tree
Hide file tree
Showing 25 changed files with 1,991 additions and 304 deletions.
13 changes: 5 additions & 8 deletions README.rst
Expand Up @@ -18,15 +18,12 @@ Requirements parser can parse a file-like object or a text string.
::

>>> import requirements
>>> import pprint
>>> with open('requirements.txt', 'r') as f:
... for req in requirements.parse(f):
... pprint.pprint(req)
... print(req.name, req.specs, req.extras)
...
{'name': 'requirements',
'uri': 'https://github.com/davidfischer/requirements-parser.git',
'vcs': 'git'}
{'extras': [], 'name': 'Django', 'specs': [('>=', '1.5'), ('<', '1.6')]}
{'extras': [], 'name': 'numpy', 'specs': []}
{'extras': ['pdf'], 'name': 'DocParser', 'specs': []}
requirements [] []
Django [('>=', '1.5'), ('<', '1.6')] []
numpy [] []
DocParser [] ['pdf']

57 changes: 2 additions & 55 deletions requirements/parser.py
@@ -1,33 +1,6 @@
import re
import warnings
from pkg_resources import Requirement


# Compiled regular expressions

# A line counts as a URI when it starts with a known scheme, a colon and at
# least one further character.  The tail must be ``.+`` (any characters); an
# escaped ``\.+`` would only accept literal dots and reject every real URI.
uri_regex = re.compile(r'^(svn|git|bzr|hg|http|https|file|ftp):(.+)')

# ``<path>#egg=<name>`` -- used for lines starting with ``file:``.
file_uri_regex = re.compile(
    r'^(?P<path>[^#]+)#egg=(?P<name>[^&]+)$', re.MULTILINE)

# ``[<vcs>+]<uri>#egg=<name>`` -- the VCS prefix is optional here.
editable_uri_regex = re.compile(r'^((?P<vcs>svn|git|bzr|hg)\+)?'
                                r'(?P<uri>[^#&]+)#egg=(?P<name>[^&]+)$',
                                re.MULTILINE)

# ``<vcs>+<uri>#egg=<name>`` -- the VCS prefix is mandatory here.
vcs_uri_regex = re.compile(r'^(?P<vcs>svn|git|bzr|hg)\+'
                           r'(?P<uri>[^#&]+)#egg=(?P<name>[^&]+)$',
                           re.MULTILINE)

# Pip's pip/download.py:is_url() function doesn't check THAT closely


def is_uri(uri):
    """Return True when the lower-cased *uri* matches ``uri_regex``."""
    return re.match(uri_regex, uri.lower()) is not None


def is_vcs_uri(uri):
    """Return True when the lower-cased *uri* matches ``vcs_uri_regex``."""
    return re.match(vcs_uri_regex, uri.lower()) is not None
from .requirement import Requirement


# See pip/req.py:parse_requirements()
Expand Down Expand Up @@ -60,31 +33,5 @@ def parse(reqstr):
elif line.startswith('-Z') or line.startswith('--always-unzip'):
warnings.warn('Unused option --always-unzip. Skipping.')
continue
elif line.startswith('file:'):
match = re.match(file_uri_regex, line)
elif line.startswith('-e') or line.startswith('--editable') or \
is_uri(line) or is_vcs_uri(line):
if line.startswith('-e'):
tmpstr = line[len('-e'):].strip()
elif line.startswith('--editable'):
tmpstr = line[len('--editable'):].strip()
else:
tmpstr = line
match = re.match(editable_uri_regex, tmpstr)
else:
try:
# Handles inline comments despite not being strictly legal
req = Requirement.parse(line)
yield {
'name': req.project_name,
'extras': list(req.extras),
'specs': req.specs,
}
continue
except ValueError:
match = None

if match:
yield match.groupdict()
else:
raise ValueError('Invalid requirement line "%s"' % line)
yield Requirement.parse(line)
163 changes: 163 additions & 0 deletions requirements/requirement.py
@@ -0,0 +1,163 @@
import re
from pkg_resources import Requirement as Req

from .vcs import VCS, VCS_SCHEMES


# Non-VCS URIs (http, https, file, ftp, ftps) with an optional
# ``#egg=<name>`` fragment.
URI_REGEX = re.compile(
    r'^(?P<scheme>https?|file|ftps?)://'
    r'(?P<path>[^#]+)'
    r'(#egg=(?P<name>[^&]+))?'
    r'$'
)

# VCS URIs: ``<scheme>://[<login>@]<path>[@<revision>][#egg=<name>]`` where
# <scheme> is any entry of VCS_SCHEMES (the ``+`` is escaped so that it is
# matched literally).
VCS_REGEX = re.compile(
    r'^(?P<scheme>' +
    r'|'.join(scheme.replace('+', r'\+') for scheme in VCS_SCHEMES) +
    r')://'
    r'((?P<login>[^/@]+)@)?'
    r'(?P<path>[^#@]+)'
    r'(@(?P<revision>[^#]+))?'
    r'(#egg=(?P<name>[^&]+))?'
    r'$'
)

# This matches just about everything: an optional ``file://`` prefix, a
# non-empty path and an optional ``#egg=<name>`` fragment.
LOCAL_REGEX = re.compile(
    r'^((?P<scheme>file)://)?(?P<path>[^#]+)(#egg=(?P<name>[^&]+))?$'
)


class Requirement(object):
    """
    Represents a single requirement

    Typically instances of this class are created with ``Requirement.parse``.
    For local file requirements, there's no verification that the file
    exists.

    Instances also offer read-only, mapping-style access to their attributes
    through ``keys()`` and ``req['name']``.

    See: http://www.pip-installer.org/en/latest/logic.html
    """

    def __init__(self, line):
        # Do not call this private method
        self.line = line          # the requirement line this was built from
        self.editable = False     # True for ``-e`` / ``--editable`` lines
        self.local_file = False   # True for local paths and file:// URIs
        self.specifier = False    # True for plain requirement specifiers
        self.vcs = None           # entry of VCS the URI starts with, if any
        self.name = None          # project name / ``#egg=`` fragment
        self.uri = None           # ``<scheme>://<path>`` for URI requirements
        self.path = None          # path for local requirements
        self.revision = None      # ``@<revision>`` part of a VCS URI
        self.extras = []          # extras of a requirement specifier
        self.specs = []           # version specs of a requirement specifier

    def __repr__(self):
        return u'<Requirement: "{0}">'.format(self.line)

    def __getitem__(self, key):
        # Mapping-style read access to the attributes set in __init__.
        return getattr(self, key)

    def keys(self):
        # Names of all instance attributes.
        return self.__dict__.keys()

    def _set_vcs_fields(self, match):
        """Fill uri, revision, name and vcs from a ``VCS_REGEX`` match."""
        groups = match.groupdict()
        self.uri = u'{scheme}://{path}'.format(**groups)
        self.revision = groups['revision']
        self.name = groups['name']
        for vcs in VCS:
            if self.uri.startswith(vcs):
                self.vcs = vcs
                break

    def _set_local_fields(self, line):
        """
        Fill local_file, name and path by matching *line* with ``LOCAL_REGEX``.

        :raises: ValueError when the line has no usable path
        """
        match = LOCAL_REGEX.match(line)
        if match is None:
            # LOCAL_REGEX needs a non-empty path before any ``#``; an empty
            # line or one starting with ``#`` ends up here.  Raise the
            # documented ValueError rather than relying on ``assert``, which
            # is stripped when Python runs with -O.
            raise ValueError(
                u'Invalid requirement line "{0}"'.format(self.line))
        groups = match.groupdict()
        self.local_file = True
        self.name = groups['name']
        self.path = groups['path']

    @classmethod
    def parse_editable(cls, line):
        """
        Parses a Requirement from an "editable" requirement which is either
        a local project path or a VCS project URI.

        See: pip/req.py:from_editable()

        :param line: an "editable" requirement (without the ``-e`` prefix)
        :returns: a Requirement instance for the given line
        :raises: ValueError on an invalid requirement
        """

        req = cls(u'-e {0}'.format(line))
        req.editable = True

        vcs_match = VCS_REGEX.match(line)
        if vcs_match is not None:
            req._set_vcs_fields(vcs_match)
        else:
            # Anything that is not a VCS URI is treated as a local path.
            req._set_local_fields(line)

        return req

    @classmethod
    def parse_line(cls, line):
        """
        Parses a Requirement from a non-editable requirement.

        See: pip/req.py:from_line()

        :param line: a "non-editable" requirement
        :returns: a Requirement instance for the given line
        :raises: ValueError on an invalid requirement
        """

        req = cls(line)

        # VCS URIs are checked first, then plain URIs, then local files.
        vcs_match = VCS_REGEX.match(line)
        if vcs_match is not None:
            req._set_vcs_fields(vcs_match)
            return req

        uri_match = URI_REGEX.match(line)
        if uri_match is not None:
            groups = uri_match.groupdict()
            req.uri = u'{scheme}://{path}'.format(**groups)
            req.name = groups['name']
            if groups['scheme'] == 'file':
                req.local_file = True
            return req

        if u'#egg=' in line:
            # Assume a local file match
            req._set_local_fields(line)
            return req

        # This is a requirement specifier.
        # Delegate to pkg_resources and hope for the best
        req.specifier = True
        pkg_req = Req.parse(line)
        req.name = pkg_req.project_name
        req.extras = list(pkg_req.extras)
        req.specs = pkg_req.specs
        return req

    @classmethod
    def parse(cls, line):
        """
        Parses a Requirement from a line of a requirement file.

        :param line: a line of a requirement file
        :returns: a Requirement instance for the given line
        :raises: ValueError on an invalid requirement
        """

        if line.startswith('-e') or line.startswith('--editable'):
            # Editable installs are either a local project path
            # or a VCS project URI
            return cls.parse_editable(
                re.sub(r'^(-e|--editable=?)\s*', '', line))

        return cls.parse_line(line)
28 changes: 28 additions & 0 deletions requirements/vcs.py
@@ -0,0 +1,28 @@
# Version control systems a requirement URI can be attributed to.
VCS = ['git', 'hg', 'svn', 'bzr']

# URI schemes treated as VCS requirements, grouped by system.
VCS_SCHEMES = [
    # Git
    'git', 'git+https', 'git+ssh', 'git+git',
    # Mercurial
    'hg+http', 'hg+https', 'hg+static-http', 'hg+ssh',
    # Subversion
    'svn', 'svn+svn', 'svn+http', 'svn+https', 'svn+ssh',
    # Bazaar
    'bzr+http', 'bzr+https', 'bzr+ssh', 'bzr+sftp', 'bzr+ftp', 'bzr+lp',
]
12 changes: 10 additions & 2 deletions tests/reqfiles/comment_2.expected
@@ -1,12 +1,20 @@
[
{
"extras": [],
"specifier": true,
"local_file": false,
"name": "req",
"editable": false,
"uri": null,
"extras": [],
"vcs": null,
"path": null,
"line": "req==1.0 # comment",
"specs": [
[
"==",
"1.0"
]
]
],
"revision": null
}
]

0 comments on commit a7a4910

Please sign in to comment.