Skip to content

Commit

Permalink
maint: improves domain module
Browse files Browse the repository at this point in the history
- Uses type hints, improves docs
- Takes [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034) and [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782) into account
- Updates corresponding test functions

**Related items**

*Issues*

- Closes #52
- Closes #74
- Closes #81
- Closes #89
- Closes #95
- Closes #120
- Closes #124
- Closes #141
- Closes #143
- Closes #199
- Closes #204

*PRs*

- Closes #114
- Closes #179
  • Loading branch information
yozachar committed Mar 2, 2023
1 parent 27eed7b commit 7489857
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 75 deletions.
82 changes: 48 additions & 34 deletions tests/test_domain.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,56 @@
"""Test Domain."""
# -*- coding: utf-8 -*-

# standard
import pytest

# local
from validators import domain, ValidationFailure


@pytest.mark.parametrize(
    ("value", "rfc_1034", "rfc_2782"),
    [
        # Plain domains, validated with both RFC switches off.
        ("example.com", False, False),
        ("xn----gtbspbbmkef.xn--p1ai", False, False),
        ("underscore_subdomain.example.com", False, False),
        ("something.versicherung", False, False),
        ("someThing.versicherung", False, False),
        ("11.com", False, False),
        ("3.cn", False, False),
        ("a.cn", False, False),
        ("sub1.sub2.sample.co.uk", False, False),
        ("somerandomexample.xn--fiqs8s", False, False),
        ("kräuter.com", False, False),
        ("über.com", False, False),
        # Trailing-dot names, validated with rfc_1034 switched on.
        ("someThing.versicherung.", True, False),
        ("3.cn.", True, False),
        ("kräuter.com.", True, False),
        # Leading-underscore label, validated with rfc_2782 switched on.
        ("_example.com", False, True),
    ],
)
def test_returns_true_on_valid_domain(value: str, rfc_1034: bool, rfc_2782: bool):
    """Test returns true on valid domain.

    Each row is a domain string plus the ``rfc_1034`` / ``rfc_2782`` flags it
    is validated with; the validator's result must be truthy.
    """
    assert domain(value, rfc_1034=rfc_1034, rfc_2782=rfc_2782)


@pytest.mark.parametrize(
    ("value", "rfc_1034", "rfc_2782"),
    [
        # Rejected with both RFC switches off.
        ("example.com/", False, False),
        ("example.com:4444", False, False),
        ("example.-com", False, False),
        ("example.", False, False),
        ("-example.com", False, False),
        ("example-.com", False, False),
        ("_example.com", False, False),
        ("_example._com", False, False),
        ("example_.com", False, False),
        ("example", False, False),
        ("a......b.com", False, False),
        ("a.123", False, False),
        ("123.123", False, False),
        ("123.123.123", False, False),
        ("123.123.123.123", False, False),
        # Still rejected when rfc_1034 is switched on and a dot is appended.
        ("example.com/.", True, False),
        ("example-.com.", True, False),
        ("123.123.123.", True, False),
    ],
)
def test_returns_failed_validation_on_invalid_domain(value: str, rfc_1034: bool, rfc_2782: bool):
    """Test returns failed validation on invalid domain.

    Each row is a string plus the ``rfc_1034`` / ``rfc_2782`` flags it is
    validated with; the validator must hand back a ``ValidationFailure``.
    """
    assert isinstance(domain(value, rfc_1034=rfc_1034, rfc_2782=rfc_2782), ValidationFailure)
86 changes: 45 additions & 41 deletions validators/domain.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,58 @@
"""Domain."""

# standard
import re

# local
from .utils import validator

# Compiled matcher for an ASCII domain name, assembled from its four fragments.
pattern = re.compile(
    "".join(
        (
            # Opening character of every dot-terminated label.
            r"^(?:[a-zA-Z0-9]",
            # Optional remainder of that label, then the dot; the group repeats.
            r"(?:[a-zA-Z0-9-_]{0,61}[A-Za-z0-9])?\.)",
            # Final label: leading character plus up to 61 more.
            r"+[A-Za-z0-9][A-Za-z0-9-_]{0,61}",
            # The final label has to end on a letter.
            r"[A-Za-z]$",
        )
    )
)


def to_unicode(obj, charset="utf-8", errors="strict"):
    """Return ``obj`` as text.

    Args:
        obj:
            Value to convert. ``None`` is passed through unchanged, ``bytes``
            and ``bytearray`` are decoded, anything else goes through ``str()``.
        charset:
            Encoding used to decode binary input.
        errors:
            Error-handling scheme handed to ``decode`` (e.g. ``"strict"``,
            ``"replace"``).

    Returns:
        ``None`` if ``obj`` is ``None``, otherwise a ``str``.

    Raises:
        UnicodeDecodeError:
            If binary input cannot be decoded with ``charset`` under the
            ``"strict"`` error scheme.
    """
    if obj is None:
        return None
    # bytearray is decoded too; str() on it would give "bytearray(b'...')".
    if isinstance(obj, (bytes, bytearray)):
        return obj.decode(charset, errors)
    return str(obj)


@validator
def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False):
    """Return whether or not given value is a valid domain.

    Examples:
        >>> domain('example.com')
        # Output: True
        >>> domain('example.com/')
        # Output: ValidationFailure(func=domain, ...)
        >>> # Supports IDN domains as well::
        >>> domain('xn----gtbspbbmkef.xn--p1ai')
        # Output: True

    Args:
        value:
            Domain string to validate.
        rfc_1034:
            Allow trailing dot in domain name. The dot is optional: with this
            switch on, both `example.com` and `example.com.` are accepted.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain name is of type service record, so a label may start
            with an underscore.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]):
            If `value` is a valid domain name.
        (ValidationFailure):
            If `value` is an invalid domain name.

    Note:
        - *In version 0.10.0*:
            - Added support for internationalized domain name (IDN) validation.

    > *New in version 0.9.0*.
    """
    # Reject whitespace up front; `$` on its own would still accept a
    # trailing newline.
    if re.search(r"\s", value):
        return False

    try:
        # The pattern below only knows ASCII, so IDN input is converted first.
        # The codec raises UnicodeError for input it cannot encode, e.g. the
        # empty labels in "a......b.com".
        ascii_value = value.encode("idna").decode("ascii")
    except UnicodeError:
        return False

    label_start = "a-zA-Z0-9_" if rfc_2782 else "a-zA-Z0-9"
    # The dot must be escaped and optional: a bare `.` would demand one
    # arbitrary trailing character, letting "example.com/" through while
    # rejecting "a.cn".
    ending = r"\.?$" if rfc_1034 else r"$"

    # The pattern depends on both flags, so it is assembled per call; `re`
    # keeps a cache of recently compiled patterns, so repeated calls with the
    # same flags do not recompile it.
    return re.match(
        # First character of the domain
        rf"^(?:[{label_start}]"
        # Sub domain + hostname
        r"(?:[a-zA-Z0-9-_]{0,61}[A-Za-z0-9])?\.)"
        # First 61 characters of the gTLD
        r"+[A-Za-z0-9][A-Za-z0-9-_]{0,61}"
        # Last character of the gTLD
        rf"[A-Za-z]{ending}",
        ascii_value,
    )

0 comments on commit 7489857

Please sign in to comment.