Core refactoring and cleanup.

niksite · Nov 25, 2018 · d11eab3 · d11eab3
1 parent e144500
commit d11eab3
Show file tree

Hide file tree

Showing 25 changed files with 752 additions and 360 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,38 +1,9 @@
-*.py[cod]
-
-# C extensions
-*.so
-
-# Packages
-*.egg
-*.eggs
-*.egg-info
-dist
-build
-eggs
-parts
-bin
-var
-sdist
-develop-eggs
-.installed.cfg
-lib
-lib64
-
-# Installer logs
-pip-log.txt
-
-# Unit test / coverage reports
 .coverage
+.*cache
 .tox
-nosetests.xml
-
-# Translations
-*.mo
-
-# Mr Developer
-.mr.developer.cfg
-.project
-.pydevproject
 .vscode
-.cache
+dist
+*.lock
+__pycache__
+*.pyc
+*.egg-info
diff --git a/.travis.yml b/.travis.yml
@@ -1,10 +1,11 @@
 language: python
+sudo: required
+dist: xenial
 python:
-    - "3.6"
+  - "2.7"
+  - "3.7"
 install:
-    - "pip install coverage"
-    - "pip install coveralls"
-script:
-    - "coverage run --source=url_normalize setup.py test"
-after_success:
-    coveralls
+  - "pip install coveralls poetry"
+  - "poetry install -v"
+script: "pytest"
+after_success: coveralls
diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/Makefile b/Makefile
@@ -0,0 +1,11 @@
+tox:
+	@tox
+
+test:
+	@pytest
+
+build:
+	@poetry build
+
+publish:
+	@poetry publish
diff --git a/README.md b/README.md
@@ -5,21 +5,23 @@ url-normalize
 [![Coverage Status](https://coveralls.io/repos/github/niksite/url-normalize/badge.svg?branch=master)](https://coveralls.io/github/niksite/url-normalize?branch=master)
 
 URI Normalization function:
-   * Take care of IDN domains.
-   * Always provide the URI scheme in lowercase characters.
-   * Always provide the host, if any, in lowercase characters.
-   * Only perform percent-encoding where it is essential.
-   * Always use uppercase A-through-F characters when percent-encoding.
-   * Prevent dot-segments appearing in non-relative URI paths.
-   * For schemes that define a default authority, use an empty authority if the default is desired.
-   * For schemes that define an empty path to be equivalent to a path of "/", use "/".
-   * For schemes that define a port, use an empty port if the default is desired
-   * All portions of the URI must be utf-8 encoded NFC from Unicode strings
+
+* Take care of IDN domains.
+* Always provide the URI scheme in lowercase characters.
+* Always provide the host, if any, in lowercase characters.
+* Only perform percent-encoding where it is essential.
+* Always use uppercase A-through-F characters when percent-encoding.
+* Prevent dot-segments appearing in non-relative URI paths.
+* For schemes that define a default authority, use an empty authority if the default is desired.
+* For schemes that define an empty path to be equivalent to a path of "/", use "/".
+* For schemes that define a port, use an empty port if the default is desired.
+* All portions of the URI must be UTF-8 encoded NFC from Unicode strings.
 
 Inspired by Sam Ruby's urlnorm.py: http://intertwingly.net/blog/2004/08/04/Urlnorm
 
 Example:
-```
+
+```sh
 $ pip install url-normalize
 Collecting url-normalize
 ...
@@ -30,17 +32,20 @@ Python 3.6.1 (default, Jul  8 2017, 05:00:20)
 Type "help", "copyright", "credits" or "license" for more information.
 > from url_normalize import url_normalize
 > print(url_normalize('www.foo.com:80/foo'))
-> http://www.foo.com/foo
+> https://www.foo.com/foo
 ```
 
 History:
-   * 07 Jul 2017: Python 2/3 compatibility.
-   * 05 Jan 2016: Python 3 compatibility
-   * 29 Dec 2015: PEP8, setup.py
-   * 10 Mar 2010: support for shebang (#!) urls
-   * 28 Feb 2010: using 'http' schema by default when appropriate
-   * 28 Feb 2010: added handling of IDN domains
-   * 28 Feb 2010: code pep8-zation
-   * 27 Feb 2010: forked from Sam Ruby's urlnorm.py
+
+* 1.4.0: A bit of code refactoring and cleanup
+* 1.3.2: Support empty string and double slash urls (//domain.tld)
+* 1.3.1: Same code supports both Python 3 and Python 2.
+* 1.3: Python 3 compatibility
+* 1.2: PEP8, setup.py
+* 1.1.2: support for shebang (#!) urls
+* 1.1.1: using 'http' schema by default when appropriate
+* 1.1: added handling of IDN domains
+* 1.0: code pep8-zation
+* 0.1: forked from Sam Ruby's urlnorm.py
 
 License: "Python" (PSF) License
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.poetry]
+name = "url-normalize"
+version = "1.4.0"
+description = "URL normalization for Python"
+authors = ["Nikolay Panov <github@npanov.com>"]
+license = "PSF"
+readme = "README.md"
+repository = "https://github.com/niksite/url-normalize"
+homepage = "https://github.com/niksite/url-normalize"
+keywords = ['url', 'normalization', 'normalize']
+
+[tool.poetry.dependencies]
+python = "~2.7 || ^3.6"
+six = "^1.11"
+
+[tool.poetry.dev-dependencies]
+pytest = "^3.0"
+pytest-cov = "^2.6"
+tox = "^3.5"
+pytest-flakes = "^4.0"
+pytest-socket = "^0.3.1"
+
+[build-system]
+requires = ["poetry>=0.12"]
+build-backend = "poetry.masonry.api"
diff --git a/setup.cfg b/setup.cfg
diff --git a/setup.py b/setup.py
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_deconstruct_url.py b/tests/test_deconstruct_url.py
@@ -0,0 +1,32 @@
+"""Deconstruct url tests."""
+from url_normalize.tools import deconstruct_url, URL
+
+EXPECTED_DATA = {
+    "http://site.com": URL(
+        fragment="",
+        host="site.com",
+        path="",
+        port="",
+        query="",
+        scheme="http",
+        userinfo="",
+    ),
+    "http://user@www.example.com:8080/path/index.html?param=val#fragment": URL(
+        fragment="fragment",
+        host="www.example.com",
+        path="/path/index.html",
+        port="8080",
+        query="param=val",
+        scheme="http",
+        userinfo="user@",
+    ),
+}
+
+
+def test_deconstruct_url_result_is_expected():
+    """Check deconstruct_url against each input/expected pair of EXPECTED_DATA."""
+    for raw_url in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[raw_url]
+        actual = deconstruct_url(raw_url)
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, raw_url
diff --git a/tests/test_generic_url_cleanup.py b/tests/test_generic_url_cleanup.py
@@ -0,0 +1,20 @@
+"""Tests for generic_url_cleanup function."""
+from url_normalize.url_normalize import generic_url_cleanup
+
+EXPECTED_DATA = {
+    "//site/#!fragment": "//site/?_escaped_fragment_=fragment",
+    "//site/?utm_source=some source&param=value": "//site/?param=value",
+    "//site/?utm_source=some source": "//site/",
+    "//site/?param=value&utm_source=some source": "//site/?param=value",
+    "//site/page": "//site/page",
+    "//site/?& ": "//site/",
+}
+
+
+def test_generic_url_cleanup_result_is_expected():
+    """Check generic_url_cleanup against each pair of EXPECTED_DATA."""
+    for raw_url in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[raw_url]
+        actual = generic_url_cleanup(raw_url)
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, raw_url
diff --git a/tests/test_normalize_fragment.py b/tests/test_normalize_fragment.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+"""Tests for normalize_fragment function."""
+from url_normalize.url_normalize import normalize_fragment
+
+EXPECTED_DATA = {
+    "": "",
+    "fragment": "fragment",
+    "пример": "%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80",
+    "!fragment": "%21fragment",
+    "~fragment": "~fragment",
+}
+
+
+def test_normalize_fragment_result_is_expected():
+    """Check normalize_fragment against each pair of EXPECTED_DATA."""
+    for fragment in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[fragment]
+        actual = normalize_fragment(fragment)
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, fragment
diff --git a/tests/test_normalize_host.py b/tests/test_normalize_host.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+"""Tests for normalize_host function."""
+from url_normalize.url_normalize import normalize_host
+
+EXPECTED_DATA = {
+    "site.com": "site.com",
+    "SITE.COM": "site.com",
+    "site.com.": "site.com",
+    "пример.испытание": "xn--e1afmkfd.xn--80akhbyknj4f",
+}
+
+
+def test_normalize_host_result_is_expected():
+    """Check normalize_host against each input/expected pair of EXPECTED_DATA."""
+    for host in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[host]
+        actual = normalize_host(host)
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, host
diff --git a/tests/test_normalize_path.py b/tests/test_normalize_path.py
@@ -0,0 +1,39 @@
+"""Tests for normalize_path function."""
+from url_normalize.url_normalize import normalize_path
+
+EXPECTED_DATA = {
+    "": "/",
+    "/": "/",
+    "..": "/",
+    "/foo/bar/.": "/foo/bar/",
+    "/foo/bar/./": "/foo/bar/",
+    "/foo/bar/..": "/foo/",
+    "/foo/bar/../": "/foo/",
+    "/foo/bar/../baz": "/foo/baz",
+    "/foo/bar/../..": "/",
+    "/foo/bar/../../": "/",
+    "/foo/bar/../../baz": "/baz",
+    "/foo/bar/../../../baz": "/baz",
+    "/foo/bar/../../../../baz": "/baz",
+    "/./foo": "/foo",
+    "/../foo": "/foo",
+    "/foo.": "/foo.",
+    "/.foo": "/.foo",
+    "/foo..": "/foo..",
+    "/..foo": "/..foo",
+    "/./../foo": "/foo",
+    "/./foo/.": "/foo/",
+    "/foo/./bar": "/foo/bar",
+    "/foo/../bar": "/bar",
+    "/foo//": "/foo/",
+    "/foo///bar//": "/foo/bar/",
+}
+
+
+def test_normalize_path_result_is_expected():
+    """Check normalize_path (scheme "http") against each pair of EXPECTED_DATA."""
+    for url, expected in EXPECTED_DATA.items():
+
+        result = normalize_path(url, "http")
+
+        assert result == expected, url
diff --git a/tests/test_normalize_port.py b/tests/test_normalize_port.py
@@ -0,0 +1,13 @@
+"""Tests for normalize_port function."""
+from url_normalize.url_normalize import normalize_port
+
+EXPECTED_DATA = {"8080": "8080", "": "", "80": "", "string": "string"}
+
+
+def test_normalize_port_result_is_expected():
+    """Check normalize_port (scheme "http") against each pair of EXPECTED_DATA."""
+    for port in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[port]
+        actual = normalize_port(port, "http")
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, port
diff --git a/tests/test_normalize_query.py b/tests/test_normalize_query.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+"""Tests for normalize_query function."""
+
+from url_normalize.url_normalize import normalize_query
+
+EXPECTED_DATA = {
+    "": "",
+    "param1=val1&param2=val2": "param1=val1&param2=val2",
+    "Ç=Ç": "%C3%87=%C3%87",
+    "%C3%87=%C3%87": "%C3%87=%C3%87",
+    "q=C%CC%A7": "q=%C3%87",
+}
+
+
+def test_normalize_query_result_is_expected():
+    """Check normalize_query against each input/expected pair of EXPECTED_DATA."""
+    for query in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[query]
+        actual = normalize_query(query)
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, query
diff --git a/tests/test_normalize_scheme.py b/tests/test_normalize_scheme.py
@@ -0,0 +1,13 @@
+"""Tests for normalize_scheme function."""
+from url_normalize.url_normalize import normalize_scheme
+
+EXPECTED_DATA = {"http": "http", "HTTP": "http"}
+
+
+def test_normalize_scheme_result_is_expected():
+    """Check normalize_scheme against each pair of EXPECTED_DATA."""
+    for scheme in EXPECTED_DATA:
+        wanted = EXPECTED_DATA[scheme]
+        actual = normalize_scheme(scheme)
+        # The input is used as the assertion message so a failure names its case.
+        assert actual == wanted, scheme