Skip to content

Commit

Permalink
Improve the support for SourceForge download URLs #26
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <tdruez@nexb.com>
  • Loading branch information
tdruez committed Jan 4, 2024
1 parent 01cf3b0 commit fa7c37d
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 26 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -6,6 +6,9 @@ Release notes
- Improve the stability of the "Check for new Package versions" feature.
https://github.com/nexB/dejacode/issues/17

- Improve the support for SourceForge download URLs.
https://github.com/nexB/dejacode/issues/26

### Version 5.0.0

Initial release.
5 changes: 4 additions & 1 deletion component_catalog/tests/test_admin.py
Expand Up @@ -2465,7 +2465,10 @@ def test_package_changeform_save_and_collect_data_on_addition(self, mock_get):
}

mock_get.return_value = mock.Mock(
content=b"\x00", headers={"content-length": 1}, status_code=200
content=b"\x00",
headers={"content-length": 1},
status_code=200,
url="http://domain.com/a.zip",
)

response = self.client.post(add_url, data, follow=True)
Expand Down
15 changes: 9 additions & 6 deletions component_catalog/tests/test_models.py
Expand Up @@ -2213,7 +2213,7 @@ def test_collect_package_data(self, mock_get):
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data("ftp://ftp.denx.de/pub/u-boot/u-boot-2017.11.tar.bz2")

package_url = "http://domain.com/a.zip;<params>?<query>#<fragment>"
download_url = "http://domain.com/a.zip;<params>?<query>#<fragment>"

default_max_length = download.CONTENT_MAX_LENGTH
download.CONTENT_MAX_LENGTH = 0
Expand All @@ -2223,11 +2223,14 @@ def test_collect_package_data(self, mock_get):
content=b"\x00", headers={"content-length": 300000000}, status_code=200
)
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data(package_url)
collect_package_data(download_url)

download.CONTENT_MAX_LENGTH = default_max_length
mock_get.return_value = mock.Mock(
content=b"\x00", headers={"content-length": 1}, status_code=200
content=b"\x00",
headers={"content-length": 1},
status_code=200,
url=download_url,
)
expected_data = {
"download_url": "http://domain.com/a.zip;<params>?<query>#<fragment>",
Expand All @@ -2241,7 +2244,7 @@ def test_collect_package_data(self, mock_get):
"4a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee"
),
}
self.assertEqual(expected_data, collect_package_data(package_url))
self.assertEqual(expected_data, collect_package_data(download_url))

expected_message = (
"Exception Value: HTTPConnectionPool"
Expand All @@ -2253,7 +2256,7 @@ def test_collect_package_data(self, mock_get):
)
mock_get.return_value = response
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data(package_url)
collect_package_data(download_url)

headers = {
"content-length": 1,
Expand All @@ -2272,7 +2275,7 @@ def test_collect_package_data(self, mock_get):
"4a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee"
),
}
self.assertEqual(expected_data, collect_package_data(package_url))
self.assertEqual(expected_data, collect_package_data(download_url))

def test_package_create_save_set_usage_policy_from_license(self):
from policy.models import AssociatedPolicy
Expand Down
11 changes: 7 additions & 4 deletions dejacode_toolkit/download.py
Expand Up @@ -7,8 +7,8 @@
#

import cgi
import os
import socket
from pathlib import Path
from urllib.parse import urlparse

from django.template.defaultfilters import filesizeformat
Expand All @@ -29,7 +29,7 @@ class DataCollectionException(Exception):

def collect_package_data(url):
try:
response = requests.get(url, timeout=10, stream=True)
response = requests.get(url, timeout=5, stream=True)
except (requests.RequestException, socket.timeout) as e:
raise DataCollectionException(e)

Expand All @@ -54,8 +54,11 @@ def collect_package_data(url):
)

content_disposition = response.headers.get("content-disposition", "")
value, params = cgi.parse_header(content_disposition)
filename = params.get("filename") or os.path.basename(urlparse(url).path)
_, params = cgi.parse_header(content_disposition)

# Using ``response.url`` in place of provided ``url`` arg since the former
# will be more accurate in case of HTTP redirect.
filename = params.get("filename") or Path(urlparse(response.url).path).name

package_data = {
"download_url": url,
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -155,7 +155,7 @@ install_requires =
openpyxl==3.1.2
et-xmlfile==1.1.0
# PackageURL
packageurl-python==0.12.0
packageurl-python==0.13.2
# Gunicorn
gunicorn==21.2.0
# SPDX validation
Expand Down
Binary file not shown.
14 changes: 0 additions & 14 deletions thirdparty/dist/packageurl_python-0.12.0-py3-none-any.whl.ABOUT

This file was deleted.

Binary file not shown.
12 changes: 12 additions & 0 deletions thirdparty/dist/packageurl_python-0.13.2-py3-none-any.whl.ABOUT
@@ -0,0 +1,12 @@
about_resource: packageurl_python-0.13.2-py3-none-any.whl
name: packageurl-python
version: 0.13.2
download_url: https://files.pythonhosted.org/packages/3a/61/719f92219f67025f31b1144f2f49532e0e550d057abefdddd09a3c73d98a/packageurl_python-0.13.2-py3-none-any.whl
package_url: pkg:pypi/packageurl-python@0.13.2
license_expression: mit
copyright: Copyright packageurl-python project contributors
attribute: yes
licenses:
- key: mit
name: MIT License
file: mit.LICENSE

0 comments on commit fa7c37d

Please sign in to comment.