diff --git a/fixtures/downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.log.gz b/fixtures/downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.log.gz index 113b78f..1769ac7 100644 Binary files a/fixtures/downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.log.gz and b/fixtures/downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.log.gz differ diff --git a/linehaul/ua/datastructures.py b/linehaul/ua/datastructures.py index 0003232..19f345c 100644 --- a/linehaul/ua/datastructures.py +++ b/linehaul/ua/datastructures.py @@ -19,6 +19,7 @@ class Installer: name = attr.ib(type=Optional[str], default=None) version = attr.ib(type=Optional[str], default=None) + subcommand = attr.ib(type=Optional[list[str]], default=None) @attr.s(slots=True, frozen=True) diff --git a/main.py b/main.py index 1e0f9f4..d6a3223 100644 --- a/main.py +++ b/main.py @@ -4,12 +4,15 @@ import os import json import gzip +import shlex from tempfile import NamedTemporaryFile from contextlib import ExitStack from linehaul.events.parser import parse, Download, Simple +from linehaul.ua.datastructures import Installer +from cattr.gen import make_dict_unstructure_fn, override import sentry_sdk from sentry_sdk.integrations.serverless import serverless_function from google.api_core import exceptions @@ -24,6 +27,20 @@ datetime.datetime, lambda o: o.strftime("%Y-%m-%d %H:%M:%S +00:00") ) + +def _unstructure_subcommand(subcommand: list[str] | None) -> str | None: + if subcommand is None: + return None + return shlex.join(subcommand) + + +_cattr.register_unstructure_hook( + Installer, + make_dict_unstructure_fn( + Installer, _cattr, subcommand=override(unstruct_hook=_unstructure_subcommand) + ), +) + DEFAULT_PROJECT = os.environ.get("GCP_PROJECT", "the-psf") RESULT_BUCKET = os.environ.get("RESULT_BUCKET") PUBSUB_TOPIC = os.environ.get("PUBSUB_TOPIC") diff --git a/test_functions.py b/test_functions.py index 8e8b967..3d9022b 100644 --- a/test_functions.py +++ b/test_functions.py @@ -1,7 +1,7 @@ import contextlib import datetime -from pathlib import Path from importlib import reload +from pathlib import Path import pretend import pytest @@ -17,17 +17,18 @@ [ ( "downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.log.gz", - b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl", "project": "threadpoolctl", "file": {"filename": "threadpoolctl-2.1.0-py3-none-any.whl", "project": "threadpoolctl", "version": "2.1.0", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.1.1"}, "python": "3.7.9", "implementation": {"name": "CPython", "version": "3.7.9"}, "distro": {"name": "Debian GNU/Linux", "version": "9", "id": "stretch", "libc": {"lib": "glibc", "version": "2.24"}}, "system": {"name": "Linux", "release": "4.15.0-112-generic"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.1.0l 10 Sep 2019", "setuptools_version": "47.1.0", "rustc_version": null, "ci": null}}\n' - b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/cd/f9/8fad70a3bd011a6be7c5c6067278f006a25341eb39d901fbda307e26804c/django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "file": {"filename": "django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "version": "0.7.9", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.0.2"}, "python": "3.8.5", "implementation": {"name": "CPython", "version": "3.8.5"}, "distro": {"name": "Ubuntu", "version": "16.04", "id": "xenial", "libc": {"lib": "glibc", "version": "2.23"}}, "system": {"name": "Linux", "release": "4.4.0-1113-aws"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.0.2g 1 Mar 2016", "setuptools_version": "44.1.0", "rustc_version": null, "ci": null}}\n' - b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/cd/f9/8fad70a3bd011a6be7c5c6067278f006a25341eb39d901fbda307e26804c/django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "file": {"filename": "django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "version": "0.7.9", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "pip", "version": "22.0.3"}, "python": "3.9.10", "implementation": {"name": "CPython", "version": "3.9.10"}, "distro": {"name": "macOS", "version": "12.3", "id": null, "libc": null}, "system": {"name": "Darwin", "release": "21.4.0"}, "cpu": "arm64", "openssl_version": "OpenSSL 1.1.1m 14 Dec 2021", "setuptools_version": "60.9.0", "rustc_version": "1.59.0", "ci": true}}\n', + b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl", "project": "threadpoolctl", "file": {"filename": "threadpoolctl-2.1.0-py3-none-any.whl", "project": "threadpoolctl", "version": "2.1.0", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.1.1", "subcommand": null}, "python": "3.7.9", "implementation": {"name": "CPython", "version": "3.7.9"}, "distro": {"name": "Debian GNU/Linux", "version": "9", "id": "stretch", "libc": {"lib": "glibc", "version": "2.24"}}, "system": {"name": "Linux", "release": "4.15.0-112-generic"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.1.0l 10 Sep 2019", "setuptools_version": "47.1.0", "rustc_version": null, "ci": null}}\n' + b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/cd/f9/8fad70a3bd011a6be7c5c6067278f006a25341eb39d901fbda307e26804c/django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "file": {"filename": "django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "version": "0.7.9", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.0.2", "subcommand": ""}, "python": "3.8.5", "implementation": {"name": "CPython", "version": "3.8.5"}, "distro": {"name": "Ubuntu", "version": "16.04", "id": "xenial", "libc": {"lib": "glibc", "version": "2.23"}}, "system": {"name": "Linux", "release": "4.4.0-1113-aws"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.0.2g 1 Mar 2016", "setuptools_version": "44.1.0", "rustc_version": null, "ci": null}}\n' + b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/cd/f9/8fad70a3bd011a6be7c5c6067278f006a25341eb39d901fbda307e26804c/django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "file": {"filename": "django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "version": "0.7.9", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "pip", "version": "22.0.3", "subcommand": "install \'something with a space\'"}, "python": "3.9.10", "implementation": {"name": "CPython", "version": "3.9.10"}, "distro": {"name": "macOS", "version": "12.3", "id": null, "libc": null}, "system": {"name": "Darwin", "release": "21.4.0"}, "cpu": "arm64", "openssl_version": "OpenSSL 1.1.1m 14 Dec 2021", "setuptools_version": "60.9.0", "rustc_version": "1.59.0", "ci": true}}\n' + b'{"timestamp": "2021-01-07 20:54:54 +00:00", "url": "/packages/cd/f9/8fad70a3bd011a6be7c5c6067278f006a25341eb39d901fbda307e26804c/django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "file": {"filename": "django_crum-0.7.9-py2.py3-none-any.whl", "project": "django-crum", "version": "0.7.9", "type": "bdist_wheel"}, "tls_protocol": "TLSv1.2", "tls_cipher": "ECDHE-RSA-AES128-GCM-SHA256", "country_code": "US", "details": {"installer": {"name": "uv", "version": "0.9.11", "subcommand": "pip install"}, "python": "3.9.10", "implementation": {"name": "CPython", "version": "3.9.10"}, "distro": {"name": "macOS", "version": "12.3", "id": null, "libc": null}, "system": {"name": "Darwin", "release": "21.4.0"}, "cpu": "arm64", "openssl_version": "OpenSSL 1.1.1m 14 Dec 2021", "setuptools_version": "60.9.0", "rustc_version": "1.59.0", "ci": true}}\n', b"download|Thu, 07 Jan 2021 20:54:56 GMT|US|/packages/c5/db/e56e6b4bbac7c4a06de1c50de6fe1ef3810018ae11732a50f15f62c7d050/enum34-1.1.6-py2-none-any.whl|TLSv1.2|ECDHE-RSA-AES128-GCM-SHA256|enum34|1.1.6|bdist_wheel|(null)\n", "unprocessed/20210107/downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.txt", "processed/20210107/downloads-downloads-2021-01-07-20-55-2021-01-07T20-55-00.000-B8Hs_G6d6xN61En2ypwk.json", ), ( "simple-2021-01-07-20-55-2021-01-07T20-55-00.000-3wuB00t9tqgbGLFI2fSI.log.gz", - b'{"timestamp": "2021-01-07 20:54:52 +00:00", "url": "/simple/azureml-model-management-sdk/", "project": "azureml-model-management-sdk", "tls_protocol": "TLSv1.3", "tls_cipher": "AES256-GCM", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.0.2"}, "python": "3.7.5", "implementation": {"name": "CPython", "version": "3.7.5"}, "distro": {"name": "Ubuntu", "version": "18.04", "id": "bionic", "libc": {"lib": "glibc", "version": "2.27"}}, "system": {"name": "Linux", "release": "4.15.0-1092-azure"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.1.1 11 Sep 2018", "setuptools_version": "45.2.0", "rustc_version": null, "ci": null}}\n' - b'{"timestamp": "2021-01-07 20:54:52 +00:00", "url": "/simple/pyrsistent/", "project": "pyrsistent", "tls_protocol": "TLSv1.3", "tls_cipher": "AES256-GCM", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.0.2"}, "python": "3.8.5", "implementation": {"name": "CPython", "version": "3.8.5"}, "distro": {"name": "Ubuntu", "version": "20.04", "id": "focal", "libc": {"lib": "glibc", "version": "2.31"}}, "system": {"name": "Linux", "release": "5.4.72-flatcar"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.1.1f 31 Mar 2020", "setuptools_version": "45.2.0", "rustc_version": null, "ci": true}}\n', + b'{"timestamp": "2021-01-07 20:54:52 +00:00", "url": "/simple/azureml-model-management-sdk/", "project": "azureml-model-management-sdk", "tls_protocol": "TLSv1.3", "tls_cipher": "AES256-GCM", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.0.2", "subcommand": null}, "python": "3.7.5", "implementation": {"name": "CPython", "version": "3.7.5"}, "distro": {"name": "Ubuntu", "version": "18.04", "id": "bionic", "libc": {"lib": "glibc", "version": "2.27"}}, "system": {"name": "Linux", "release": "4.15.0-1092-azure"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.1.1 11 Sep 2018", "setuptools_version": "45.2.0", "rustc_version": null, "ci": null}}\n' + b'{"timestamp": "2021-01-07 20:54:52 +00:00", "url": "/simple/pyrsistent/", "project": "pyrsistent", "tls_protocol": "TLSv1.3", "tls_cipher": "AES256-GCM", "country_code": "US", "details": {"installer": {"name": "pip", "version": "20.0.2", "subcommand": null}, "python": "3.8.5", "implementation": {"name": "CPython", "version": "3.8.5"}, "distro": {"name": "Ubuntu", "version": "20.04", "id": "focal", "libc": {"lib": "glibc", "version": "2.31"}}, "system": {"name": "Linux", "release": "5.4.72-flatcar"}, "cpu": "x86_64", "openssl_version": "OpenSSL 1.1.1f 31 Mar 2020", "setuptools_version": "45.2.0", "rustc_version": null, "ci": true}}\n', b"simple|Thu, 07 Jan 2021 20:54:52 GMT|US|/simple/numpy/|TLSv1.2|ECDHE-RSA-AES128-GCM-SHA256||||(null)\n", "unprocessed/20210107/simple-2021-01-07-20-55-2021-01-07T20-55-00.000-3wuB00t9tqgbGLFI2fSI.txt", "processed/20210107/simple-simple-2021-01-07-20-55-2021-01-07T20-55-00.000-3wuB00t9tqgbGLFI2fSI.json",