diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml
index 6ef55bc..35b4f08 100644
--- a/.github/workflows/daily.yaml
+++ b/.github/workflows/daily.yaml
@@ -1,6 +1,7 @@
 name: Daily Collection
 
 on:
+  workflow_dispatch:
   schedule:
     - cron: '0 0 * * *'
 
@@ -8,11 +9,11 @@ jobs:
   collect:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: '3.13'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
diff --git a/.github/workflows/dryrun.yaml b/.github/workflows/dryrun.yaml
index 3463660..6953831 100644
--- a/.github/workflows/dryrun.yaml
+++ b/.github/workflows/dryrun.yaml
@@ -9,11 +9,11 @@ jobs:
   collect:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: '3.13'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 0000000..e84f94b
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,23 @@
+name: Style Checks
+
+on:
+  push:
+  pull_request:
+    types: [opened, reopened]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install invoke .[dev]
+      - name: Run lint checks
+        run: invoke lint
+
diff --git a/.github/workflows/manual.yaml b/.github/workflows/manual.yaml
index f6b2d0e..6d9b8d2 100644
--- a/.github/workflows/manual.yaml
+++ b/.github/workflows/manual.yaml
@@ -24,11 +24,11 @@ jobs:
   collect:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: '3.13'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
diff --git a/Makefile b/Makefile
index 461cf0f..f3672fb 100644
--- a/Makefile
+++ b/Makefile
@@ -59,7 +59,7 @@ install: clean-build clean-pyc ## install the package to the active Python's sit
 
 .PHONY: install-develop
 install-develop: clean-build clean-pyc ## install the package in editable mode and dependencies for development
-	pip install -r dev-requirements.txt -e .
+	pip install -e .[dev]
 
 
 # LINT TARGETS
@@ -67,3 +67,7 @@ install-develop: clean-build clean-pyc ## install the package in editable mode a
 .PHONY: lint
 lint: ## check style with flake8 and isort
 	invoke lint
+
+.PHONY: fix-lint
+fix-lint:
+	invoke fix-lint
diff --git a/dev-requirements.txt b/dev-requirements.txt
deleted file mode 100644
index fa7827c..0000000
--- a/dev-requirements.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-invoke
-pytest
-pycodestyle<2.8.0>=2.7.0
-pyflakes<2.4.0>=2.3.0
-flake8>=3.7.7<4
-flake8-absolute-import>=1.0<2
-flake8-builtins>=1.5.3<1.6
-flake8-comprehensions>=3.6.1<3.7
-flake8-debugger>=4.0.0<4.1
-flake8-docstrings>=1.5.0<2
-flake8-mock>=0.3<0.4
-flake8-variables-names>=0.0.4<0.1
-dlint>=0.11.0<0.12
-flake8-fixme>=1.1.1<1.2
-flake8-eradicate>=1.1.0<1.2
-flake8-mutable>=1.2.0<1.3
-flake8-print>=4.0.0<4.1
-isort>=4.3.4<5
-pylint>=2.5.3<3
-pandas-vet>=0.2.2<0.3
-flake8-pytest-style>=1.5.0<2
-flake8-quotes>=3.3.0<4
-flake8-expression-complexity>=0.0.9<0.1
-pep8-naming>=0.12.1<0.13
-pydocstyle>=6.1.1<6.2
-flake8-sfs>=0.0.3<0.1
diff --git a/download_analytics/__main__.py b/download_analytics/__main__.py
index 3fc15d2..312184c 100644
--- a/download_analytics/__main__.py
+++ b/download_analytics/__main__.py
@@ -71,15 +71,21 @@ def _valid_date(arg):
 def _get_parser():
     # Logging
     logging_args = argparse.ArgumentParser(add_help=False)
-    logging_args.add_argument('-v', '--verbose', action='count', default=0,
-                              help='Be verbose. Use `-vv` for increased verbosity.')
-    logging_args.add_argument('-l', '--logfile',
-                              help='If given, file where the logs will be written.')
+    logging_args.add_argument(
+        '-v',
+        '--verbose',
+        action='count',
+        default=0,
+        help='Be verbose. Use `-vv` for increased verbosity.',
+    )
+    logging_args.add_argument(
+        '-l', '--logfile', help='If given, file where the logs will be written.'
+    )
 
     parser = argparse.ArgumentParser(
         prog='download-analytics',
         description='Download Analytics Command Line Interface',
-        parents=[logging_args]
+        parents=[logging_args],
     )
     parser.set_defaults(action=None)
     action = parser.add_subparsers(title='action')
@@ -90,36 +96,67 @@ def _get_parser():
     collect.set_defaults(action=_collect)
 
     collect.add_argument(
-        '-o', '--output-folder', type=str, required=False,
+        '-o',
+        '--output-folder',
+        type=str,
+        required=False,
         help=(
             'Path to the folder where data will be stored. It can be a local path or a'
             ' Google Drive folder path in the format gdrive://'
-        )
+        ),
     )
     collect.add_argument(
-        '-a', '--authentication-credentials', type=str, required=False,
-        help='Path to the GCP (BigQuery) credentials file to use.')
+        '-a',
+        '--authentication-credentials',
+        type=str,
+        required=False,
+        help='Path to the GCP (BigQuery) credentials file to use.',
+    )
     collect.add_argument(
-        '-c', '--config-file', type=str, default='config.yaml',
-        help='Path to the configuration file.')
+        '-c',
+        '--config-file',
+        type=str,
+        default='config.yaml',
+        help='Path to the configuration file.',
+    )
     collect.add_argument(
-        '-p', '--projects', nargs='*',
-        help='List of projects to collect. If not given use the configured ones.')
+        '-p',
+        '--projects',
+        nargs='*',
+        help='List of projects to collect. If not given use the configured ones.',
+    )
     collect.add_argument(
-        '-s', '--start-date', type=_valid_date, required=False,
-        help='Date from which to start pulling data.')
+        '-s',
+        '--start-date',
+        type=_valid_date,
+        required=False,
+        help='Date from which to start pulling data.',
+    )
     collect.add_argument(
-        '-m', '--max-days', type=int, required=False,
-        help='Max days of data to pull if start-date is not given.')
+        '-m',
+        '--max-days',
+        type=int,
+        required=False,
+        help='Max days of data to pull if start-date is not given.',
+    )
     collect.add_argument(
-        '-d', '--dry-run', action='store_true',
-        help='Do not run the actual query, only simulate it.')
+        '-d',
+        '--dry-run',
+        action='store_true',
+        help='Do not run the actual query, only simulate it.',
+    )
     collect.add_argument(
-        '-f', '--force', action='store_true',
-        help='Force the download even if the data already exists or there is a gap')
+        '-f',
+        '--force',
+        action='store_true',
+        help='Force the download even if the data already exists or there is a gap',
+    )
     collect.add_argument(
-        '-M', '--add-metrics', action='store_true',
-        help='Compute the aggregation metrics and create the corresponding spreadsheets.')
+        '-M',
+        '--add-metrics',
+        action='store_true',
+        help='Compute the aggregation metrics and create the corresponding spreadsheets.',
+    )
 
     return parser
diff --git a/download_analytics/bq.py b/download_analytics/bq.py
index dcfa384..7e00a09 100644
--- a/download_analytics/bq.py
+++ b/download_analytics/bq.py
@@ -30,7 +30,10 @@ def _get_bq_client(credentials_file):
         scopes=['https://www.googleapis.com/auth/cloud-platform'],
     )
 
-    return bigquery.Client(credentials=credentials, project=credentials.project_id,)
+    return bigquery.Client(
+        credentials=credentials,
+        project=credentials.project_id,
+    )
 
 
 def run_query(query, dry_run=False, credentials_file=None):
@@ -41,14 +44,14 @@ def run_query(query, dry_run=False, credentials_file=None):
     job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
     dry_run_job = client.query(query, job_config=job_config)
 
-    LOGGER.info('Estimated processed GBs: %.2f', dry_run_job.total_bytes_processed / 1024 ** 3)
+    LOGGER.info('Estimated processed GBs: %.2f', dry_run_job.total_bytes_processed / 1024**3)
 
     if dry_run:
         return None
 
     query_job = client.query(query)
     data = query_job.to_dataframe()
-    LOGGER.info('Total processed GBs: %.2f', query_job.total_bytes_processed / 1024 ** 3)
-    LOGGER.info('Total billed GBs: %.2f', query_job.total_bytes_billed / 1024 ** 3)
+    LOGGER.info('Total processed GBs: %.2f', query_job.total_bytes_processed / 1024**3)
+    LOGGER.info('Total billed GBs: %.2f', query_job.total_bytes_billed / 1024**3)
 
     return data
diff --git a/download_analytics/drive.py b/download_analytics/drive.py
index cc1d2e0..b29362c 100644
--- a/download_analytics/drive.py
+++ b/download_analytics/drive.py
@@ -91,11 +91,7 @@ def upload(content, filename, folder, convert=False):
     except FileNotFoundError:
         file_config = {
             'title': filename,
-            'parents': [
-                {
-                    'id': folder
-                }
-            ],
+            'parents': [{'id': folder}],
         }
 
         drive_file = drive.CreateFile(file_config)
diff --git a/download_analytics/main.py b/download_analytics/main.py
index 5962e0c..f58b19f 100644
--- a/download_analytics/main.py
+++ b/download_analytics/main.py
@@ -9,8 +9,16 @@
 LOGGER = logging.getLogger(__name__)
 
 
-def collect_downloads(projects, output_folder, start_date=None, max_days=1, credentials_file=None,
-                      dry_run=False, force=False, add_metrics=True):
+def collect_downloads(
+    projects,
+    output_folder,
+    start_date=None,
+    max_days=1,
+    credentials_file=None,
+    dry_run=False,
+    force=False,
+    add_metrics=True,
+):
     """Pull data about the downloads of a list of projects.
 
     Args:
@@ -49,7 +57,7 @@ def collect_downloads(projects, output_folder, start_date=None, max_days=1, cred
         max_days=max_days,
         credentials_file=credentials_file,
         dry_run=dry_run,
-        force=force
+        force=force,
     )
 
     if pypi_downloads.empty:
diff --git a/download_analytics/metrics.py b/download_analytics/metrics.py
index 595ae88..a50569f 100644
--- a/download_analytics/metrics.py
+++ b/download_analytics/metrics.py
@@ -3,6 +3,8 @@
 import logging
 import re
 
+import pandas as pd
+
 from download_analytics.output import create_spreadsheet
 
 LOGGER = logging.getLogger(__name__)
@@ -35,7 +37,7 @@ def _historical_groupby(downloads, groupbys=None):
 
     for groupby in groupbys:
         grouped = downloads.groupby([year_month, groupby])
-        grouped_sizes = grouped.size().unstack(-1)
+        grouped_sizes = grouped.size().unstack(-1)  # noqa: PD010
 
         if len(groupbys) > 1:
             grouped_sizes.columns = f"{groupby}='" + grouped_sizes.columns + "'"
@@ -43,16 +45,13 @@ def _historical_groupby(downloads, groupbys=None):
 
     totals = base.sum()
     totals.name = 'total'
-    base = base.append(totals)
+    base = pd.concat([base, totals.to_frame().T])
 
     return base.reset_index().iloc[::-1]
 
 
 def _get_sheet_name(column):
-    words = [
-        f'{word[0].upper()}{word[1:]}'
-        for word in column.split('_')
-    ]
+    words = [f'{word[0].upper()}{word[1:]}' for word in column.split('_')]
 
     return ' '.join(['By'] + words)
@@ -121,7 +120,7 @@ def _version_element_order_key(version):
             # while it shouldn't enter the `if`.
             pass
 
-        components.append(last_component[len(last_numeric):])
+        components.append(last_component[len(last_numeric) :])
 
     return components
@@ -133,7 +132,7 @@ def _version_order_key(version_column):
 def _mangle_columns(downloads):
     downloads = downloads.rename(columns=RENAME_COLUMNS)
     downloads['full_python_version'] = downloads['python_version']
-    downloads['python_version'] = downloads['python_version'].str.rsplit('.', 1).str[0]
+    downloads['python_version'] = downloads['python_version'].str.rsplit('.', n=1).str[0]
     downloads['project_version'] = downloads['project'] + '-' + downloads['version']
     downloads['distro_version'] = downloads['distro_name'] + ' ' + downloads['distro_version']
     downloads['distro_kernel'] = downloads['distro_version'] + ' - ' + downloads['distro_kernel']
@@ -150,9 +149,7 @@ def compute_metrics(downloads, output_path=None):
     downloads = _mangle_columns(downloads)
 
     LOGGER.debug('Aggregating by month')
-    sheets = {
-        'By Month': _by_month(downloads)
-    }
+    sheets = {'By Month': _by_month(downloads)}
 
     for column in GROUPBY_COLUMNS:
         name = _get_sheet_name(column)
diff --git a/download_analytics/pypi.py b/download_analytics/pypi.py
index 4463697..9aec589 100644
--- a/download_analytics/pypi.py
+++ b/download_analytics/pypi.py
@@ -57,8 +57,7 @@ def _get_query(projects, start_date, end_date):
     if isinstance(projects, str):
         projects = f"('{projects}')"
 
-    LOGGER.info('Querying for projects `%s` between `%s` and `%s`',
-                projects, start_date, end_date)
+    LOGGER.info('Querying for projects `%s` between `%s` and `%s`', projects, start_date, end_date)
 
     return QUERY_TEMPLATE.format(
         projects=projects,
@@ -72,11 +71,14 @@ def _get_query_dates(start_date, min_date, max_date, max_days, force=False):
     if start_date is None:
         start_date = end_date - timedelta(days=max_days)
 
-    if pd.notna(min_date) and min_date > start_date:
-        if not force:
-            end_date = min_date
+    if pd.notna(min_date):
+        min_date = pd.Timestamp(min_date).date()
+        if min_date > start_date:
+            if not force:
+                end_date = min_date
 
     elif pd.notna(max_date) and not force:
+        max_date = pd.Timestamp(max_date).date()
         if max_date > start_date:
             start_date = max_date
         else:
@@ -85,8 +87,16 @@ def _get_query_dates(start_date, min_date, max_date, max_days, force=False):
 
     return start_date, end_date
 
 
-def get_pypi_downloads(projects, start_date=None, end_date=None, previous=None,
-                       max_days=1, credentials_file=None, dry_run=False, force=False):
+def get_pypi_downloads(
+    projects,
+    start_date=None,
+    end_date=None,
+    previous=None,
+    max_days=1,
+    credentials_file=None,
+    dry_run=False,
+    force=False,
+):
     """Get PyPI downloads data from the Big Query dataset.
 
     Args:
@@ -118,7 +128,7 @@ def get_pypi_downloads(projects, start_date=None, end_date=None, previous=None,
     """
     if previous is not None:
         if isinstance(projects, str):
-            projects = (projects, )
+            projects = (projects,)
 
         previous_projects = previous[previous.project.isin(projects)]
         min_date = previous_projects.timestamp.min()
@@ -147,7 +157,7 @@ def get_pypi_downloads(projects, start_date=None, end_date=None, previous=None,
             before = new_downloads
             after = previous[previous.timestamp > new_downloads.timestamp.max()]
 
-            all_downloads = before.append(after, ignore_index=True)
+            all_downloads = pd.concat([before, after], ignore_index=True)
 
         LOGGER.info('Obtained %s new downloads', len(all_downloads) - len(previous))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..1b7d17d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,85 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "download-analytics"
+version = "0.0.1.dev0"
+description = "Scripts to extract metrics about OSS project downloads."
+readme = "README.md"
+authors = [
+    { name = "DataCebo", email = "info@datacebo.com" }
+]
+requires-python = ">=3.13"
+dependencies = [
+    "pandas>=2.2.3",
+    "tqdm",
+    "openpyxl",
+    "xlsxwriter",
+    "requests",
+    "python-benedict",
+    "PyYAML",
+    "PyDrive",
+    "google-cloud-bigquery",
+    "db-dtypes"
+]
+
+[project.urls]
+Homepage = "https://github.com/DataCebo/download-analytics"
+
+[project.scripts]
+download-analytics = "download_analytics.__main__:main"
+
+[tool.setuptools.packages.find]
+include = ['download_analytics', 'download_analytics.*']
+
+[project.optional-dependencies]
+dev = [
+    "ruff>=0.9.8",
+    "invoke"
+]
+
+[tool.ruff]
+preview = true
+line-length = 100
+indent-width = 4
+src = ["download_analytics"]
+exclude = [
+    "docs",
+    ".tox",
+    ".git",
+    "__pycache__",
+    "*.ipynb",
+    ".ipynb_checkpoints",
+]
+
+[tool.ruff.lint]
+select = [
+    "F", "E", "W", "D", "I001", "T201", "PD", "NPY201"
+]
+ignore = [
+    "D107", "D417", "PD901", "PD101"
+]
+
+[tool.ruff.format]
+quote-style = "single"
+indent-style = "space"
+preview = true
+docstring-code-format = true
+docstring-code-line-length = "dynamic"
+
+[tool.ruff.lint.isort]
+known-first-party = ["download_analytics"]
+lines-between-types = 0
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
+"errors.py" = ["D105"]
+"tests/**.py" = ["D"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.pycodestyle]
+max-doc-length = 100
+max-line-length = 100
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index e985831..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,28 +0,0 @@
-[bdist_wheel]
-universal = 1
-
-[flake8]
-max-line-length = 99
-inline-quotes = single
-extend-ignore = D107, SFS3, PD005, D105, PD011, PD010
-exclude = .tox, .git, __pycache__, .ipynb_checkpoints
-
-[isort]
-line_length = 99
-lines_between_types = 0
-multi_line_output = 4
-use_parentheses = True
-
-[pydocstyle]
-convention = google
-add-ignore = D107, D407, D417
-
-[pylint]
-extension-pkg-whitelist = numpy
-min-similarity-lines = 5
-max-args = 8
-max-attributes = 11
-ignore-comments = yes
-ignore-docstrings = yes
-ignore-imports = yes
-disable = R0801, R0903, R0913, R0914, C0209, W0223, W0221, W0237, E1136, E1137, E1101
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 08f3711..0000000
--- a/setup.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""The setup script."""
-
-from setuptools import setup, find_packages
-
-with open('README.md', encoding='utf-8') as readme_file:
-    readme = readme_file.read()
-
-install_requires = [
-    'pandas==1.4.2',
-    'google-cloud-bigquery==3.0.1',
-    'db-dtypes==1.0.0',
-    'openpyxl==3.0.9',
-    'xlsxwriter==3.0.3',
-    'PyYAML==6.0',
-    'PyDrive==1.3.1',
-    'httplib2==0.15.0',  # noqa: https://stackoverflow.com/questions/59815620/gcloud-upload-httplib2-redirectmissinglocation-redirected-but-the-response-is-m
-]
-
-
-setup(
-    author='DataCebo',
-    author_email='info@datacebo.com',
-    classifiers=[
-        'Development Status :: 2 - Pre-Alpha',
-        'Intended Audience :: Developers',
-        'Natural Language :: English',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-    ],
-    description='Scripts to extract metrics about OSS project downloads.',
-    entry_points={
-        'console_scripts': [
-            'download-analytics=download_analytics.__main__:main'
-        ]
-    },
-    include_package_data=True,
-    install_requires=install_requires,
-    keywords='download-analytics',
-    long_description=readme,
-    long_description_content_type='text/markdown',
-    name='download-analytics',
-    packages=find_packages(include=['download_analytics', 'download_analytics.*']),
-    python_requires='>=3.7,<3.10',
-    version='0.0.1.dev0',
-    zip_safe=False,
-)
diff --git a/tasks.py b/tasks.py
index 83f6850..cb98d01 100644
--- a/tasks.py
+++ b/tasks.py
@@ -1,9 +1,17 @@
+"""Invoke task definitions."""
+
 from invoke import task
 
 
 @task
 def lint(c):
-    c.run('flake8 download_analytics')
-    c.run('pydocstyle download_analytics')
-    c.run('isort -c download_analytics')
-    c.run('pylint download_analytics --rcfile=setup.cfg')
+    """Run lint checks using ruff."""
+    c.run('ruff check .')
+    c.run('ruff format --check --diff .')
+
+
+@task
+def fix_lint(c):
+    """Automatically fix lint issues using ruff."""
+    c.run('ruff check --fix .')
+    c.run('ruff format .')
diff --git a/tests/test_pypi.py b/tests/test_pypi.py
deleted file mode 100644
index 72fd69c..0000000
--- a/tests/test_pypi.py
+++ /dev/null
@@ -1,256 +0,0 @@
-from datetime import datetime
-from unittest.mock import patch
-
-import pytest
-
-from download_analytics.pypi import _get_query_dates
-
-
-@patch('download_analytics.pypi.datetime')
-def test__get_query_dates_all_dates_none(datetime_mock):
-    """If all dates are none, end is utcnow and start is end - max_days.
-
-    Setup:
-        - mock utcnow to 2021-11-30
-
-    Input:
-        - all Nones, max_days = 3
-
-    Expected Output:
-        - start_date = 2021-11-27
-        - end_date = 2021-11-30
-    """
-    # setup
-    datetime_mock.utcnow.return_value = datetime(2021, 11, 30)
-
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=None,
-        min_date=None,
-        max_date=None,
-        max_days=3,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 27)
-    assert end_date == datetime(2021, 11, 30)
-
-
-@patch('download_analytics.pypi.datetime')
-def test__get_query_dates_start_date_given_min_max_none(datetime_mock):
-    """If only start_date is given and min/max are None, return start_date and utcnow.
-
-    Setup:
-        - mock utcnow to 2021-11-30
-
-    Input:
-        - start_date given
-        - min and max dates None
-        - max days = 3
-
-    Expected Output:
-        - start_date = given start date
-        - end_date = utcnow
-    """
-    # setup
-    datetime_mock.utcnow.return_value = datetime(2021, 11, 30)
-
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=datetime(2021, 11, 1),
-        min_date=None,
-        max_date=None,
-        max_days=3,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 1)
-    assert end_date == datetime(2021, 11, 30)
-
-
-def test__get_query_dates_start_date_given_min_after_start():
-    """If start_date is given and min_date is after, return start_date and min_date.
-
-    Input:
-        - start_date given
-        - min_date after start_date
-        - max_date after min_date
-        - max days = 3
-
-    Expected Output:
-        - start_date = given start date
-        - end_date = min_date
-    """
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=datetime(2021, 11, 1),
-        min_date=datetime(2021, 11, 5),
-        max_date=datetime(2021, 11, 15),
-        max_days=3,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 1)
-    assert end_date == datetime(2021, 11, 5)
-
-
-@patch('download_analytics.pypi.datetime')
-def test__get_query_dates_start_date_given_min_after_start_force_true(datetime_mock):
-    """If start_date is given and force is true, return start_date utcnow always.
-
-    Setup:
-        - mock utcnow to 2021-11-30
-
-    Input:
-        - start_date given
-        - min_date after start_date
-        - max_date after min_date
-        - max days = 3
-        - force = True
-
-    Expected Output:
-        - start_date = given start date
-        - end_date = utcnow
-    """
-    # setup
-    datetime_mock.utcnow.return_value = datetime(2021, 11, 30)
-
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=datetime(2021, 11, 1),
-        min_date=datetime(2021, 11, 5),
-        max_date=datetime(2021, 11, 15),
-        max_days=3,
-        force=True,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 1)
-    assert end_date == datetime(2021, 11, 30)
-
-
-@patch('download_analytics.pypi.datetime')
-def test__get_query_dates_start_date_given_max_after(datetime_mock):
-    """If start_date is given, min_date before and max_date is after, return max_date and utcnow.
-
-    Setup:
-        - mock utcnow to 2021-11-30
-
-    Input:
-        - start_date given
-        - min_date before start_date
-        - max_date after start_date
-        - max days = 3
-
-    Expected Output:
-        - start_date = max_date
-        - end_date = utcnow
-    """
-    # setup
-    datetime_mock.utcnow.return_value = datetime(2021, 11, 30)
-
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=datetime(2021, 11, 10),
-        min_date=datetime(2021, 11, 1),
-        max_date=datetime(2021, 11, 15),
-        max_days=3,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 15)
-    assert end_date == datetime(2021, 11, 30)
-
-
-@patch('download_analytics.pypi.datetime')
-def test__get_query_dates_start_date_given_max_after_force_true(datetime_mock):
-    """If start_date is given and force is true, return start_date and utcnow always.
-
-    Setup:
-        - mock utcnow to 2021-11-30
-
-    Input:
-        - start_date given
-        - min_date before start_date
-        - max_date after start_date
-        - max days = 3
-        - force = True
-
-    Expected Output:
-        - start_date = start_date
-        - end_date = utcnow
-    """
-    # setup
-    datetime_mock.utcnow.return_value = datetime(2021, 11, 30)
-
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=datetime(2021, 11, 10),
-        min_date=datetime(2021, 11, 1),
-        max_date=datetime(2021, 11, 15),
-        max_days=3,
-        force=True,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 10)
-    assert end_date == datetime(2021, 11, 30)
-
-
-def test__get_query_dates_start_date_given_max_before():
-    """If start_date is given, max is before and force is false, raise an error.
-
-    Input:
-        - start_date given
-        - min_date before start_date
-        - max_date before start_date
-        - max days = 3
-
-    Expected Output:
-        - start_date = start_date
-        - end_date = utcnow
-    """
-    # run
-    msg = 'start_date=2021-11-15 00:00:00 and max_date=2021-11-10 00:00:00 are creating a gap'
-    with pytest.raises(ValueError, match=msg):
-        start_date, end_date = _get_query_dates(
-            start_date=datetime(2021, 11, 15),
-            min_date=datetime(2021, 11, 1),
-            max_date=datetime(2021, 11, 10),
-            max_days=3,
-        )
-
-
-@patch('download_analytics.pypi.datetime')
-def test__get_query_dates_start_date_given_max_before_force_true(datetime_mock):
-    """If start_date is given, max is before and force is true, return start_date, utcnow.
-
-    Setup:
-        - mock utcnow to 2021-11-30
-
-    Input:
-        - start_date given
-        - min_date before start_date
-        - max_date before start_date
-        - max days = 3
-        - force = True
-
-    Expected Output:
-        - start_date = start_date
-        - end_date = utcnow
-    """
-    # setup
-    datetime_mock.utcnow.return_value = datetime(2021, 11, 30)
-
-    # run
-    start_date, end_date = _get_query_dates(
-        start_date=datetime(2021, 11, 15),
-        min_date=datetime(2021, 11, 1),
-        max_date=datetime(2021, 11, 10),
-        max_days=3,
-        force=True,
-    )
-
-    # assert
-    assert start_date == datetime(2021, 11, 15)
-    assert end_date == datetime(2021, 11, 30)