From f2282d755a24084e15f86772a1e98f9f3df5ce94 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 28 Feb 2025 16:48:49 +0100 Subject: [PATCH 1/6] Add additional libraries to the download analytics collection --- config.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config.yaml b/config.yaml index fd83bcf..5fd787a 100644 --- a/config.yaml +++ b/config.yaml @@ -14,3 +14,8 @@ projects: - datomize - gretel-trainer - ydata-sdk + - mostlyai + - synthcity + - smartnoise-synth + - realtabformer + - be-great From 97592308e92047d1799897026c1c879e4292875f Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 3 Mar 2025 18:19:59 +0100 Subject: [PATCH 2/6] Fix daily build --- download_analytics/pypi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/download_analytics/pypi.py b/download_analytics/pypi.py index 9aec589..dd834c7 100644 --- a/download_analytics/pypi.py +++ b/download_analytics/pypi.py @@ -150,7 +150,7 @@ def get_pypi_downloads( if max_date is None: all_downloads = new_downloads else: - if max_date < end_date: + if pd.Timestamp(max_date) < pd.Timestamp(end_date): before = previous[previous.timestamp < new_downloads.timestamp.min()] after = new_downloads else: From 1efe5f0ed0342e6056e5301877f84d3cc0c25998 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 3 Mar 2025 18:32:39 +0100 Subject: [PATCH 3/6] Use fixed httplib2 version --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1b7d17d..e28e030 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,8 @@ dependencies = [ "PyYAML", "PyDrive", "google-cloud-bigquery", - "db-dtypes" + "db-dtypes", + "httplib2==0.15.0", # https://stackoverflow.com/questions/59815620/gcloud-upload-httplib2-redirectmissinglocation-redirected-but-the-response-is-m ] [project.urls] From 44848437c86f2159c080d896c1018119e69b313b Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Wed, 5 Mar 2025 19:42:51 +0100 Subject: [PATCH 4/6] Improve way of inserting columns --- download_analytics/metrics.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/download_analytics/metrics.py b/download_analytics/metrics.py index a50569f..b68bc81 100644 --- a/download_analytics/metrics.py +++ b/download_analytics/metrics.py @@ -35,17 +35,21 @@ def _historical_groupby(downloads, groupbys=None): if groupbys is None: groupbys = downloads.set_index('timestamp').columns + new_columns = [] # Collect grouped DataFrames here + for groupby in groupbys: grouped = downloads.groupby([year_month, groupby]) grouped_sizes = grouped.size().unstack(-1) # noqa: PD010 if len(groupbys) > 1: grouped_sizes.columns = f"{groupby}='" + grouped_sizes.columns + "'" + new_columns.append(grouped_sizes.fillna(0)) # Store for later - base[grouped_sizes.columns] = grouped_sizes.fillna(0) + if new_columns: + base = pd.concat([base] + new_columns, axis=1) # Add all columns at once totals = base.sum() totals.name = 'total' - base = pd.concat([base, totals], ignore_index=True) + base = pd.concat([base, totals.to_frame().T], ignore_index=True) return base.reset_index().iloc[::-1] From 29910ee30ab73f330424dd9269e752589a30fc93 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Wed, 5 Mar 2025 23:52:37 +0100 Subject: [PATCH 5/6] Update metrics.py --- download_analytics/metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/download_analytics/metrics.py b/download_analytics/metrics.py index b68bc81..21e90c6 100644 --- a/download_analytics/metrics.py +++ b/download_analytics/metrics.py @@ -35,17 +35,17 @@ def _historical_groupby(downloads, groupbys=None): if groupbys is None: groupbys = downloads.set_index('timestamp').columns - new_columns = [] # Collect grouped DataFrames here - + new_columns = [] for groupby in groupbys: grouped = downloads.groupby([year_month, groupby]) grouped_sizes = grouped.size().unstack(-1) # noqa: PD010 if len(groupbys) > 1: grouped_sizes.columns = f"{groupby}='" + grouped_sizes.columns + "'" - new_columns.append(grouped_sizes.fillna(0)) # Store for later + + new_columns.append(grouped_sizes.fillna(0)) if new_columns: - base = pd.concat([base] + new_columns, axis=1) # Add all columns at once + base = pd.concat([base] + new_columns, axis=1) totals = base.sum() totals.name = 'total' From 90548db9471f599fc8eca93741835febb64944a8 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Thu, 6 Mar 2025 00:02:02 +0100 Subject: [PATCH 6/6] Update metrics.py --- download_analytics/metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/download_analytics/metrics.py b/download_analytics/metrics.py index 21e90c6..3b21667 100644 --- a/download_analytics/metrics.py +++ b/download_analytics/metrics.py @@ -41,7 +41,6 @@ def _historical_groupby(downloads, groupbys=None): grouped_sizes = grouped.size().unstack(-1) # noqa: PD010 if len(groupbys) > 1: grouped_sizes.columns = f"{groupby}='" + grouped_sizes.columns + "'" - new_columns.append(grouped_sizes.fillna(0)) if new_columns: