diff --git a/services/libs/tinybird/pipes/health_score_active_contributors.pipe b/services/libs/tinybird/pipes/health_score_active_contributors.pipe index 965bd8da8f..f7482f8751 100644 --- a/services/libs/tinybird/pipes/health_score_active_contributors.pipe +++ b/services/libs/tinybird/pipes/health_score_active_contributors.pipe @@ -11,7 +11,7 @@ SQL > memberId != '' AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -30,7 +30,7 @@ SQL > AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) AND timestamp >= toStartOfQuarter(now() - toIntervalQuarter(1)) AND timestamp < toStartOfQuarter(now()) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId {% end %} diff --git a/services/libs/tinybird/pipes/health_score_active_days.pipe b/services/libs/tinybird/pipes/health_score_active_days.pipe index f0c36ebab2..60edc4833a 100644 --- a/services/libs/tinybird/pipes/health_score_active_days.pipe +++ b/services/libs/tinybird/pipes/health_score_active_days.pipe @@ -6,7 +6,7 @@ SQL > FROM activityRelations_bucket_routing WHERE segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -23,7 +23,7 @@ SQL > WHERE timestamp >= toStartOfDay(now() - toIntervalDay(365)) AND timestamp < toStartOfDay(now()) - 
AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId {% end %} diff --git a/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe b/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe index 457fdc6a37..e4384cfb2a 100644 --- a/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe +++ b/services/libs/tinybird/pipes/health_score_contributions_outside_work_hours.pipe @@ -12,7 +12,7 @@ SQL > 1 = 1 {% if defined(project) %} AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -25,7 +25,7 @@ SQL > {% else %} AND timestamp >= toStartOfDay(now() - toIntervalDay(365)) AND timestamp < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% end %} GROUP BY segmentId diff --git a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe index f7f4be8082..605802aba3 100644 --- a/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_contributor_dependency.pipe @@ -8,7 +8,7 @@ SQL > memberId != '' AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT 
channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -28,7 +28,7 @@ SQL > AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId, memberId ORDER by contributionCount DESC {% end %} diff --git a/services/libs/tinybird/pipes/health_score_forks.pipe b/services/libs/tinybird/pipes/health_score_forks.pipe index 4da8f9204e..c36cc77817 100644 --- a/services/libs/tinybird/pipes/health_score_forks.pipe +++ b/services/libs/tinybird/pipes/health_score_forks.pipe @@ -10,7 +10,7 @@ SQL > WHERE type = 'fork' AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -24,7 +24,7 @@ SQL > {% else %} SELECT segmentId, count() AS forks FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE type = 'fork' AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + WHERE type = 'fork' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId {% end %} diff --git a/services/libs/tinybird/pipes/health_score_issues_resolution.pipe b/services/libs/tinybird/pipes/health_score_issues_resolution.pipe index 27ac4ba089..625fdf386d 100644 --- a/services/libs/tinybird/pipes/health_score_issues_resolution.pipe +++ b/services/libs/tinybird/pipes/health_score_issues_resolution.pipe @@ -17,7 +17,7 @@ SQL > WHERE segmentId = (SELECT segmentId FROM segments_filtered) AND closedAt IS NOT NULL - AND channel NOT IN (SELECT channel FROM 
repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND openedAt @@ -34,7 +34,7 @@ SQL > openedAt >= toStartOfDay(now()) - INTERVAL 365 DAY AND openedAt < toStartOfDay(now()) + INTERVAL 1 DAY AND closedAt IS NOT NULL - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId {% end %} diff --git a/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe b/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe index 9e86ea97f1..9b1e4eb7b7 100644 --- a/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe +++ b/services/libs/tinybird/pipes/health_score_merge_lead_time.pipe @@ -10,7 +10,7 @@ SQL > 1 = 1 {% if defined(project) %} AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND openedAt @@ -22,7 +22,7 @@ SQL > {% else %} AND openedAt >= toStartOfDay(now() - toIntervalDay(365)) AND openedAt < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% end %} GROUP BY segmentId diff --git a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe index a09d5e5822..4400d0f697 100644 --- a/services/libs/tinybird/pipes/health_score_organization_dependency.pipe +++ b/services/libs/tinybird/pipes/health_score_organization_dependency.pipe @@ -8,7 +8,7 @@ SQL > organizationId != '' AND (type, 
platform) IN (SELECT activityType, platform FROM activityTypes_filtered) AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -27,7 +27,7 @@ SQL > AND (type, platform) IN (SELECT activityType, platform FROM activityTypes_filtered) AND timestamp >= toStartOfDay(now() - INTERVAL 365 DAY) AND timestamp < toStartOfDay(now() + INTERVAL 1 DAY) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId, organizationId {% end %} diff --git a/services/libs/tinybird/pipes/health_score_pull_requests.pipe b/services/libs/tinybird/pipes/health_score_pull_requests.pipe index b56c78842b..4fc5c7ca80 100644 --- a/services/libs/tinybird/pipes/health_score_pull_requests.pipe +++ b/services/libs/tinybird/pipes/health_score_pull_requests.pipe @@ -14,7 +14,7 @@ SQL > OR type = 'changeset-created' ) AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -36,7 +36,7 @@ SQL > ) AND timestamp >= toStartOfDay(now() - toIntervalDay(365)) AND timestamp < toStartOfDay(now() + toIntervalDay(1)) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId {% end %} diff --git a/services/libs/tinybird/pipes/health_score_retention.pipe b/services/libs/tinybird/pipes/health_score_retention.pipe index e0145508a2..c679eab798 
100644 --- a/services/libs/tinybird/pipes/health_score_retention.pipe +++ b/services/libs/tinybird/pipes/health_score_retention.pipe @@ -7,7 +7,7 @@ SQL > WHERE memberId != '' AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(endDate) %} AND timestamp >= toStartOfQuarter( @@ -30,7 +30,7 @@ SQL > SELECT segmentId, groupUniqArray(memberId) AS currentQuarterMembers FROM activityRelations_deduplicated_cleaned_bucket_union WHERE - memberId != '' AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + memberId != '' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(endDate) %} AND timestamp >= toStartOfQuarter( parseDateTimeBestEffort( @@ -59,7 +59,7 @@ SQL > WHERE memberId != '' AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(endDate) %} AND timestamp >= toStartOfQuarter( parseDateTimeBestEffort( @@ -82,7 +82,7 @@ SQL > SELECT segmentId, groupUniqArray(memberId) AS previousQuarterMembers FROM activityRelations_deduplicated_cleaned_bucket_union WHERE - memberId != '' AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + memberId != '' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(endDate) %} AND timestamp >= toStartOfQuarter( parseDateTimeBestEffort( diff --git a/services/libs/tinybird/pipes/health_score_stars.pipe b/services/libs/tinybird/pipes/health_score_stars.pipe index 2debdaaaff..731c463ac2 100644 --- a/services/libs/tinybird/pipes/health_score_stars.pipe +++ b/services/libs/tinybird/pipes/health_score_stars.pipe @@ -10,7 +10,7 
@@ SQL > WHERE type = 'star' AND segmentId = (SELECT segmentId FROM segments_filtered) - AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) {% if defined(repos) %} AND channel IN (SELECT channel FROM repos_to_channels) {% end %} {% if defined(startDate) %} AND timestamp @@ -24,7 +24,7 @@ SQL > {% else %} SELECT segmentId, count() AS stars FROM activityRelations_deduplicated_cleaned_bucket_union - WHERE type = 'star' AND channel NOT IN (SELECT channel FROM repos_to_channels(excluded = True)) + WHERE type = 'star' AND channel NOT IN (SELECT channel FROM repos_to_channels_excluded) GROUP BY segmentId {% end %} diff --git a/services/libs/tinybird/pipes/repos_to_channels_excluded.pipe b/services/libs/tinybird/pipes/repos_to_channels_excluded.pipe new file mode 100644 index 0000000000..d4a808875b --- /dev/null +++ b/services/libs/tinybird/pipes/repos_to_channels_excluded.pipe @@ -0,0 +1,62 @@ +DESCRIPTION > + - `repos_to_channels_excluded.pipe` expands excluded repository URLs to all possible activity channel formats. + - For Gerrit repos, generates both the original URL and the /q/project: variant since activity channels use this format. + - Non-Gerrit repos are passed through unchanged. + - Used by activity filtering pipes to build exclusion filters for channels that belong to excluded repositories. 
+    - Response: `channel` - all possible channel URL formats for excluded repos
+
+TAGS "Repository URLs", "Gerrit"
+
+NODE repos_to_expand
+DESCRIPTION >
+    Excluded repository URLs to expand: rows that are not deleted, are enabled, and are flagged `excluded`
+
+SQL >
+    SELECT url FROM repositories FINAL WHERE isNull (deletedAt) AND enabled = true AND excluded = true
+
+NODE gerrit_repos
+DESCRIPTION >
+    Subset of the excluded URLs whose source integration is Gerrit (joined through `sourceIntegrationId`)
+
+SQL >
+    -- Only these URLs receive the additional /q/project: channel variant in expanded_urls
+    SELECT r.url
+    FROM repositories r FINAL
+    JOIN integrations i FINAL ON r.sourceIntegrationId = i.id
+    WHERE i.platform = 'gerrit' AND isNull (r.deletedAt) AND r.url IN (SELECT url FROM repos_to_expand)
+
+NODE expanded_urls
+DESCRIPTION >
+    Output original URLs plus Gerrit channel variants
+
+SQL >
+    -- Original URLs (all excluded repos)
+    SELECT url AS channel
+    FROM repos_to_expand
+    UNION ALL
+    -- Gerrit channel variants: insert q/project: after the base path
+    SELECT
+        CASE
+            -- Pattern: https://host/r/{project} → https://host/r/q/project:{project}
+            WHEN position(url, '/r/') > 0
+            THEN replaceOne(url, '/r/', '/r/q/project:')
+            -- Pattern: https://host/gerrit/{project} → https://host/gerrit/q/project:{project}
+            WHEN position(url, '/gerrit/') > 0
+            THEN replaceOne(url, '/gerrit/', '/gerrit/q/project:')
+            -- Pattern: https://host[:port]/{project} → https://host[:port]/q/project:{project}
+            -- netloc() keeps an explicit port (domain() would drop it), consistent with the replaceOne branches
+            ELSE
+                concat(
+                    protocol(url), '://', netloc(url),
+                    '/q/project:',
+                    if(path(url) = '/', '', substring(path(url), 2))
+                )
+        END AS channel
+    FROM gerrit_repos
+
+NODE channels_deduplicated
+DESCRIPTION >
+    Final deduplicated list of all possible channel URLs for excluded repos
+
+SQL >
+    SELECT DISTINCT channel FROM expanded_urls