linuxfoundation · gaspergrom · Nov 19, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/services/libs/tinybird/datasources/insights_projects_populated_ds.datasource b/services/libs/tinybird/datasources/insights_projects_populated_ds.datasource
@@ -54,7 +54,10 @@ SCHEMA >
     `softwareValue` UInt64,
     `contributorCount` UInt64,
     `organizationCount` UInt64,
-    `healthScore` Float64
+    `healthScore` Float64,
+    `communityPlatforms` Array(String),
+    `communityKeywords` Array(String),
+    `communityLanguages` Array(String)
 
 ENGINE MergeTree
 ENGINE_PARTITION_KEY toYear(createdAt)

diff --git a/services/libs/tinybird/datasources/mentions.datasource b/services/libs/tinybird/datasources/mentions.datasource
@@ -0,0 +1,57 @@
+DESCRIPTION >
+    - `mentions` contains community mentions from various sources tracked via Octolens integration.
+    - Raw datasource only exists in Tinybird - pushed directly from Octolens webhook processing.
+    - Tracks mentions across platforms like Reddit, HackerNews, Twitter, and other community sources.
+    - Includes sentiment analysis and relevance scoring for each mention.
+    - Rows sharing the same (`projectSlug`, `timestamp`, `sourceId`) are deduplicated by ReplacingMergeTree, keeping the row with the latest `createdAt`.
+    - `sourceId` is the unique identifier from the source platform.
+    - `url` is the direct link to the mention on the source platform.
+    - `timestamp` is when the mention occurred on the source platform.
+    - `source` indicates the source platform (reddit, hackernews, twitter, etc.) using LowCardinality.
+    - `author` is the username/display name of the person who created the mention.
+    - `authorProfileLink` is the URL to the author's profile on the source platform.
+    - `title` contains the mention's title or subject line.
+    - `body` contains the full text content of the mention.
+    - `imageUrl` contains the URL to any associated image (empty string if not available).
+    - `relevanceScore` is the computed relevance score from Octolens (string representation).
+    - `relevanceComment` contains the explanation for the relevance score.
+    - `keyword` is the keyword that triggered this mention match.
+    - `sentimentLabel` provides the sentiment classification (positive, negative, neutral, mixed).
+    - `subreddit` contains the subreddit name for Reddit mentions (empty string for other sources).
+    - `viewId` is the Octolens view identifier that captured this mention.
+    - `viewName` is the human-readable name of the Octolens view.
+    - `language` is the language reported for the mention (empty string if not provided).
+    - `projectSlug` identifies which project this mention belongs to.
+    - `createdAt` is the timestamp when the record was created in Tinybird.
+    - `bookmarked` is a 0/1 flag marking whether the mention has been bookmarked (defaults to 0).
+    - `keywords` is the list of keywords attached to the mention (defaults to an empty array).
+
+TAGS "Octolens integration", "Community", "Sentiment analysis"
+
+SCHEMA >
+    `sourceId` String `json:$.sourceId` DEFAULT '',
+    `url` String `json:$.url` DEFAULT '',
+    `timestamp` DateTime `json:$.timestamp`,
+    `source` LowCardinality(String) `json:$.source` DEFAULT '',
+    `author` String `json:$.author` DEFAULT '',
+    `authorProfileLink` String `json:$.authorProfileLink` DEFAULT '',
+    `title` String `json:$.title` DEFAULT '',
+    `body` String `json:$.body` DEFAULT '',
+    `imageUrl` String `json:$.imageUrl` DEFAULT '',
+    `relevanceScore` String `json:$.relevanceScore` DEFAULT '',
+    `relevanceComment` String `json:$.relevanceComment` DEFAULT '',
+    `keyword` String `json:$.keyword` DEFAULT '',
+    `sentimentLabel` LowCardinality(String) `json:$.sentimentLabel` DEFAULT '',
+    `subreddit` String `json:$.subreddit` DEFAULT '',
+    `viewId` Int64 `json:$.viewId` DEFAULT 0,
+    `viewName` String `json:$.viewName` DEFAULT '',
+    `language` String `json:$.language` DEFAULT '',
+    `projectSlug` LowCardinality(String) `json:$.projectSlug` DEFAULT '',
+    `createdAt` DateTime64(3) `json:$.createdAt` DEFAULT now64(3),
+    `bookmarked` UInt8 `json:$.bookmarked` DEFAULT 0,
+    `keywords` Array(String) `json:$.keywords[:]` DEFAULT []
+
+ENGINE ReplacingMergeTree
+ENGINE_PARTITION_KEY toYear(timestamp)
+ENGINE_SORTING_KEY projectSlug, timestamp, sourceId
+ENGINE_VER createdAt
diff --git a/services/libs/tinybird/pipes/health_score_sink.pipe b/services/libs/tinybird/pipes/health_score_sink.pipe
@@ -1,12 +1,19 @@
 NODE health_score_select_fields
+DESCRIPTION >
+    Project health-score rows for export: NaN scores become NULL and every row is stamped with the start of the current day as `date`.
 SQL >
+    SELECT
+        id,
+        segmentId,
+        slug,
+        CASE WHEN isNaN(overallScore) THEN NULL ELSE overallScore END as overallScore,
+        toStartOfDay(now()) as date
+    FROM health_score_copy_ds
 
-    SELECT id, segmentId, slug, if (isNaN(overallScore), null, overallScore) as overallScore, toStartOfDay(now()) as date FROM health_score_copy_ds
-
-TYPE sink
+TYPE SINK
 EXPORT_SERVICE kafka
 EXPORT_CONNECTION_NAME lfx-oracle-kafka-streaming
-EXPORT_KAFKA_TOPIC health_score_sink
 EXPORT_SCHEDULE 30 0 * * *
-
-
+EXPORT_FORMAT csv
+EXPORT_STRATEGY @new
+EXPORT_KAFKA_TOPIC health_score_sink
diff --git a/services/libs/tinybird/pipes/insightsProjects_filtered.pipe b/services/libs/tinybird/pipes/insightsProjects_filtered.pipe
@@ -1,9 +1,6 @@
 DESCRIPTION >
     Provides filters for projects. Merges collection slug from associated collections. Merges segment aggregates from segmentsAggregatedMV
 
-TOKEN "insights-app-token" READ
-TOKEN "insighsProjects_filtered_endpoint_read_2583" READ
-
 NODE insightsProjects_filtered_1
 SQL >
     %
@@ -31,7 +28,10 @@ SQL >
         insights_projects_populated_ds.connectedPlatforms,
         insights_projects_populated_ds.firstCommit,
         insights_projects_populated_ds.repoData,
-        insights_projects_populated_ds.healthScore
+        insights_projects_populated_ds.healthScore,
+        insights_projects_populated_ds.communityPlatforms,
+        insights_projects_populated_ds.communityKeywords,
+        insights_projects_populated_ds.communityLanguages
     FROM insights_projects_populated_ds
     where
         insights_projects_populated_ds.enabled = 1
@@ -92,4 +92,7 @@ SQL >
         insights_projects_populated_ds.connectedPlatforms,
         insights_projects_populated_ds.firstCommit,
         insights_projects_populated_ds.repoData,
-        insights_projects_populated_ds.healthScore
+        insights_projects_populated_ds.healthScore,
+        insights_projects_populated_ds.communityPlatforms,
+        insights_projects_populated_ds.communityKeywords,
+        insights_projects_populated_ds.communityLanguages
diff --git a/services/libs/tinybird/pipes/insights_projects_populated_copy.pipe b/services/libs/tinybird/pipes/insights_projects_populated_copy.pipe
@@ -121,6 +121,16 @@ SQL >
     WHERE archived = true OR excluded = true
     GROUP BY segmentId, insightsProjectId
 
+NODE insights_projects_populated_copy_mentions
+SQL >
+    SELECT
+        projectSlug,
+        groupArrayIf(DISTINCT source, notEmpty(source)) as communityPlatforms,
+        groupArrayIf(DISTINCT keyword, notEmpty(keyword)) as communityKeywords,
+        groupArrayIf(DISTINCT language, notEmpty(language)) as communityLanguages
+    FROM mentions FINAL
+    GROUP BY projectSlug
+
 NODE insights_projects_populated_copy_results
 DESCRIPTION >
     Join everything together
@@ -156,7 +166,10 @@ SQL >
         insights_projects_populated_copy_aggregates.organizationCount as organizationCount,
         insights_projects_populated_copy_health_score_deduplicated.healthScore as healthScore,
         archived_excluded_repositories.archivedRepositories as archivedRepositories,
-        archived_excluded_repositories.excludedRepositories as excludedRepositories
+        archived_excluded_repositories.excludedRepositories as excludedRepositories,
+        insights_projects_populated_copy_mentions.communityPlatforms as communityPlatforms,
+        insights_projects_populated_copy_mentions.communityKeywords as communityKeywords,
+        insights_projects_populated_copy_mentions.communityLanguages as communityLanguages
     FROM insightsProjects FINAL
     LEFT JOIN
         insights_projects_populated_copy_collections_slugs
@@ -179,6 +192,9 @@ SQL >
     LEFT JOIN
         archived_excluded_repositories
         ON archived_excluded_repositories.insightsProjectId = insightsProjects.id
+    LEFT JOIN
+        insights_projects_populated_copy_mentions
+        ON insights_projects_populated_copy_mentions.projectSlug = insightsProjects.slug
     WHERE isNull (insightsProjects.deletedAt)
 
 TYPE COPY

diff --git a/services/libs/tinybird/pipes/insights_projects_populated_sink.pipe b/services/libs/tinybird/pipes/insights_projects_populated_sink.pipe
@@ -1,13 +1,12 @@
 NODE insights_projects_select_fields
 SQL >
-
     SELECT id, collectionsSlugs, name, slug, segmentId, softwareValue, toStartOfDay(now()) as date
     FROM insights_projects_populated_ds
 
-TYPE sink
+TYPE SINK
 EXPORT_SERVICE kafka
 EXPORT_CONNECTION_NAME lfx-oracle-kafka-streaming
-EXPORT_KAFKA_TOPIC insights_projects_populated_sink
 EXPORT_SCHEDULE 30 0 * * *
-
-
+EXPORT_FORMAT csv
+EXPORT_STRATEGY @new
+EXPORT_KAFKA_TOPIC insights_projects_populated_sink
diff --git a/services/libs/tinybird/pipes/mentions_list.pipe b/services/libs/tinybird/pipes/mentions_list.pipe
@@ -0,0 +1,34 @@
+DESCRIPTION >
+    Returns one page of community mentions, newest first. Optional filters: `projectSlug`, `platforms`, `keywords`, `sentiments`, `languages`. Pagination uses `page` (zero-based, default 0) and `pageSize` (default 20).
+    Ties on `timestamp` are broken by `projectSlug` and `sourceId` (the rest of the datasource sorting key) so consecutive pages never repeat or skip rows.
+
+NODE mentions_list_results
+SQL >
+    %
+    SELECT *
+    FROM mentions FINAL
+    WHERE
+        1 = 1
+        {% if defined(projectSlug) %}
+            AND projectSlug
+            = {{ String(projectSlug, description="Filter by project slug", required=False) }}
+        {% end %}
+        {% if defined(platforms) %}
+            AND source
+            IN {{ Array(platforms, 'String', description="Filter by platforms", required=False) }}
+        {% end %}
+        {% if defined(keywords) %}
+            AND keyword
+            IN {{ Array(keywords, 'String', description="Filter by keywords", required=False) }}
+        {% end %}
+        {% if defined(sentiments) %}
+            AND sentimentLabel
+            IN {{ Array(sentiments, 'String', description="Filter by sentiments", required=False) }}
+        {% end %}
+        {% if defined(languages) %}
+            AND language
+            IN {{ Array(languages, 'String', description="Filter by languages", required=False) }}
+        {% end %}
+    ORDER BY timestamp DESC, projectSlug, sourceId
+    LIMIT {{ Int32(pageSize, 20) }}
+    OFFSET {{ Int32(page, 0) * Int32(pageSize, 20) }}