diff --git a/alter_default_value_in_column/datasources/analytics_events.datasource b/alter_default_value_in_column/datasources/analytics_events.datasource new file mode 100644 index 00000000..241f6984 --- /dev/null +++ b/alter_default_value_in_column/datasources/analytics_events.datasource @@ -0,0 +1,16 @@ +TOKEN "tracker" APPEND + +DESCRIPTION > + Analytics events landing data source + +SCHEMA > + `timestamp` DateTime `json:$.timestamp`, + `session_id` String `json:$.session_id`, + `action` LowCardinality(String) `json:$.action`, + `version` LowCardinality(String) `json:$.version`, + `payload` String `json:$.payload` + +ENGINE "MergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(timestamp)" +ENGINE_SORTING_KEY "timestamp" +ENGINE_TTL "timestamp + toIntervalDay(60)" diff --git a/alter_default_value_in_column/datasources/analytics_pages_mv.datasource b/alter_default_value_in_column/datasources/analytics_pages_mv.datasource new file mode 100644 index 00000000..68a76061 --- /dev/null +++ b/alter_default_value_in_column/datasources/analytics_pages_mv.datasource @@ -0,0 +1,13 @@ + +SCHEMA > + `date` Date, + `device` String, + `browser` String, + `location` String, + `pathname` String, + `visits` AggregateFunction(uniq, String), + `hits` AggregateFunction(count) + +ENGINE "AggregatingMergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(date)" +ENGINE_SORTING_KEY "date, device, browser, location, pathname" diff --git a/alter_default_value_in_column/datasources/analytics_sessions_mv.datasource b/alter_default_value_in_column/datasources/analytics_sessions_mv.datasource new file mode 100644 index 00000000..267771df --- /dev/null +++ b/alter_default_value_in_column/datasources/analytics_sessions_mv.datasource @@ -0,0 +1,14 @@ + +SCHEMA > + `date` Date, + `session_id` String, + `device` SimpleAggregateFunction(any, String), + `browser` SimpleAggregateFunction(any, String), + `location` SimpleAggregateFunction(any, String), + `first_hit` SimpleAggregateFunction(min, DateTime), + `latest_hit` SimpleAggregateFunction(max, DateTime), + `hits` AggregateFunction(count) + +ENGINE "AggregatingMergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(date)" +ENGINE_SORTING_KEY "date, session_id" diff --git a/alter_default_value_in_column/datasources/analytics_sources_mv.datasource b/alter_default_value_in_column/datasources/analytics_sources_mv.datasource new file mode 100644 index 00000000..7235d468 --- /dev/null +++ b/alter_default_value_in_column/datasources/analytics_sources_mv.datasource @@ -0,0 +1,13 @@ + +SCHEMA > + `date` Date, + `device` String, + `browser` String, + `location` String, + `referrer` String, + `visits` AggregateFunction(uniq, String), + `hits` AggregateFunction(count) + +ENGINE "AggregatingMergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(date)" +ENGINE_SORTING_KEY "date, device, browser, location, referrer" diff --git a/alter_default_value_in_column/pipes/analytics_hits.pipe b/alter_default_value_in_column/pipes/analytics_hits.pipe new file mode 100644 index 00000000..05400081 --- /dev/null +++ b/alter_default_value_in_column/pipes/analytics_hits.pipe @@ -0,0 +1,65 @@ +DESCRIPTION > + Parsed `page_hit` events, implementing `browser` and `device` detection logic. + + +TOKEN "dashboard" READ + +NODE parsed_hits +DESCRIPTION > + Parse raw page_hit events + +SQL > + + SELECT + timestamp, + action, + version, + coalesce(session_id, '0') as session_id, + JSONExtractString(payload, 'locale') as locale, + JSONExtractString(payload, 'location') as location, + JSONExtractString(payload, 'referrer') as referrer, + JSONExtractString(payload, 'pathname') as pathname, + JSONExtractString(payload, 'href') as href, + lower(JSONExtractString(payload, 'user-agent')) as user_agent + FROM analytics_events + where action = 'page_hit' + + + +NODE endpoint +SQL > + + SELECT + timestamp, + action, + version, + session_id, + location, + referrer, + pathname, + href, + case + when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') + then 'bot' + when match(user_agent, 'android') + then 'mobile-android' + when match(user_agent, 'ipad|iphone|ipod') + then 'mobile-ios' + else 'desktop' + END as device, + case + when match(user_agent, 'firefox') + then 'firefox' + when match(user_agent, 'chrome|crios') + then 'chrome' + when match(user_agent, 'opera') + then 'opera' + when match(user_agent, 'msie|trident') + then 'ie' + when match(user_agent, 'iphone|ipad|safari') + then 'safari' + else 'Unknown' + END as browser + FROM parsed_hits + + diff --git a/alter_default_value_in_column/pipes/analytics_pages.pipe b/alter_default_value_in_column/pipes/analytics_pages.pipe new file mode 100644 index 00000000..7c034886 --- /dev/null +++ b/alter_default_value_in_column/pipes/analytics_pages.pipe @@ -0,0 +1,21 @@ +NODE analytics_pages_1 +DESCRIPTION > + Aggregate by pathname and calculate session and hits + +SQL > + + SELECT + toDate(timestamp) AS date, + device, + browser, + location, + pathname, + uniqState(session_id) AS visits, + countState() AS hits + FROM analytics_hits + GROUP BY date, device, browser, location, pathname + +TYPE materialized +DATASOURCE analytics_pages_mv + + diff --git a/alter_default_value_in_column/pipes/analytics_sessions.pipe b/alter_default_value_in_column/pipes/analytics_sessions.pipe new file mode 100644 index 00000000..2ad6a3a4 --- /dev/null +++ b/alter_default_value_in_column/pipes/analytics_sessions.pipe @@ -0,0 +1,22 @@ +NODE analytics_sessions_1 +DESCRIPTION > + Aggregate by session_id and calculate session metrics + +SQL > + + SELECT + toDate(timestamp) AS date, + session_id, + anySimpleState(device) AS device, + anySimpleState(browser) AS browser, + anySimpleState(location) AS location, + minSimpleState(timestamp) AS first_hit, + maxSimpleState(timestamp) AS latest_hit, + countState() AS hits + FROM analytics_hits + GROUP BY date, session_id + +TYPE materialized +DATASOURCE analytics_sessions_mv + + diff --git a/alter_default_value_in_column/pipes/analytics_sources.pipe b/alter_default_value_in_column/pipes/analytics_sources.pipe new file mode 100644 index 00000000..9a9acb5e --- /dev/null +++ b/alter_default_value_in_column/pipes/analytics_sources.pipe @@ -0,0 +1,23 @@ +NODE analytics_sources_1 +DESCRIPTION > + Aggregate by referral and calculate session and hits + +SQL > + + WITH (SELECT domainWithoutWWW(href) FROM analytics_hits LIMIT 1) AS currenct_domain + SELECT + toDate(timestamp) AS date, + device, + browser, + location, + referrer, + uniqState(session_id) AS visits, + countState() AS hits + FROM analytics_hits + WHERE domainWithoutWWW(referrer) != currenct_domain + GROUP BY date, device, browser, location, referrer + +TYPE materialized +DATASOURCE analytics_sources_mv + + diff --git a/alter_default_value_in_column/pipes/kpis.pipe b/alter_default_value_in_column/pipes/kpis.pipe new file mode 100644 index 00000000..3dbfb8e8 --- /dev/null +++ b/alter_default_value_in_column/pipes/kpis.pipe @@ -0,0 +1,138 @@ +DESCRIPTION > + Summary with general KPIs per date, including visits, page views, bounce rate and average session duration. + Accepts `date_from` and `date_to` date filter, all historical data if not passed. + Daily granularity, except when filtering one single day (hourly) + + +TOKEN "dashboard" READ + +NODE timeseries +DESCRIPTION > + Generate a timeseries for the specified time range, so we call fill empty data points. + Filters "future" data points. + +SQL > + + % + {% set _single_day = defined(date_from) and day_diff(date_from, date_to) == 0 %} + with + {% if defined(date_from) %} + toStartOfDay( + toDate( + {{ + Date( + date_from, + description="Starting day for filtering a date range", + required=False, + ) + }} + ) + ) as start, + {% else %} toStartOfDay(timestampAdd(today(), interval -7 day)) as start, + {% end %} + {% if defined(date_to) %} + toStartOfDay( + toDate( + {{ + Date( + date_to, + description="Finishing day for filtering a date range", + required=False, + ) + }} + ) + ) as end + {% else %} toStartOfDay(today()) as end + {% end %} + {% if _single_day %} + select + arrayJoin( + arrayMap( + x -> toDateTime(x), + range( + toUInt32(toDateTime(start)), toUInt32(timestampAdd(end, interval 1 day)), 3600 + ) + ) + ) as date + {% else %} + select + arrayJoin( + arrayMap( + x -> toDate(x), + range(toUInt32(start), toUInt32(timestampAdd(end, interval 1 day)), 24 * 3600) + ) + ) as date + {% end %} + where date <= now() + + + +NODE hits +DESCRIPTION > + Group by sessions and calculate metrics at that level + +SQL > + + % + {% if defined(date_from) and day_diff(date_from, date_to) == 0 %} + select + toStartOfHour(timestamp) as date, + session_id, + uniq(session_id) as visits, + count() as pageviews, + case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce, + max(timestamp) as latest_hit_aux, + min(timestamp) as first_hit_aux + from analytics_hits + where toDate(timestamp) = {{ Date(date_from) }} + group by toStartOfHour(timestamp), session_id + {% else %} + select + date, + session_id, + uniq(session_id) as visits, + countMerge(hits) as pageviews, + case when min(first_hit) = max(latest_hit) then 1 else 0 end as is_bounce, + max(latest_hit) as latest_hit_aux, + min(first_hit) as first_hit_aux + from analytics_sessions_mv + where + {% if defined(date_from) %} date >= {{ Date(date_from) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} and date <= {{ Date(date_to) }} + {% else %} and date <= today() + {% end %} + group by date, session_id + {% end %} + + + +NODE data +DESCRIPTION > + General KPIs per date, works for both summary metrics and trends charts. + +SQL > + + select + date, + uniq(session_id) as visits, + sum(pageviews) as pageviews, + sum(case when latest_hit_aux = first_hit_aux then 1 end) / visits as bounce_rate, + avg(latest_hit_aux - first_hit_aux) as avg_session_sec + from hits + group by date + + + +NODE endpoint +DESCRIPTION > + Join and generate timeseries with metrics + +SQL > + + select a.date, b.visits, b.pageviews, b.bounce_rate, b.avg_session_sec + from timeseries a + left join data b using date + + diff --git a/alter_default_value_in_column/pipes/top_browsers.pipe b/alter_default_value_in_column/pipes/top_browsers.pipe new file mode 100644 index 00000000..03c89f0c --- /dev/null +++ b/alter_default_value_in_column/pipes/top_browsers.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top Browsers ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. + + +TOKEN "dashboard" READ + +NODE endpoint +DESCRIPTION > + Group by browser and calcualte hits and visits + +SQL > + + % + select browser, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_sources_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by browser + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_default_value_in_column/pipes/top_devices.pipe b/alter_default_value_in_column/pipes/top_devices.pipe new file mode 100644 index 00000000..400623a9 --- /dev/null +++ b/alter_default_value_in_column/pipes/top_devices.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top Device Types ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. + + +TOKEN "dashboard" READ + +NODE endpoint +DESCRIPTION > + Group by device and calcualte hits and visits + +SQL > + + % + select device, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_sources_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by device + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_default_value_in_column/pipes/top_locations.pipe b/alter_default_value_in_column/pipes/top_locations.pipe new file mode 100644 index 00000000..a433f6c6 --- /dev/null +++ b/alter_default_value_in_column/pipes/top_locations.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top visting Countries ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. + + +TOKEN "dashboard" READ + +NODE endpoint +DESCRIPTION > + Group by pagepath and calcualte hits and visits + +SQL > + + % + select location, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_pages_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by location + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_default_value_in_column/pipes/top_pages.pipe b/alter_default_value_in_column/pipes/top_pages.pipe new file mode 100644 index 00000000..a8527bf5 --- /dev/null +++ b/alter_default_value_in_column/pipes/top_pages.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Most visited pages for a given period. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. + + +TOKEN "dashboard" READ + +NODE endpoint +DESCRIPTION > + Group by pagepath and calcualte hits and visits + +SQL > + + % + select pathname, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_pages_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by pathname + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_default_value_in_column/pipes/top_sources.pipe b/alter_default_value_in_column/pipes/top_sources.pipe new file mode 100644 index 00000000..6b241aed --- /dev/null +++ b/alter_default_value_in_column/pipes/top_sources.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top traffic sources (domains), ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. + + +TOKEN "dashboard" READ + +NODE endpoint +DESCRIPTION > + Group by referral and calcualte hits and visits + +SQL > + + % + select domainWithoutWWW(referrer) as referrer, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_sources_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by referrer + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_default_value_in_column/pipes/trend.pipe b/alter_default_value_in_column/pipes/trend.pipe new file mode 100644 index 00000000..018074f6 --- /dev/null +++ b/alter_default_value_in_column/pipes/trend.pipe @@ -0,0 +1,42 @@ +DESCRIPTION > + Visits trend over time for the last 30 minutes, filling the blanks. + Works great for the realtime chart. + + +TOKEN "dashboard" READ + +NODE timeseries +DESCRIPTION > + Generate a timeseries for the last 30 minutes, so we call fill empty data points + +SQL > + + with (now() - interval 30 minute) as start + select addMinutes(toStartOfMinute(start), number) as t + from (select arrayJoin(range(1, 31)) as number) + + + +NODE hits +DESCRIPTION > + Get last 30 minutes metrics gropued by minute + +SQL > + + select toStartOfMinute(timestamp) as t, uniq(session_id) as visits + from analytics_hits + where timestamp >= (now() - interval 30 minute) + group by toStartOfMinute(timestamp) + order by toStartOfMinute(timestamp) + + + +NODE endpoint +DESCRIPTION > + Join and generate timeseries with metrics for the last 30 minutes + +SQL > + + select a.t, b.visits from timeseries a left join hits b on a.t = b.t order by a.t + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..86035df9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +tinybird-cli>=4,<5 \ No newline at end of file