Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
TOKEN "tracker" APPEND

DESCRIPTION >
Analytics events landing data source

SCHEMA >
`timestamp` DateTime `json:$.timestamp`,
`session_id` String `json:$.session_id`,
`action` LowCardinality(String) `json:$.action`,
`version` LowCardinality(String) `json:$.version`,
`payload` String `json:$.payload`

ENGINE "MergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(timestamp)"
ENGINE_SORTING_KEY "timestamp"
ENGINE_TTL "timestamp + toIntervalDay(60)"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

SCHEMA >
`date` Date,
`device` String,
`browser` String,
`location` String,
`pathname` String,
`visits` AggregateFunction(uniq, String),
`hits` AggregateFunction(count)

ENGINE "AggregatingMergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(date)"
ENGINE_SORTING_KEY "date, device, browser, location, pathname"
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

SCHEMA >
`date` Date,
`session_id` String,
`device` SimpleAggregateFunction(any, String),
`browser` SimpleAggregateFunction(any, String),
`location` SimpleAggregateFunction(any, String),
`first_hit` SimpleAggregateFunction(min, DateTime),
`latest_hit` SimpleAggregateFunction(max, DateTime),
`hits` AggregateFunction(count)

ENGINE "AggregatingMergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(date)"
ENGINE_SORTING_KEY "date, session_id"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

SCHEMA >
`date` Date,
`device` String,
`browser` String,
`location` String,
`referrer` String,
`visits` AggregateFunction(uniq, String),
`hits` AggregateFunction(count)

ENGINE "AggregatingMergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(date)"
ENGINE_SORTING_KEY "date, device, browser, location, referrer"
65 changes: 65 additions & 0 deletions alter_default_value_in_column/pipes/analytics_hits.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
DESCRIPTION >
Parsed `page_hit` events, implementing `browser` and `device` detection logic.


TOKEN "dashboard" READ

NODE parsed_hits
DESCRIPTION >
Parse raw page_hit events

SQL >

SELECT
timestamp,
action,
version,
coalesce(session_id, '0') as session_id,
JSONExtractString(payload, 'locale') as locale,
JSONExtractString(payload, 'location') as location,
JSONExtractString(payload, 'referrer') as referrer,
JSONExtractString(payload, 'pathname') as pathname,
JSONExtractString(payload, 'href') as href,
lower(JSONExtractString(payload, 'user-agent')) as user_agent
FROM analytics_events
where action = 'page_hit'



NODE endpoint
SQL >

SELECT
timestamp,
action,
version,
session_id,
location,
referrer,
pathname,
href,
case
when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
then 'bot'
when match(user_agent, 'android')
then 'mobile-android'
when match(user_agent, 'ipad|iphone|ipod')
then 'mobile-ios'
else 'desktop'
END as device,
case
when match(user_agent, 'firefox')
then 'firefox'
when match(user_agent, 'chrome|crios')
then 'chrome'
when match(user_agent, 'opera')
then 'opera'
when match(user_agent, 'msie|trident')
then 'ie'
when match(user_agent, 'iphone|ipad|safari')
then 'safari'
else 'Unknown'
END as browser
FROM parsed_hits


21 changes: 21 additions & 0 deletions alter_default_value_in_column/pipes/analytics_pages.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
NODE analytics_pages_1
DESCRIPTION >
Aggregate by pathname and calculate session and hits

SQL >

SELECT
toDate(timestamp) AS date,
device,
browser,
location,
pathname,
uniqState(session_id) AS visits,
countState() AS hits
FROM analytics_hits
GROUP BY date, device, browser, location, pathname

TYPE materialized
DATASOURCE analytics_pages_mv


22 changes: 22 additions & 0 deletions alter_default_value_in_column/pipes/analytics_sessions.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
NODE analytics_sessions_1
DESCRIPTION >
Aggregate by session_id and calculate session metrics

SQL >

SELECT
toDate(timestamp) AS date,
session_id,
anySimpleState(device) AS device,
anySimpleState(browser) AS browser,
anySimpleState(location) AS location,
minSimpleState(timestamp) AS first_hit,
maxSimpleState(timestamp) AS latest_hit,
countState() AS hits
FROM analytics_hits
GROUP BY date, session_id

TYPE materialized
DATASOURCE analytics_sessions_mv


23 changes: 23 additions & 0 deletions alter_default_value_in_column/pipes/analytics_sources.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
NODE analytics_sources_1
DESCRIPTION >
Aggregate by referral and calculate session and hits

SQL >

WITH (SELECT domainWithoutWWW(href) FROM analytics_hits LIMIT 1) AS currenct_domain
SELECT
toDate(timestamp) AS date,
device,
browser,
location,
referrer,
uniqState(session_id) AS visits,
countState() AS hits
FROM analytics_hits
WHERE domainWithoutWWW(referrer) != currenct_domain
GROUP BY date, device, browser, location, referrer

TYPE materialized
DATASOURCE analytics_sources_mv


138 changes: 138 additions & 0 deletions alter_default_value_in_column/pipes/kpis.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
DESCRIPTION >
Summary with general KPIs per date, including visits, page views, bounce rate and average session duration.
Accepts `date_from` and `date_to` date filter, all historical data if not passed.
Daily granularity, except when filtering one single day (hourly)


TOKEN "dashboard" READ

NODE timeseries
DESCRIPTION >
Generate a timeseries for the specified time range, so we call fill empty data points.
Filters "future" data points.

SQL >

%
{% set _single_day = defined(date_from) and day_diff(date_from, date_to) == 0 %}
with
{% if defined(date_from) %}
toStartOfDay(
toDate(
{{
Date(
date_from,
description="Starting day for filtering a date range",
required=False,
)
}}
)
) as start,
{% else %} toStartOfDay(timestampAdd(today(), interval -7 day)) as start,
{% end %}
{% if defined(date_to) %}
toStartOfDay(
toDate(
{{
Date(
date_to,
description="Finishing day for filtering a date range",
required=False,
)
}}
)
) as end
{% else %} toStartOfDay(today()) as end
{% end %}
{% if _single_day %}
select
arrayJoin(
arrayMap(
x -> toDateTime(x),
range(
toUInt32(toDateTime(start)), toUInt32(timestampAdd(end, interval 1 day)), 3600
)
)
) as date
{% else %}
select
arrayJoin(
arrayMap(
x -> toDate(x),
range(toUInt32(start), toUInt32(timestampAdd(end, interval 1 day)), 24 * 3600)
)
) as date
{% end %}
where date <= now()



NODE hits
DESCRIPTION >
Group by sessions and calculate metrics at that level

SQL >

%
{% if defined(date_from) and day_diff(date_from, date_to) == 0 %}
select
toStartOfHour(timestamp) as date,
session_id,
uniq(session_id) as visits,
count() as pageviews,
case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce,
max(timestamp) as latest_hit_aux,
min(timestamp) as first_hit_aux
from analytics_hits
where toDate(timestamp) = {{ Date(date_from) }}
group by toStartOfHour(timestamp), session_id
{% else %}
select
date,
session_id,
uniq(session_id) as visits,
countMerge(hits) as pageviews,
case when min(first_hit) = max(latest_hit) then 1 else 0 end as is_bounce,
max(latest_hit) as latest_hit_aux,
min(first_hit) as first_hit_aux
from analytics_sessions_mv
where
{% if defined(date_from) %} date >= {{ Date(date_from) }}
{% else %} date >= timestampAdd(today(), interval -7 day)
{% end %}
{% if defined(date_to) %} and date <= {{ Date(date_to) }}
{% else %} and date <= today()
{% end %}
group by date, session_id
{% end %}



NODE data
DESCRIPTION >
General KPIs per date, works for both summary metrics and trends charts.

SQL >

select
date,
uniq(session_id) as visits,
sum(pageviews) as pageviews,
sum(case when latest_hit_aux = first_hit_aux then 1 end) / visits as bounce_rate,
avg(latest_hit_aux - first_hit_aux) as avg_session_sec
from hits
group by date



NODE endpoint
DESCRIPTION >
Join and generate timeseries with metrics

SQL >

select a.date, b.visits, b.pageviews, b.bounce_rate, b.avg_session_sec
from timeseries a
left join data b using date


35 changes: 35 additions & 0 deletions alter_default_value_in_column/pipes/top_browsers.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
DESCRIPTION >
Top Browsers ordered by most visits.
Accepts `date_from` and `date_to` date filter. Defaults to last 7 days.
Also `skip` and `limit` parameters for pagination.


TOKEN "dashboard" READ

NODE endpoint
DESCRIPTION >
Group by browser and calcualte hits and visits

SQL >

%
select browser, uniqMerge(visits) as visits, countMerge(hits) as hits
from analytics_sources_mv
where
{% if defined(date_from) %}
date
>=
{{ Date(date_from, description="Starting day for filtering a date range", required=False) }}
{% else %} date >= timestampAdd(today(), interval -7 day)
{% end %}
{% if defined(date_to) %}
and date
<=
{{ Date(date_to, description="Finishing day for filtering a date range", required=False) }}
{% else %} and date <= today()
{% end %}
group by browser
order by visits desc
limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }}


Loading