diff --git a/alter_column_to_nullable/.tinyenv b/alter_column_to_nullable/.tinyenv new file mode 100644 index 00000000..3281ddfa --- /dev/null +++ b/alter_column_to_nullable/.tinyenv @@ -0,0 +1,31 @@ + +# VERSION format is major.minor.patch-post where major, minor, patch and post are integer numbers +# bump post to deploy to the current live Release, rollback to previous post version is not available +# bump patch or minor to deploy a new Release and auto-promote it to live. Add TB_AUTO_PROMOTE=0 to create the Release in preview status +# bump major to deploy a new Release in preview status +VERSION=0.0.0 + + + +########## +# OPTIONAL env vars +# Deploy a new Release in preview status (default is 1) +# TB_AUTO_PROMOTE=0 + +# Check if deploy requires backfilling on preview (default is 1) +# TB_CHECK_BACKFILL_REQUIRED=0 + +# Force old Releases deletion on promote (default is 0) +# Setting it to 1 will remove oldest rollback Releases even when some resource is still in use +# TB_FORCE_REMOVE_OLDEST_ROLLBACK=0 + +# Don't print CLI version warning message if there's a new available version +# TB_VERSION_WARNING=0 + +# Skip regression tests +# TB_SKIP_REGRESSION=0 + +# Use `OBFUSCATE_REGEX_PATTERN` and `OBFUSCATE_PATTERN_SEPARATOR` environment variables to define a regex pattern and a separator (in case of a single string with multiple regex) to obfuscate secrets in the CLI output. 
+# OBFUSCATE_REGEX_PATTERN="https://(www\.)?[^/]+||^Follow these instructions =>" +# OBFUSCATE_PATTERN_SEPARATOR=|| +########## diff --git a/alter_column_to_nullable/datasources/analytics_events.datasource b/alter_column_to_nullable/datasources/analytics_events.datasource new file mode 100644 index 00000000..241f6984 --- /dev/null +++ b/alter_column_to_nullable/datasources/analytics_events.datasource @@ -0,0 +1,16 @@ +TOKEN "tracker" APPEND + +DESCRIPTION > + Analytics events landing data source + +SCHEMA > + `timestamp` DateTime `json:$.timestamp`, + `session_id` String `json:$.session_id`, + `action` LowCardinality(String) `json:$.action`, + `version` LowCardinality(String) `json:$.version`, + `payload` String `json:$.payload` + +ENGINE "MergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(timestamp)" +ENGINE_SORTING_KEY "timestamp" +ENGINE_TTL "timestamp + toIntervalDay(60)" diff --git a/alter_column_to_nullable/datasources/analytics_pages_mv.datasource b/alter_column_to_nullable/datasources/analytics_pages_mv.datasource new file mode 100644 index 00000000..68a76061 --- /dev/null +++ b/alter_column_to_nullable/datasources/analytics_pages_mv.datasource @@ -0,0 +1,13 @@ + +SCHEMA > + `date` Date, + `device` String, + `browser` String, + `location` String, + `pathname` String, + `visits` AggregateFunction(uniq, String), + `hits` AggregateFunction(count) + +ENGINE "AggregatingMergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(date)" +ENGINE_SORTING_KEY "date, device, browser, location, pathname" diff --git a/alter_column_to_nullable/datasources/analytics_sessions_mv.datasource b/alter_column_to_nullable/datasources/analytics_sessions_mv.datasource new file mode 100644 index 00000000..267771df --- /dev/null +++ b/alter_column_to_nullable/datasources/analytics_sessions_mv.datasource @@ -0,0 +1,14 @@ + +SCHEMA > + `date` Date, + `session_id` String, + `device` SimpleAggregateFunction(any, String), + `browser` SimpleAggregateFunction(any, String), + `location` 
SimpleAggregateFunction(any, String), + `first_hit` SimpleAggregateFunction(min, DateTime), + `latest_hit` SimpleAggregateFunction(max, DateTime), + `hits` AggregateFunction(count) + +ENGINE "AggregatingMergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(date)" +ENGINE_SORTING_KEY "date, session_id" diff --git a/alter_column_to_nullable/datasources/analytics_sources_mv.datasource b/alter_column_to_nullable/datasources/analytics_sources_mv.datasource new file mode 100644 index 00000000..7235d468 --- /dev/null +++ b/alter_column_to_nullable/datasources/analytics_sources_mv.datasource @@ -0,0 +1,13 @@ + +SCHEMA > + `date` Date, + `device` String, + `browser` String, + `location` String, + `referrer` String, + `visits` AggregateFunction(uniq, String), + `hits` AggregateFunction(count) + +ENGINE "AggregatingMergeTree" +ENGINE_PARTITION_KEY "toYYYYMM(date)" +ENGINE_SORTING_KEY "date, device, browser, location, referrer" diff --git a/alter_column_to_nullable/datasources/fixtures/.gitkeep b/alter_column_to_nullable/datasources/fixtures/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/alter_column_to_nullable/pipes/analytics_hits.pipe b/alter_column_to_nullable/pipes/analytics_hits.pipe new file mode 100644 index 00000000..05400081 --- /dev/null +++ b/alter_column_to_nullable/pipes/analytics_hits.pipe @@ -0,0 +1,65 @@ +DESCRIPTION > + Parsed `page_hit` events, implementing `browser` and `device` detection logic. 
+ + +TOKEN "dashboard" READ + +NODE parsed_hits +DESCRIPTION > + Parse raw page_hit events + +SQL > + + SELECT + timestamp, + action, + version, + coalesce(session_id, '0') as session_id, + JSONExtractString(payload, 'locale') as locale, + JSONExtractString(payload, 'location') as location, + JSONExtractString(payload, 'referrer') as referrer, + JSONExtractString(payload, 'pathname') as pathname, + JSONExtractString(payload, 'href') as href, + lower(JSONExtractString(payload, 'user-agent')) as user_agent + FROM analytics_events + where action = 'page_hit' + + + +NODE endpoint +SQL > + + SELECT + timestamp, + action, + version, + session_id, + location, + referrer, + pathname, + href, + case + when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') + then 'bot' + when match(user_agent, 'android') + then 'mobile-android' + when match(user_agent, 'ipad|iphone|ipod') + then 'mobile-ios' + else 'desktop' + END as device, + case + when match(user_agent, 'firefox') + then 'firefox' + when match(user_agent, 'chrome|crios') + then 'chrome' + when match(user_agent, 'opera') + then 'opera' + when match(user_agent, 'msie|trident') + then 'ie' + when match(user_agent, 'iphone|ipad|safari') + then 'safari' + else 'Unknown' + END as browser + FROM parsed_hits + + diff --git a/alter_column_to_nullable/pipes/analytics_pages.pipe b/alter_column_to_nullable/pipes/analytics_pages.pipe new file mode 100644 index 00000000..7c034886 --- /dev/null +++ b/alter_column_to_nullable/pipes/analytics_pages.pipe @@ -0,0 +1,21 @@ +NODE analytics_pages_1 +DESCRIPTION > + Aggregate by pathname and calculate session and hits + +SQL > + + SELECT + toDate(timestamp) AS date, + device, + browser, + location, + pathname, + uniqState(session_id) AS visits, + countState() AS hits + FROM analytics_hits + GROUP BY date, device, browser, location, pathname + +TYPE materialized +DATASOURCE analytics_pages_mv + + diff --git a/alter_column_to_nullable/pipes/analytics_sessions.pipe 
b/alter_column_to_nullable/pipes/analytics_sessions.pipe new file mode 100644 index 00000000..2ad6a3a4 --- /dev/null +++ b/alter_column_to_nullable/pipes/analytics_sessions.pipe @@ -0,0 +1,22 @@ +NODE analytics_sessions_1 +DESCRIPTION > + Aggregate by session_id and calculate session metrics + +SQL > + + SELECT + toDate(timestamp) AS date, + session_id, + anySimpleState(device) AS device, + anySimpleState(browser) AS browser, + anySimpleState(location) AS location, + minSimpleState(timestamp) AS first_hit, + maxSimpleState(timestamp) AS latest_hit, + countState() AS hits + FROM analytics_hits + GROUP BY date, session_id + +TYPE materialized +DATASOURCE analytics_sessions_mv + + diff --git a/alter_column_to_nullable/pipes/analytics_sources.pipe b/alter_column_to_nullable/pipes/analytics_sources.pipe new file mode 100644 index 00000000..9a9acb5e --- /dev/null +++ b/alter_column_to_nullable/pipes/analytics_sources.pipe @@ -0,0 +1,23 @@ +NODE analytics_sources_1 +DESCRIPTION > + Aggregate by referral and calculate session and hits + +SQL > + + WITH (SELECT domainWithoutWWW(href) FROM analytics_hits LIMIT 1) AS current_domain + SELECT + toDate(timestamp) AS date, + device, + browser, + location, + referrer, + uniqState(session_id) AS visits, + countState() AS hits + FROM analytics_hits + WHERE domainWithoutWWW(referrer) != current_domain + GROUP BY date, device, browser, location, referrer + +TYPE materialized +DATASOURCE analytics_sources_mv + + diff --git a/alter_column_to_nullable/pipes/kpis.pipe b/alter_column_to_nullable/pipes/kpis.pipe new file mode 100644 index 00000000..3dbfb8e8 --- /dev/null +++ b/alter_column_to_nullable/pipes/kpis.pipe @@ -0,0 +1,138 @@ +DESCRIPTION > + Summary with general KPIs per date, including visits, page views, bounce rate and average session duration. + Accepts `date_from` and `date_to` date filter, all historical data if not passed.
+ Daily granularity, except when filtering one single day (hourly) + + +TOKEN "dashboard" READ + +NODE timeseries +DESCRIPTION > + Generate a timeseries for the specified time range, so we call fill empty data points. + Filters "future" data points. + +SQL > + + % + {% set _single_day = defined(date_from) and day_diff(date_from, date_to) == 0 %} + with + {% if defined(date_from) %} + toStartOfDay( + toDate( + {{ + Date( + date_from, + description="Starting day for filtering a date range", + required=False, + ) + }} + ) + ) as start, + {% else %} toStartOfDay(timestampAdd(today(), interval -7 day)) as start, + {% end %} + {% if defined(date_to) %} + toStartOfDay( + toDate( + {{ + Date( + date_to, + description="Finishing day for filtering a date range", + required=False, + ) + }} + ) + ) as end + {% else %} toStartOfDay(today()) as end + {% end %} + {% if _single_day %} + select + arrayJoin( + arrayMap( + x -> toDateTime(x), + range( + toUInt32(toDateTime(start)), toUInt32(timestampAdd(end, interval 1 day)), 3600 + ) + ) + ) as date + {% else %} + select + arrayJoin( + arrayMap( + x -> toDate(x), + range(toUInt32(start), toUInt32(timestampAdd(end, interval 1 day)), 24 * 3600) + ) + ) as date + {% end %} + where date <= now() + + + +NODE hits +DESCRIPTION > + Group by sessions and calculate metrics at that level + +SQL > + + % + {% if defined(date_from) and day_diff(date_from, date_to) == 0 %} + select + toStartOfHour(timestamp) as date, + session_id, + uniq(session_id) as visits, + count() as pageviews, + case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce, + max(timestamp) as latest_hit_aux, + min(timestamp) as first_hit_aux + from analytics_hits + where toDate(timestamp) = {{ Date(date_from) }} + group by toStartOfHour(timestamp), session_id + {% else %} + select + date, + session_id, + uniq(session_id) as visits, + countMerge(hits) as pageviews, + case when min(first_hit) = max(latest_hit) then 1 else 0 end as is_bounce, + max(latest_hit) as 
latest_hit_aux, + min(first_hit) as first_hit_aux + from analytics_sessions_mv + where + {% if defined(date_from) %} date >= {{ Date(date_from) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} and date <= {{ Date(date_to) }} + {% else %} and date <= today() + {% end %} + group by date, session_id + {% end %} + + + +NODE data +DESCRIPTION > + General KPIs per date, works for both summary metrics and trends charts. + +SQL > + + select + date, + uniq(session_id) as visits, + sum(pageviews) as pageviews, + sum(case when latest_hit_aux = first_hit_aux then 1 end) / visits as bounce_rate, + avg(latest_hit_aux - first_hit_aux) as avg_session_sec + from hits + group by date + + + +NODE endpoint +DESCRIPTION > + Join and generate timeseries with metrics + +SQL > + + select a.date, b.visits, b.pageviews, b.bounce_rate, b.avg_session_sec + from timeseries a + left join data b using date + + diff --git a/alter_column_to_nullable/pipes/top_browsers.pipe b/alter_column_to_nullable/pipes/top_browsers.pipe new file mode 100644 index 00000000..03c89f0c --- /dev/null +++ b/alter_column_to_nullable/pipes/top_browsers.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top Browsers ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. 
+ + +TOKEN "dashboard" READ +NODE endpoint +DESCRIPTION > + Group by browser and calculate hits and visits + +SQL > + + % + select browser, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_sources_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by browser + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_column_to_nullable/pipes/top_devices.pipe b/alter_column_to_nullable/pipes/top_devices.pipe new file mode 100644 index 00000000..400623a9 --- /dev/null +++ b/alter_column_to_nullable/pipes/top_devices.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top Device Types ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination.
+ + +TOKEN "dashboard" READ +NODE endpoint +DESCRIPTION > + Group by device and calculate hits and visits + +SQL > + + % + select device, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_sources_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by device + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_column_to_nullable/pipes/top_locations.pipe b/alter_column_to_nullable/pipes/top_locations.pipe new file mode 100644 index 00000000..a433f6c6 --- /dev/null +++ b/alter_column_to_nullable/pipes/top_locations.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top visiting Countries ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination.
+ + +TOKEN "dashboard" READ +NODE endpoint +DESCRIPTION > + Group by location and calculate hits and visits + +SQL > + + % + select location, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_pages_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by location + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_column_to_nullable/pipes/top_pages.pipe b/alter_column_to_nullable/pipes/top_pages.pipe new file mode 100644 index 00000000..a8527bf5 --- /dev/null +++ b/alter_column_to_nullable/pipes/top_pages.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Most visited pages for a given period. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination.
+ + +TOKEN "dashboard" READ +NODE endpoint +DESCRIPTION > + Group by pagepath and calculate hits and visits + +SQL > + + % + select pathname, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_pages_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by pathname + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_column_to_nullable/pipes/top_sources.pipe b/alter_column_to_nullable/pipes/top_sources.pipe new file mode 100644 index 00000000..146b5a98 --- /dev/null +++ b/alter_column_to_nullable/pipes/top_sources.pipe @@ -0,0 +1,35 @@ +DESCRIPTION > + Top traffic sources (domains), ordered by most visits. + Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. + Also `skip` and `limit` parameters for pagination. + + +TOKEN "dashboard" READ + +NODE endpoint +DESCRIPTION > + Group by referral and calculate hits and visits.
+ +SQL > + + % + select domainWithoutWWW(referrer) as referrer, uniqMerge(visits) as visits, countMerge(hits) as hits + from analytics_sources_mv + where + {% if defined(date_from) %} + date + >= + {{ Date(date_from, description="Starting day for filtering a date range", required=False) }} + {% else %} date >= timestampAdd(today(), interval -7 day) + {% end %} + {% if defined(date_to) %} + and date + <= + {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }} + {% else %} and date <= today() + {% end %} + group by referrer + order by visits desc + limit {{ Int32(skip, 0) }},{{ Int32(limit, 50) }} + + diff --git a/alter_column_to_nullable/pipes/trend.pipe b/alter_column_to_nullable/pipes/trend.pipe new file mode 100644 index 00000000..018074f6 --- /dev/null +++ b/alter_column_to_nullable/pipes/trend.pipe @@ -0,0 +1,42 @@ +DESCRIPTION > + Visits trend over time for the last 30 minutes, filling the blanks. + Works great for the realtime chart. + + +TOKEN "dashboard" READ + +NODE timeseries +DESCRIPTION > + Generate a timeseries for the last 30 minutes, so we can fill empty data points + +SQL > + + with (now() - interval 30 minute) as start + select addMinutes(toStartOfMinute(start), number) as t + from (select arrayJoin(range(1, 31)) as number) + + + +NODE hits +DESCRIPTION > + Get last 30 minutes metrics grouped by minute + +SQL > + + select toStartOfMinute(timestamp) as t, uniq(session_id) as visits + from analytics_hits + where timestamp >= (now() - interval 30 minute) + group by toStartOfMinute(timestamp) + order by toStartOfMinute(timestamp) + + + +NODE endpoint +DESCRIPTION > + Join and generate timeseries with metrics for the last 30 minutes + +SQL > + + select a.t, b.visits from timeseries a left join hits b on a.t = b.t order by a.t + + diff --git a/alter_column_to_nullable/requirements.txt b/alter_column_to_nullable/requirements.txt new file mode 100644 index 00000000..86035df9 --- /dev/null +++
b/alter_column_to_nullable/requirements.txt @@ -0,0 +1 @@ +tinybird-cli>=4,<5 \ No newline at end of file diff --git a/alter_column_to_nullable/scripts/append_fixtures.sh b/alter_column_to_nullable/scripts/append_fixtures.sh new file mode 100755 index 00000000..e8745565 --- /dev/null +++ b/alter_column_to_nullable/scripts/append_fixtures.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +directory="datasources/fixtures" +extensions=("csv" "ndjson") + +absolute_directory=$(realpath "$directory") + +for extension in "${extensions[@]}"; do + file_list=$(find "$absolute_directory" -type f -name "*.$extension") + + for file_path in $file_list; do + file_name=$(basename "$file_path") + file_name_without_extension="${file_name%.*}" + + command=(tb datasource append "$file_name_without_extension" "datasources/fixtures/$file_name") + echo "${command[*]}" + "${command[@]}" + done +done diff --git a/alter_column_to_nullable/scripts/exec_test.sh b/alter_column_to_nullable/scripts/exec_test.sh new file mode 100755 index 00000000..50571d95 --- /dev/null +++ b/alter_column_to_nullable/scripts/exec_test.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +export TB_VERSION_WARNING=0 + +run_test() { + t=$1 + echo "** Running $t **" + echo "** $(cat "$t")" + tmpfile=$(mktemp) + retries=0 + TOTAL_RETRIES=3 + + # When appending fixtures, we need to retry in case the data is not replicated in time + while [ "$retries" -lt "$TOTAL_RETRIES" ]; do + # Run the test and store the output; capture the exit code without tripping set -e + exit_code=0 + bash "$t" ${2:+"$2"} >"$tmpfile" || exit_code=$?
+ if [ "$exit_code" -eq 0 ]; then + # If the test passed, break the loop + if diff -B "${t}.result" "$tmpfile" >/dev/null 2>&1; then + break + # If the test failed, increment the retries counter and try again + else + retries=$((retries+1)) + fi + # If the bash command failed, print an error message and break the loop + else + break + fi + done + + if diff -B "${t}.result" "$tmpfile" >/dev/null 2>&1; then + echo "✅ Test $t passed" + rm "$tmpfile" + return 0 + elif [ "$retries" -eq "$TOTAL_RETRIES" ]; then + echo "🚨 ERROR: Test $t failed, diff:"; + diff -B "${t}.result" "$tmpfile" + rm "$tmpfile" + return 1 + else + echo "🚨 ERROR: Test $t failed with bash command exit code $exit_code" + cat "$tmpfile" + rm "$tmpfile" + return 1 + fi + echo "" +} +export -f run_test + +fail=0 +find ./tests -name "*.test" -print0 | xargs -0 -I {} -P 4 bash -c 'run_test "$@"' _ {} || fail=1 + +if [ "$fail" -eq 1 ]; then + exit 1; +fi diff --git a/alter_column_to_nullable/tests/.gitkeep b/alter_column_to_nullable/tests/.gitkeep new file mode 100644 index 00000000..e69de29b