# [GA360] Audience KPIs

## Initiate connection

In [1]:
# Import libraries
import pandas as pd
from google.cloud import bigquery

# Launch BigQuery client from the JSON credentials file.
# The client is kept in a module-level name because the `bquery` helper below
# reads it as a global.
# NOTE: the credentials path is relative ('../'), so it is resolved against the
# current working directory of the process running this notebook.
bq_client = bigquery.Client.from_service_account_json('../bigquery_creds.json')

# Convenience wrapper: submit a SQL string and get the result back as a DataFrame
def bquery(sql):
    """Run a query with the module-level BigQuery client.

    Args:
        sql: Query text, passed unchanged to ``bq_client.query``.

    Returns:
        The result of calling ``to_dataframe()`` on the submitted query job.
    """
    query_job = bq_client.query(sql)
    return query_job.to_dataframe()

## Users, sessions and pageviews

In [2]:
# Daily visitors, sessions and page views for 10 days.
# - Reads the wildcard table `ga_sessions_*` and keeps only the shards whose
#   _TABLE_SUFFIX lies between '20170701' and '20170710' (BETWEEN is inclusive).
# - `date` is parsed from its YYYYMMDD string form into a DATE with PARSE_DATE.
# - visitors = distinct fullVisitorId *per day*; because the same visitor can
#   appear on several days, the daily values cannot simply be summed to get the
#   number of distinct visitors over the whole period.
# - GROUP BY 1 / ORDER BY 1 refer to the first selected column (date).
sql = """
SELECT 
    PARSE_DATE("%Y%m%d", date) AS date,
    COUNT(DISTINCT fullVisitorId) AS visitors,
    SUM(totals.visits) AS sessions,
    SUM(totals.pageviews) AS pageviews,
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE 
    _TABLE_SUFFIX BETWEEN '20170701' AND '20170710'
GROUP BY 1
ORDER BY 1
"""
# Last expression of the cell, so the notebook displays the returned DataFrame.
bquery(sql)

Unnamed: 0,date,visitors,sessions,pageviews
0,2017-07-01,1946,2048,6562
1,2017-07-02,1791,1895,5637
2,2017-07-03,1904,2046,6492
3,2017-07-04,1799,1938,5740
4,2017-07-05,2683,2885,9927
5,2017-07-06,2478,2658,8924
6,2017-07-07,2213,2450,9266
7,2017-07-08,1732,1859,6087
8,2017-07-09,1761,1921,6523
9,2017-07-10,2512,2769,10183


## Breakdown by devices and channels

In [3]:
# Traffic KPIs broken down by device and acquisition channel.
# - There is no date column in the SELECT list, so sessions and pageviews are
#   totals over the whole 10-day window selected by _TABLE_SUFFIX.
# - GROUP BY 1,2 / ORDER BY 1,2 refer to the first two selected columns
#   (device, channel_grouping), giving one row per device/channel pair.
sql = """
SELECT 
    device.deviceCategory AS device,
    channelGrouping AS channel_grouping,
    SUM(totals.visits) AS sessions,
    SUM(totals.pageviews) AS pageviews
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE 
    _TABLE_SUFFIX BETWEEN '20170701' AND '20170710'
GROUP BY 1,2
ORDER BY 1,2
"""
# Last expression of the cell, so the notebook displays the returned DataFrame.
bquery(sql)

Unnamed: 0,device,channel_grouping,sessions,pageviews
0,desktop,Affiliates,557,1543
1,desktop,Direct,1800,6611
2,desktop,Display,74,317
3,desktop,Organic Search,6504,22545
4,desktop,Paid Search,333,1630
5,desktop,Referral,2172,12220
6,desktop,Social,1893,4186
7,mobile,Affiliates,119,451
8,mobile,Direct,2016,5235
9,mobile,Display,32,134


## Traffic stickiness indicators

In [4]:
# Pages per session, average session duration (in seconds) and bounce rate, per day.
# - Each indicator is a ratio of daily sums (e.g. SUM(pageviews) / SUM(visits)),
#   not an average of per-session ratios.
# - All three indicators share the same denominator: the day's session count,
#   SUM(totals.visits), which is also returned as `sessions`.
# - GROUP BY 1 / ORDER BY 1 refer to the first selected column (date).
sql = """
SELECT 
    PARSE_DATE("%Y%m%d", date) AS date,
    SUM(totals.visits) AS sessions,
    SUM(totals.pageviews)/SUM(totals.visits) AS page_per_session,
    SUM(totals.timeOnSite)/SUM(totals.visits) AS avg_session_duration,
    SUM(totals.bounces)/SUM(totals.visits) AS bounce_rate
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE 
    _TABLE_SUFFIX BETWEEN '20170701' AND '20170710'
GROUP BY 1
ORDER BY 1
"""
# Last expression of the cell, so the notebook displays the returned DataFrame.
bquery(sql)

Unnamed: 0,date,sessions,page_per_session,avg_session_duration,bounce_rate
0,2017-07-01,2048,3.204102,103.76416,0.581055
1,2017-07-02,1895,2.97467,101.568338,0.568865
2,2017-07-03,2046,3.173021,119.06305,0.558162
3,2017-07-04,1938,2.961816,107.820433,0.567079
4,2017-07-05,2885,3.440901,119.314038,0.515425
5,2017-07-06,2658,3.357412,115.75997,0.504515
6,2017-07-07,2450,3.782041,146.904082,0.489796
7,2017-07-08,1859,3.274341,114.330285,0.53631
8,2017-07-09,1921,3.395627,114.941176,0.54659
9,2017-07-10,2769,3.677501,142.385338,0.514265


## Transactions

In [5]:
# Transactions and revenue, per day.
# - conversion_rate    = transactions / sessions
# - revenue            = SUM(totals.transactionRevenue) / 1e6
# - avg_purchase_value = revenue / transactions
# NOTE: the division by 1e6 undoes the scaling of totals.transactionRevenue,
# which the GA360 BigQuery Export schema describes as the value multiplied by
# 10^6 -- confirm against the schema if the data source changes.
# GROUP BY 1 / ORDER BY 1 refer to the first selected column (date).
sql = """
SELECT 
    PARSE_DATE("%Y%m%d", date) AS date,
    SUM(totals.visits) AS sessions,
    SUM(totals.transactions) AS transactions,
    SUM(totals.transactions)/SUM(totals.visits) AS conversion_rate,
    SUM(totals.transactionRevenue)/1e6 AS revenue,
    SUM(totals.transactionRevenue)/1e6/SUM(totals.transactions) AS avg_purchase_value
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE 
    _TABLE_SUFFIX BETWEEN '20170701' AND '20170710'
GROUP BY 1
ORDER BY 1
"""
# Last expression of the cell, so the notebook displays the returned DataFrame.
bquery(sql)

Unnamed: 0,date,sessions,transactions,conversion_rate,revenue,avg_purchase_value
0,2017-07-01,2048,3,0.001465,84.54,28.18
1,2017-07-02,1895,8,0.004222,634.99,79.37375
2,2017-07-03,2046,15,0.007331,1225.81,81.720667
3,2017-07-04,1938,7,0.003612,379.98,54.282857
4,2017-07-05,2885,42,0.014558,8029.36,191.175238
5,2017-07-06,2658,31,0.011663,3883.85,125.285484
6,2017-07-07,2450,40,0.016327,4339.02,108.4755
7,2017-07-08,1859,14,0.007531,454.96,32.497143
8,2017-07-09,1921,19,0.009891,751.1,39.531579
9,2017-07-10,2769,47,0.016974,4718.07,100.384468


## Goals

Several types of **Goals** can be [defined](https://support.google.com/analytics/answer/1012040) in Google Analytics:
* Destination page: e.g. */registersuccess.html*
* Duration: e.g. minimum 5 minutes per session
* Pages per session: e.g. >10 pages per session
* Event: e.g. click on a video

Goals are not precomputed in the BigQuery raw logs; they have to be extracted with a query. Unlike the queries above, which use the pre-aggregated metrics in the `totals` field, we now need to unnest the nested `hits` field to extract page paths. We therefore compute sessions differently, by counting the number of distinct session IDs.

In [6]:
# Compute if a goal has been reached in each session, then count goals per day.
#
# Step 1 (CTE `sub1`): one row per (date, session_id).
# - The comma join with UNNEST(hits) yields one row per hit, so the session-level
#   `totals.*` values are repeated on every hit row of a session.
# - MAX(IF(cond, 1, 0)) collapses those hit rows into a 0/1 flag per session:
#   1 if the condition holds on at least one hit row, else 0.
# - session_id is fullVisitorId concatenated with visitId (no separator).
# - Only rows with totals.visits > 0 are kept.
#
# Step 2 (outer query): per day, count the sessions and sum each 0/1 flag to get
# the number of sessions that reached each goal. Rows of `sub1` are already
# unique per (date, session_id), so COUNT(session_id) counts distinct sessions
# within a day.
sql = """
WITH sub1 AS (
    SELECT 
        PARSE_DATE("%Y%m%d", date) AS date,
        CONCAT(fullVisitorId, CAST(visitId AS STRING)) AS session_id,
        -- Goal: browsed to page /registersuccess.html
        MAX(IF(hits.page.pagePath = '/registersuccess.html', 1, 0)) AS goal_destination,
        -- Goal: session duration >= 5 min
        MAX(IF(totals.timeOnSite >= 300, 1, 0)) AS goal_duration,
        -- Goal: pages per session > 10
        MAX(IF(totals.pageviews > 10, 1, 0)) AS goal_pageviews
    FROM 
        `bigquery-public-data.google_analytics_sample.ga_sessions_*`,
        UNNEST(hits) AS hits
    WHERE 
        _TABLE_SUFFIX BETWEEN '20170701' AND '20170710'
        AND totals.visits > 0
    GROUP BY date, session_id
)

SELECT 
    date,
    COUNT(session_id) AS sessions,
    SUM(goal_destination) AS goals_destination,
    SUM(goal_duration) AS goals_duration,
    SUM(goal_pageviews) AS goals_pageviews
FROM sub1
GROUP BY date
ORDER BY date
"""
# Last expression of the cell, so the notebook displays the returned DataFrame.
bquery(sql)

Unnamed: 0,date,sessions,goals_destination,goals_duration,goals_pageviews
0,2017-07-01,2048,30,181,114
1,2017-07-02,1895,32,168,104
2,2017-07-03,2046,39,214,116
3,2017-07-04,1938,31,194,94
4,2017-07-05,2885,53,312,202
5,2017-07-06,2658,43,280,156
6,2017-07-07,2450,55,298,195
7,2017-07-08,1859,34,208,114
8,2017-07-09,1921,39,186,129
9,2017-07-10,2769,72,338,220
