In [1]:
# pip install https://github.com/googleapis/python-analytics-data
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import DateRange
from google.analytics.data_v1beta.types import Dimension
from google.analytics.data_v1beta.types import Metric
from google.analytics.data_v1beta.types import RunReportRequest
import plotly.express as px
import plotly.graph_objs as go
import numpy as np
import pandas as pd

In [2]:
# https://lvngd.com/blog/access-the-google-analytics-reporting-api-with-python/

In [3]:
# A view ID selects which analytics tracker to query (e.g. Wiki, robots, ros.org).
# Every property needs its own view ID.
# How to find one: https://keyword-hero.com/documentation/finding-your-view-id-in-google-analytics
# Example settings page: https://analytics.google.com/analytics/web/#/a17821189w35999412p183497681/admin/view/settings
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
KEY_FILE_LOCATION = './ganalyticstest-254023-93ec92d191d6.json'

WIKI_VIEW_ID = '76364704'
DISCOURSE_VIEW_ID = '117310724'
ANSWERS_VIEW_ID = '42382471'
ROSCON_VIEW_ID = '52532925'
ROSORG_VIEW_ID = '79652481'
ROBOTS_VIEW_ID = '129454749'
ROS2_VIEW_ID = '87843515'
INDEX_VIEW_ID = '183497681'

# (view ID, display name) pairs for the properties that are compared year by year.
# ROSCON_VIEW_ID and ROS2_VIEW_ID are defined above but are not part of this list.
_SITES = [
    (WIKI_VIEW_ID, "ROS Wiki"),
    (DISCOURSE_VIEW_ID, "ROS Discourse"),
    (ANSWERS_VIEW_ID, "ROS Answers"),
    (ROSORG_VIEW_ID, "ROS.org"),
    (ROBOTS_VIEW_ID, "robots.ros.org"),
    (INDEX_VIEW_ID, "ROS Index"),
]

# Parallel lists: websites[i] is the view ID whose display name is web_names[i].
websites = [view_id for view_id, _ in _SITES]
web_names = [label for _, label in _SITES]

In [4]:
# Load the service-account credentials from the JSON key file, passing SCOPES
# as the second argument.
credentials = ServiceAccountCredentials.from_json_keyfile_name(KEY_FILE_LOCATION, SCOPES)

In [5]:
# Build the 'analyticsreporting' v4 service client; the report queries in this
# notebook all go through this `analytics` object.
analytics = build(
    'analyticsreporting',
    'v4',
    credentials=credentials,
)

In [6]:
# Note: you must grep the keyfile for the service-account e-mail address
# (e.g. blah@foo.iam.gserviceaccount.com) and then add that address
# as an authorized user of the Analytics account.
# i.e. https://analytics.google.com/analytics/web/#/a17821189w35999412p76364704/admin/suiteusermanagement/account
# See: https://stackoverflow.com/questions/12837748/analytics-google-api-error-403-user-does-not-have-any-google-analytics-account

In [7]:
# API REFERENCE: https://developers.google.com/analytics/devguides/reporting/core/v3/reference#metrics
# Metrics REFERENCE: https://ga-dev-tools.web.app/dimensions-metrics-explorer/

In [42]:
# One-off check: hits on the RosCon view for the same calendar day in two
# different years, requested as two date ranges of a single report.
response = analytics.reports().batchGet(
    body={
        'reportRequests': [
            {
                'viewId': ROSCON_VIEW_ID,
                'dateRanges': [
                    {'startDate': '2021-07-14', 'endDate': '2021-07-14'},
                    {'startDate': '2020-07-14', 'endDate': '2020-07-14'},
                ],
                'metrics': [{'expression': 'ga:hits'}],
            }
        ]
    }
).execute()

In [43]:
# Show the raw API response from the previous query as this cell's output.
response

{'reports': [{'columnHeader': {'metricHeader': {'metricHeaderEntries': [{'name': 'ga:hits',
       'type': 'INTEGER'}]}},
   'data': {'isDataGolden': True,
    'maximums': [{'values': ['257']}, {'values': ['88']}],
    'minimums': [{'values': ['257']}, {'values': ['88']}],
    'rowCount': 1,
    'rows': [{'metrics': [{'values': ['257']}, {'values': ['88']}]}],
    'totals': [{'values': ['257']}, {'values': ['88']}]}}]}

In [81]:
def fetch_stats(viewid, metrics, years, start="{0}-07-01", stop="{0}-07-07", result_parser=None):
    """Query one Analytics view once per year and collect one result per year.

    For each entry in ``years`` the ``start`` and ``stop`` templates are filled
    in with that year via ``str.format`` and a report with a single date range
    is requested through the notebook-level ``analytics`` client.

    Args:
        viewid: View ID to query.
        metrics: Value sent as the request's ``'metrics'`` field, e.g.
            ``[{'expression': 'ga:hits'}]``.
        years: Years to query; one request is made per entry, in order.
        start: Start-date template; ``{0}`` is replaced by the year.
        stop: End-date template; ``{0}`` is replaced by the year.
        result_parser: Optional callable that receives the raw response of each
            request; its return value is stored as that year's result. When it
            is not given, the first value of the first report's totals is
            converted to ``int``.

    Returns:
        A list with one entry per year, in the same order as ``years``.
    """
    results = []
    for year in years:
        start_date = start.format(year)
        stop_date = stop.format(year)
        response = analytics.reports().batchGet(
            body={
                'reportRequests': [
                    {
                        'viewId': viewid,
                        'dateRanges': [{'startDate': start_date, 'endDate': stop_date}],
                        'metrics': metrics,
                    }
                ]
            }
        ).execute()
        # How a response is reduced depends on the kind of data requested, so
        # callers may supply their own parser.
        if result_parser:
            results.append(result_parser(response))
        else:
            results.append(int(response["reports"][0]["data"]["totals"][0]["values"][0]))
    return results

# Rejigger our dataset to have values under a column
# Read through the data and make a column for each website
# and a row for each year

def stats_to_df(years,web_names,stats):
    """Pivot per-site yearly series into a table with one row per year.

    Args:
        years: Years, one per output row.
        web_names: Column names, one per series in ``stats``.
        stats: One sequence per site; element ``i`` of each sequence belongs to
            ``years[i]`` and is converted with ``int``.

    Returns:
        A DataFrame with a ``year`` column followed by one column per site name.
    """
    records = []
    for row, year in enumerate(years):
        record = {"year": year}
        record.update({site: int(series[row]) for site, series in zip(web_names, stats)})
        records.append(record)
    return pd.DataFrame(data=records)

#print(fetch_stats(WIKI_VIEW_ID,metrics,years))

In [82]:
# Yearly hit counts for every tracked web property.
metrics = [{'expression': 'ga:hits'}]
years = list(range(2013, 2022))
viewid = WIKI_VIEW_ID  # not used by the query below

# One series per web property, in the same order as `websites`.
hit_stats = [fetch_stats(web, metrics, years) for web in websites]

df = stats_to_df(years, web_names, hit_stats)

In [62]:
# Sanity check: print each site's raw series, then show the top of the table.
for name, stat in zip(web_names, hit_stats):
    print(f"{name} {stat}")

df.head()

ROS Wiki [0, 218840, 267810, 295702, 414117, 486583, 521988, 460224, 456847]
ROS Discourse [0, 0, 0, 1218, 5428, 8808, 12947, 18988, 19329]
ROS Answers [47065, 81665, 66948, 93578, 118528, 143152, 168471, 173500, 191410]
ROS.org [0, 30760, 55555, 65575, 67458, 72083, 77019, 60026, 60371]
robots.ros.org [0, 0, 0, 0, 5573, 5506, 6177, 6325, 5134]
ROS Index [0, 0, 0, 0, 0, 0, 26019, 57177, 9495]


Unnamed: 0,year,ROS Wiki,ROS Discourse,ROS Answers,ROS.org,robots.ros.org,ROS Index
0,2013,0,0,47065,0,0,0
1,2014,218840,0,81665,30760,0,0
2,2015,267810,0,66948,55555,0,0
3,2016,295702,1218,93578,65575,0,0
4,2017,414117,5428,118528,67458,5573,0


In [65]:
# One line per web property. "year" is the x-axis, so it is dropped from the
# y columns; otherwise the year values themselves are passed as a data series.
fig = px.line(df, x="year", y=df.columns.drop("year"), title="Hits by Year")
fig.show()

In [73]:
# Yearly new-user counts (ga:newUsers) for every tracked web property.
metrics = [{'expression': 'ga:newUsers'}]
years = [2013,2014,2015,2016,2017,2018,2019,2020,2021]

# Main loop, for each web property fetch stats by years.
hit_stats = []
for web in websites:
    hit_stats.append(fetch_stats(web,metrics,years))

df = stats_to_df(years,web_names,hit_stats)
# The title names the metric actually queried (new users, not all users).
# "year" is the x-axis, so it is dropped from the y columns.
fig = px.line(df, x="year", y=df.columns.drop("year"), title="New Users by Year")
fig.show()

In [74]:
# Yearly user counts (ga:users) for every tracked web property.
metrics = [{'expression': 'ga:users'}]
years = [2013,2014,2015,2016,2017,2018,2019,2020,2021]

# Main loop, for each web property fetch stats by years.
hit_stats = []
for web in websites:
    hit_stats.append(fetch_stats(web,metrics,years))

df = stats_to_df(years,web_names,hit_stats)
# "year" is the x-axis, so it is dropped from the y columns; otherwise the
# year values themselves are passed as a data series.
fig = px.line(df, x="year", y=df.columns.drop("year"), title="Users by Year")
fig.show()

In [98]:
# NOTE: the recorded run of this cell failed with HTTP 400
# "Unknown metric(s): ga:operatingSystem" - the API did not accept it as a metric.
metrics = [{'expression': 'ga:operatingSystem'}]
years = [2013,2014,2015,2016,2017,2018,2019,2020,2021]

def print_input(stuff):
    """Debug parser: print the raw API response and return None."""
    print(stuff)
    return None

# Main loop, for each web property fetch stats by years.
# result_parser is an argument of fetch_stats, not of list.append.
hit_stats = []
for web in websites:
    hit_stats.append(fetch_stats(web, metrics, years, result_parser=print_input))

#df = stats_to_df(years,web_names,hit_stats)
#fig = px.line(df, x="year", y=df.columns,title="Users by Year")
#fig.show()


HttpError: <HttpError 400 when requesting https://analyticsreporting.googleapis.com/v4/reports:batchGet?alt=json returned "Unknown metric(s): ga:operatingSystem
For details see https://developers.google.com/analytics/devguides/reporting/core/dimsmets.". Details: "Unknown metric(s): ga:operatingSystem
For details see https://developers.google.com/analytics/devguides/reporting/core/dimsmets.">

In [105]:
# This is helpful
# https://ga-dev-tools.web.app/query-explorer/
# https://developers.google.com/analytics/devguides/reporting/core/v4/samples#python

import pprint
pp = pprint.PrettyPrinter(indent=2)

# Hits on the RosCon view for one day in 2019 and the same day in 2021,
# requested with ga:operatingSystem as a dimension.
response = analytics.reports().batchGet(
    body={
        'reportRequests': [
            {
                'viewId': ROSCON_VIEW_ID,
                'dateRanges': [
                    {'startDate': '2019-07-14', 'endDate': '2019-07-14'},
                    {'startDate': '2021-07-14', 'endDate': '2021-07-14'},
                ],
                'metrics': [{'expression': 'ga:hits'}],
                'dimensions': [{'name': 'ga:operatingSystem'}],
            }
        ]
    }
).execute()
pp.pprint(response)

{ 'reports': [ { 'columnHeader': { 'dimensions': ['ga:operatingSystem'],
                                   'metricHeader': { 'metricHeaderEntries': [ { 'name': 'ga:hits',
                                                                                'type': 'INTEGER'}]}},
                 'data': { 'isDataGolden': True,
                           'maximums': [{'values': ['91']}, {'values': ['92']}],
                           'minimums': [{'values': ['0']}, {'values': ['0']}],
                           'rowCount': 7,
                           'rows': [ { 'dimensions': ['(not set)'],
                                       'metrics': [ {'values': ['0']},
                                                    {'values': ['1']}]},
                                     { 'dimensions': ['Android'],
                                       'metrics': [ {'values': ['28']},
                                                    {'values': ['27']}]},
                                     { 'dimensions