# Guide to Google Analytics with Python

By Woratana Ngarmtrakulchol [ http://www.byperth.com ]

## Getting started

1. Install Google API Client

In [1]:
# Install Google API Client
#https://medium.com/dataly-data-science-thailand/google-analytics-api-how-to-get-data-from-google-analytics-with-python-in-jupyter-notebook-with-85483dd73e22
!pip install --upgrade google-api-python-client

Collecting google-api-python-client
  Downloading https://files.pythonhosted.org/packages/56/04/5259a17a16a779426f6e2ac62796135b0d4a59cf8033a21037fd4ba5bf81/google_api_python_client-1.7.4-py3-none-any.whl (55kB)
Collecting uritemplate<4dev,>=3.0.0 (from google-api-python-client)
  Downloading https://files.pythonhosted.org/packages/e5/7d/9d5a640c4f8bf2c8b1afc015e9a9d8de32e13c9016dcc4b0ec03481fb396/uritemplate-3.0.0-py2.py3-none-any.whl
Collecting httplib2<1dev,>=0.9.2 (from google-api-python-client)
  Downloading https://files.pythonhosted.org/packages/fd/ce/aa4a385e3e9fd351737fd2b07edaa56e7a730448465aceda6b35086a0d9b/httplib2-0.11.3.tar.gz (215kB)
Collecting google-auth-httplib2>=0.0.3 (from google-api-python-client)
  Downloading https://files.pythonhosted.org/packages/33/49/c814d6d438b823441552198f096fcd0377fd6c88714dbed34f1d3c8c4389/google_auth_httplib2-0.0.3-py2.py3-none-any.whl
Collecting google-auth>=1.4.1 (from google-api-python-client)
  Downloading https://files.pythonhosted.

Then follow the steps in the tutorial linked below:
2. Create a new service account
3. Download the key file (provided after you create the service account) and place it in the same folder as this notebook
4. Set SERVICE_ACCOUNT_EMAIL below to the service account's email address
5. Add the service account's email address as a user in Google Analytics

Tutorial: https://developers.google.com/analytics/devguides/reporting/core/v4/quickstart/service-py

In [3]:
!pip install --upgrade oauth2client

Collecting oauth2client
  Downloading https://files.pythonhosted.org/packages/95/a9/4f25a14d23f0786b64875b91784607c2277eff25d48f915e39ff0cff505a/oauth2client-4.1.3-py2.py3-none-any.whl (98kB)
Installing collected packages: oauth2client
Successfully installed oauth2client-4.1.3


In [3]:
"""Hello Analytics Reporting API V4."""

import argparse
import pandas as pd
import numpy as np
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools


SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
DISCOVERY_URI = ('https://analyticsreporting.googleapis.com/$discovery/rest')
KEY_FILE_LOCATION = '.\My Project 26766-ae554bd965b7.p12'
SERVICE_ACCOUNT_EMAIL = 'gatest@valid-progress-217020.iam.gserviceaccount.com'
VIEW_ID = '172386207'

The sample code below is adapted from the tutorial linked above.

Basically, we get the data from Google Analytics with the get_report() function below. We can adapt the contents of `reportRequests` to fit our query.

## Reference for building query
V4 Query Reference: https://developers.google.com/analytics/devguides/reporting/core/v4/basics

We can also get some ideas from the V3 API Common Queries page (old version): https://developers.google.com/analytics/devguides/reporting/core/v3/common-queries

However, we will need to adapt those queries a little to fit the V4 API.

In [9]:
def initialize_analyticsreporting():
  """Creates an authorized analyticsreporting service object.

  Loads service-account credentials from the P12 key file at
  KEY_FILE_LOCATION for SERVICE_ACCOUNT_EMAIL with SCOPES, authorizes an
  httplib2 transport with them, and builds the service from DISCOVERY_URI.

  Returns:
    The service object returned by build().
  """
  service_credentials = ServiceAccountCredentials.from_p12_keyfile(
      SERVICE_ACCOUNT_EMAIL, KEY_FILE_LOCATION, scopes=SCOPES)

  # Every request sent through this transport carries the credentials.
  authorized_http = service_credentials.authorize(httplib2.Http())

  return build(
      'analytics',
      'v4',
      http=authorized_http,
      discoveryServiceUrl=DISCOVERY_URI)


def get_report(analytics):
  """Queries the Analytics Reporting API V4 for per-page metrics.

  Requests six page metrics for VIEW_ID over the last 7 days, broken down
  by ga:pagePath and ordered by ga:pageviews, descending.

  For a simpler query (sessions over the last 7 days), keep only
  'viewId' and 'dateRanges' and use
  'metrics': [{'expression': 'ga:sessions'}].

  Args:
    analytics: the service object from initialize_analyticsreporting().

  Returns:
    The result of executing the batchGet request.
  """
  metric_names = [
      'ga:pageviews',
      'ga:uniquePageviews',
      'ga:timeOnPage',
      'ga:bounces',
      'ga:entrances',
      'ga:exits',
  ]

  report_request = {
      'viewId': VIEW_ID,
      'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
      'metrics': [{'expression': metric_name} for metric_name in metric_names],
      'dimensions': [{'name': 'ga:pagePath'}],
      'orderBys': [{'fieldName': 'ga:pageviews', 'sortOrder': 'DESCENDING'}],
  }

  batch_request = analytics.reports().batchGet(
      body={'reportRequests': [report_request]})
  return batch_request.execute()


def print_response(response):
  """Parses and prints the Analytics Reporting API V4 response.

  For every row of every report, prints one "<dimension header>: <value>"
  line per dimension, then for each entry in the row's 'metrics' list a
  "Date range (<index>)" line followed by one "<metric name>: <value>" line
  per metric. Missing keys are treated as empty, so an empty response
  prints nothing.

  Args:
    response: dict with the structure read here
      (reports -> columnHeader / data -> rows -> dimensions / metrics).
  """
  for report in response.get('reports', []):
    column_header = report.get('columnHeader', {})
    dimension_headers = column_header.get('dimensions', [])
    metric_headers = column_header.get('metricHeader', {}).get('metricHeaderEntries', [])

    for row in report.get('data', {}).get('rows', []):
      for header, dimension in zip(dimension_headers, row.get('dimensions', [])):
        print(header + ': ' + dimension)

      for i, date_range_values in enumerate(row.get('metrics', [])):
        print('Date range (' + str(i) + ')')
        # Default to [] like every other lookup in this function: without
        # it, an entry lacking 'values' makes zip() raise a TypeError.
        for metric_header, value in zip(metric_headers, date_range_values.get('values', [])):
          print(metric_header.get('name') + ': ' + value)


def main():
    """Builds the service object, runs the report query and prints the result."""
    service = initialize_analyticsreporting()
    print_response(get_report(service))

if __name__ == '__main__':
  main()


ga:pagePath: /en-gb/oral-health/
Date range (0)
ga:pageviews: 1213
ga:uniquePageviews: 626
ga:timeOnPage: 40210.0
ga:bounces: 113
ga:entrances: 520
ga:exits: 269
ga:pagePath: /en-gb/samples/
Date range (0)
ga:pageviews: 1137
ga:uniquePageviews: 518
ga:timeOnPage: 44709.0
ga:bounces: 23
ga:entrances: 153
ga:exits: 205
ga:pagePath: /en-gb/registration/
Date range (0)
ga:pageviews: 497
ga:uniquePageviews: 381
ga:timeOnPage: 73978.0
ga:bounces: 31
ga:entrances: 84
ga:exits: 101
ga:pagePath: /en-gb/vpv/register/complete
Date range (0)
ga:pageviews: 252
ga:uniquePageviews: 237
ga:timeOnPage: 33958.0
ga:bounces: 0
ga:entrances: 2
ga:exits: 88
ga:pagePath: /samples/
Date range (0)
ga:pageviews: 200
ga:uniquePageviews: 82
ga:timeOnPage: 5578.0
ga:bounces: 2
ga:entrances: 33
ga:exits: 28
ga:pagePath: /en-gb/my-profile/personal-detail/
Date range (0)
ga:pageviews: 184
ga:uniquePageviews: 98
ga:timeOnPage: 2347.0
ga:bounces: 1
ga:entrances: 2
ga:exits: 9
ga:pagePath: /en-gb/thank-you/?errorcode=0
