## Connecting to Google Analytics with the Reporting API

In [1]:
# Importing modules for API setup and data conversion

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import json

# Display settings: do not truncate long cell contents and do not cap the
# number of rows shown when a DataFrame is rendered.
pd.options.display.max_colwidth = None
pd.options.display.max_rows = None

In [2]:
# Google API params. Views = Academy only report

# OAuth scopes requested when the service-account credentials are created
# (read-only access to Analytics, per the scope URL).
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Service-account JSON key file read by initialize_analyticsreporting();
# the path is relative, so it is resolved against the working directory.
KEY_FILE_LOCATION = 'keys/client_secrets.json'
# Google Analytics view that get_report() queries (sent as the request's 'viewId').
VIEW_ID = 'ga:175337801'

### Func 1: Build an authorized Google Analytics Reporting API service

In [3]:
def initialize_analyticsreporting():
    """Create an authorized Analytics Reporting API V4 service object.

    Service-account credentials are loaded from the JSON key file at
    ``KEY_FILE_LOCATION`` for the scopes listed in ``SCOPES``, and are then
    used to build the ``analyticsreporting`` ``v4`` service.

    Returns:
        The service object produced by ``build``.
    """
    return build(
        'analyticsreporting',
        'v4',
        credentials=ServiceAccountCredentials.from_json_keyfile_name(
            KEY_FILE_LOCATION, SCOPES
        ),
    )

### Func 2: Return report in json dict, plus metrics and dimensions used

In [4]:
def get_report(analytics, metrics, dimensions, start, end):
    """Run a single-report ``batchGet`` query against the view ``VIEW_ID``.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.
        metrics: Metric names without the ``ga:`` prefix; the prefix is added
            here. The first metric is also used as the sort key, so at least
            one metric is required (an empty sequence raises ``IndexError``).
        dimensions: Dimension names without the ``ga:`` prefix.
        start: Start date of the single date range sent in the request.
        end: End date of the single date range sent in the request.

    Returns:
        A 3-tuple ``(response, METS, DIMS)``: the executed API response,
        followed by the ``ga:``-prefixed metric and dimension name lists
        that were sent in the request.
    """
    prefixed_metrics = [f'ga:{metric}' for metric in metrics]
    prefixed_dimensions = [f'ga:{dimension}' for dimension in dimensions]

    reports_resource = analytics.reports()

    # One report request: rows are sorted by the first metric, largest first.
    report_request = {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': expression} for expression in prefixed_metrics],
        'orderBys': [{'fieldName': prefixed_metrics[0],
                      'sortOrder': 'DESCENDING'}],
        'dimensions': [{'name': name} for name in prefixed_dimensions],
    }

    response = reports_resource.batchGet(
        body={'reportRequests': [report_request]}
    ).execute()

    return response, prefixed_metrics, prefixed_dimensions

### Func 3: Convert to pd DataFrame

In [5]:
def to_df(response, METS, DIMS):
    """Flatten a Reporting API response dict into a pandas DataFrame.

    Args:
        response: Response dict; rows are read from
            ``response['reports'][*]['data']['rows']`` and missing keys are
            treated as empty.
        METS: List of metric names, in the order of each row's metric values.
        DIMS: List of dimension names, in the order of each row's
            ``'dimensions'`` list.

    Returns:
        A DataFrame with one column per dimension followed by one column per
        metric. Anything before the last ``:`` in a name is stripped from the
        column label (``'ga:sessions'`` becomes ``'sessions'``). Values are
        stored exactly as they appear in the response; no type conversion is
        done here.

    Note:
        Dimensions are appended once per row, while metric values are appended
        once per entry of the row's ``'metrics'`` list, so a row with more than
        one entry there leaves the columns with different lengths.
    """
    columns = {f"{name}": [] for name in DIMS + METS}

    for report in response.get('reports', []):
        for row in report.get('data', {}).get('rows', []):
            # Dimension values are positional: DIMS[pos] <-> row['dimensions'][pos].
            for pos, dim_name in enumerate(DIMS):
                columns[dim_name].append(row.get('dimensions', [])[pos])

            # Metric values are positional too, within each 'metrics' entry.
            for metric_entry in row.get('metrics', []):
                entry_values = metric_entry.get('values', [])
                for pos, metric_name in enumerate(METS):
                    columns[metric_name].append(entry_values[pos])

    frame = pd.DataFrame(data=columns)
    # Drop any 'ga:'-style prefix from the column labels.
    frame.columns = [label.split(':')[-1] for label in frame.columns]

    return frame

### Func 4: Final – putting it all together

In [6]:
def ga_to_df(metrics, dimensions, start, end):
    """Fetch a Google Analytics report and return it as a pandas DataFrame.

    Runs the notebook's three helpers in order:
    ``initialize_analyticsreporting`` -> ``get_report`` -> ``to_df``.

    Args:
        metrics: list of metric names without the ``ga:`` prefix.
        dimensions: list of dimension names without the ``ga:`` prefix.
        start: str, start date of the report's date range.
        end: str, end date of the report's date range.

    Returns:
        The DataFrame produced by ``to_df``: one column per dimension followed
        by one column per metric, labelled without the ``ga:`` prefix.
    """
    analytics = initialize_analyticsreporting()
    response, METS, DIMS = get_report(analytics, metrics, dimensions, start, end)

    # Pass on the prefixed name lists that get_report actually sent. They are
    # always plain lists, so to_df's ``DIMS + METS`` works even when the caller
    # supplied tuples or other iterables; to_df strips the prefix again.
    return to_df(response, METS, DIMS)

## Getting general data for posts

In [7]:
# The first blog, "Of trees and men", was published 29 September 2017,
# so the report's date range starts on that day.
# NOTE(review): end='today' is presumably resolved by the Reporting API as a
# relative date, which would make the result depend on when the cell is run — confirm.
# All five metrics are broken down by a single dimension, the landing page path.

df_posts = ga_to_df(metrics=['pageViews',
                             'avgTimeOnPage',
                             'avgSessionDuration', 
                             'sessions', 
                             'bounces'],
                    dimensions=['landingPagePath'],
                    start='2017-09-29',
                    end='today')

In [9]:
# Render the posts report as this cell's output.
df_posts

Unnamed: 0,landingPagePath,pageViews,avgTimeOnPage,avgSessionDuration,sessions,bounces
0,/academy/,25347,145.08571234366502,201.27080394922427,10635,5193
1,/academy/is-it-too-late-for-our-planet/,6989,259.2520391517129,55.49384001388166,5763,4786
2,/academy/what-are-scope-1-2-3-emissions/,5838,794.4245562130177,105.70069740410692,5162,3332
3,/academy/most-powerful-greenhouse-gas/,5517,314.0914634146341,30.888557213930348,5025,4554
4,/academy/the-stakeholders-of-climate-change/,5389,432.7226368159204,76.61766630316248,4585,3821
5,/academy/sustainable-climate-change-organisation-partnership/,2534,156.66666666666666,114.68967874231032,1463,939
6,/academy/climate-action-data-driven-approach/,1804,232.8179271708684,152.51284403669726,1090,800
7,/academy/how-can-the-circular-economy-support-sustainable-development/,1675,535.3693693693693,82.07088781830696,1453,1203
8,/academy/blockchain-versus-sustainability/,1051,232.8450704225352,36.375137513751376,909,832
9,/academy/the-benefits-of-monitoring-carbon-emissions-for-a-business/,867,428.0551181102362,76.21486486486486,740,570
