## Connecting to Google Analytics with the Reporting API

In [1]:
# Importing modules for API setup and data conversion

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import json

# Lift pandas' display limits so DataFrame output is not truncated:
# None for max_colwidth shows full cell contents (long URL paths),
# None for max_rows shows every row of a displayed frame.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

In [2]:
# Google API params. Views = Academy only report
#   SCOPES            - OAuth scope requested for the service-account credentials
#                       (the read-only Analytics scope).
#   KEY_FILE_LOCATION - relative path to the service-account JSON key file.
#   VIEW_ID           - Google Analytics view that get_report queries.

SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
KEY_FILE_LOCATION = 'keys/client_secrets.json'
VIEW_ID = 'ga:175337801'

### Func 1: Initialize the Google Analytics Reporting API service

In [3]:
def initialize_analyticsreporting():
    """Create an authorized Analytics Reporting API V4 client.

    Loads service-account credentials from the JSON key file at
    KEY_FILE_LOCATION, restricted to SCOPES, and uses them to build the
    'analyticsreporting' v4 service.

    Returns:
        The service object produced by ``build``.
    """
    service_credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION,
        SCOPES,
    )
    return build('analyticsreporting', 'v4', credentials=service_credentials)

### Func 2: Return report in json dict, plus metrics and dimensions used

In [4]:
def get_report(analytics, metrics, dimensions, start, end):
    """Run a single-report batchGet query against the Analytics Reporting API V4.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.
        metrics: Metric names without the 'ga:' prefix. The first one is
            used as the sort key, so at least one is required.
        dimensions: Dimension names without the 'ga:' prefix.
        start: Value sent as the date range's 'startDate'.
        end: Value sent as the date range's 'endDate'.

    Returns:
        A 3-tuple ``(response, metrics_used, dimensions_used)``: the executed
        batchGet response followed by the 'ga:'-prefixed metric and
        dimension name lists that were sent in the request.
    """
    ga_metrics = ['ga:{}'.format(metric) for metric in metrics]
    ga_dimensions = ['ga:{}'.format(dimension) for dimension in dimensions]

    reports_api = analytics.reports()

    report_request = {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': ga_metric} for ga_metric in ga_metrics],
        # Rows come back ordered by the first requested metric, largest first.
        'orderBys': [{'fieldName': ga_metrics[0], 'sortOrder': 'DESCENDING'}],
        'dimensions': [{'name': ga_dimension} for ga_dimension in ga_dimensions],
    }

    response = reports_api.batchGet(body={'reportRequests': [report_request]}).execute()
    return response, ga_metrics, ga_dimensions

### Func 3: Convert to pd DataFrame

In [5]:
def to_df(response, METS, DIMS):
    """Flatten an Analytics Reporting API V4 response into a DataFrame.

    Args:
        response: Response dict. Rows are read from
            ``response['reports'][*]['data']['rows']``; missing keys are
            treated as empty.
        METS: Metric names, in the order their values appear in each
            ``metrics[*]['values']`` list of a row.
        DIMS: Dimension names, in the order of each row's ``dimensions`` list.

    Returns:
        A DataFrame with the dimension columns followed by the metric
        columns. Anything up to the last ':' is stripped from the column
        names (e.g. 'ga:sessions' -> 'sessions'). Values are stored exactly
        as they appear in the response; no type conversion is done.

        One output row is produced per entry of a row's ``metrics`` list,
        with the row's dimension values repeated on each, so the columns
        always have equal length. With a single date range this is one
        output row per response row.

    Raises:
        IndexError: If a row holds fewer dimension or metric values than
            names were supplied.
    """
    dim_keys = [f"{name}" for name in DIMS]
    met_keys = [f"{name}" for name in METS]
    data_dict = {key: [] for key in dim_keys + met_keys}

    for report in response.get('reports', []):
        for row in report.get('data', {}).get('rows', []):
            dim_values = row.get('dimensions', [])
            # 'metrics' carries one entry per requested date range. Appending
            # the dimensions once per entry (instead of once per row) keeps
            # every column the same length when there are several entries.
            # A row without entries still yields one output row; if metrics
            # were requested the lookup below fails loudly on it.
            metric_entries = row.get('metrics') or [{}]
            for entry in metric_entries:
                metric_values = entry.get('values', [])
                for i, key in enumerate(dim_keys):
                    data_dict[key].append(dim_values[i])
                for i, key in enumerate(met_keys):
                    data_dict[key].append(metric_values[i])

    df = pd.DataFrame(data=data_dict)
    df.columns = [col.split(':')[-1] for col in df.columns]

    return df

### Func 4: Final – putting it all together

In [6]:
def ga_to_df(metrics,dimensions,start,end):
    """Fetch a Google Analytics report and return it as a DataFrame.

    Builds the API service, runs the query and flattens the response.

    Args:
        metrics: list of metric names without the 'ga:' prefix.
        dimensions: list of dimension names without the 'ga:' prefix.
        start: str, start of the date range.
        end: str, end of the date range.

    Returns:
        The DataFrame produced by ``to_df`` for the report response.
    """
    analytics = initialize_analyticsreporting()
    response, METS, DIMS = get_report(analytics, metrics, dimensions, start, end)
    # Hand to_df the name lists get_report actually sent, not the raw
    # arguments: they are always plain lists, so differing sequence types or
    # one-shot iterables passed by the caller cannot break the conversion.
    # to_df strips the 'ga:' prefix, so the resulting column names are the same.
    return to_df(response, METS, DIMS)

## Getting general data for posts

In [7]:
# The first article, "Of trees and men", was published 29 September 2017.
# Google Analytics only started tracking some time after that, so starting
# the date range at 2017-10-01 is a deliberately early bound ("better safe
# than sorry") that covers everything that was recorded, up to today.
#
# NOTE(review): landingPagePath is presumably a session-level dimension, so
# pageViews / avgTimeOnPage would be aggregated over sessions that *started*
# on each page rather than over views of the page itself - confirm this is
# the intended breakdown.

df_posts = ga_to_df(metrics=['pageViews',
                             'avgTimeOnPage',
                             'avgSessionDuration', 
                             'sessions', 
                             'bounces'],
                    dimensions=['landingPagePath'],
                    start='2017-10-01',
                    end='today')

In [8]:
# Preview the first ten rows; get_report requests rows ordered by the first
# metric (pageViews here), descending.
df_posts.head(10)

Unnamed: 0,landingPagePath,pageViews,avgTimeOnPage,avgSessionDuration,sessions,bounces
0,/academy/,27847,139.82447164869967,201.53430171508572,11428,5228
1,/academy/what-are-scope-1-2-3-emissions/,10298,935.0118143459916,126.36771644902886,9113,4703
2,/academy/is-it-too-late-for-our-planet/,7655,274.2827899924185,57.816603535353536,6336,5052
3,/academy/the-stakeholders-of-climate-change/,6219,485.5,83.99774053850498,5311,4030
4,/academy/most-powerful-greenhouse-gas/,5879,316.2674418604651,30.646280067126607,5363,4766
5,/academy/sustainable-climate-change-organisation-partnership/,2537,156.66666666666666,114.4549795361528,1466,942
6,/academy/how-can-the-circular-economy-support-sustainable-development/,1991,633.3961538461539,96.3581744656268,1731,1320
7,/academy/climate-action-data-driven-approach/,1825,227.54918032786884,152.39890210430008,1093,801
8,/academy/ai-climate-change/,1448,723.3762376237623,195.7056768558952,1145,495
9,/academy/the-benefits-of-monitoring-carbon-emissions-for-a-business/,1245,533.6407766990292,110.46775745909528,1039,669
