In [4]:
import pandas as pd
import numpy as np
import json
import datetime
import os
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials


In [5]:
def initialize_analyticsreporting(KEY_FILE_LOCATION):
    """Create an authorized Analytics Reporting API V4 client.

    Args:
    KEY_FILE_LOCATION: str, path to the service-account JSON key file.
    Returns:
    The service object produced by ``build`` using credentials loaded
    from the key file with the read-only Analytics scope.
    """
    readonly_scope = 'https://www.googleapis.com/auth/analytics.readonly'

    # Load the service-account credentials restricted to the read-only scope.
    service_credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, scopes=[readonly_scope])

    # Hand the credentials to the discovery client and return the service.
    return build('analytics', 'v4', credentials=service_credentials)


def get_report(analytics, VIEW_ID, start_date, end_date, ga_metrics, ga_dimensions, next_page='0'):
    """Fetch one page of a report from the Analytics Reporting API V4.

    Args:
    analytics: An authorized Analytics Reporting API V4 service object.
    VIEW_ID: str
    start_date: str
    end_date: str
    ga_metrics: list of str, sent as metric expressions
    ga_dimensions: list of str, sent as dimension names
    next_page: str, value sent as the request's pageToken (default '0')
    Returns:
    The Analytics Reporting API V4 response for a single batchGet call
    requesting up to 10000 rows.
    """
    batch_get = analytics.reports().batchGet

    metric_specs = [{'expression': expression} for expression in ga_metrics]
    dimension_specs = [{'name': dimension} for dimension in ga_dimensions]

    # A single report request covering one date range.
    report_request = {
        'viewId': VIEW_ID,
        'pageSize': 10000,
        'pageToken': next_page,
        'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
        'metrics': metric_specs,
        'dimensions': dimension_specs,
    }

    request = batch_get(body={'reportRequests': [report_request]})
    return request.execute()


def response_to_dataframe(response):
    """Flatten the first report of an API response into a DataFrame.

    Args:
    response: An Analytics Reporting API V4 response.

    Returns:
    A pandas DataFrame with one row per report row. Columns are the
    dimension headers followed by the metric names; values are kept
    exactly as they appear in the response. When a row carries several
    date ranges, later ranges overwrite earlier ones for the same metric.
    """
    first_report = response['reports'][0]

    header = first_report.get('columnHeader', {})
    dimension_names = header.get('dimensions', [])
    metric_entries = header.get('metricHeader', {}).get('metricHeaderEntries', [])

    records = []
    for ga_row in first_report.get('data', {}).get('rows', []):
        # Dimensions first, so they lead the column order.
        record = dict(zip(dimension_names, ga_row.get('dimensions', [])))

        # One entry per date range; each holds the metric values in header order.
        for date_range in ga_row.get('metrics', []):
            record.update(
                (entry.get('name'), metric_value)
                for entry, metric_value in zip(metric_entries, date_range.get('values'))
            )
        records.append(record)

    return pd.DataFrame(records)

def get_report_over_10000_rows(analytics, VIEW_ID, start_date, end_date, 
                               ga_metrics, ga_dimensions):
    """Queries the Analytics Reporting API V4 for reports containing over 10000 
    rows, following the pagination tokens until the report is exhausted.

    Args:
    analytics: An authorized Analytics Reporting API V4 service object.
    VIEW_ID: str
    start_date: str
    end_date: str
    ga_metrics: list of str
    ga_dimensions: list of str
    Returns:
    A pandas DataFrame holding the rows of every page, in page order. Each
    page keeps its own row index, so index labels repeat across pages.
    """
    response = get_report(analytics, VIEW_ID, start_date, end_date,
                          ga_metrics, ga_dimensions)
    # Collect the pages and concatenate once at the end: DataFrame.append was
    # removed in pandas 2.0, and appending inside the loop re-copied all the
    # rows gathered so far on every page.
    pages = [response_to_dataframe(response)]

    # The API adds 'nextPageToken' to the report while more rows remain.
    while 'nextPageToken' in response['reports'][0]:
        next_page_token = response['reports'][0]['nextPageToken']
        response = get_report(analytics, VIEW_ID, start_date, end_date,
                              ga_metrics, ga_dimensions, next_page=next_page_token)
        pages.append(response_to_dataframe(response))

    return pd.concat(pages)

In [17]:
# Service-account key used to authorize against Google Analytics. Set the
# GA_KEY_FILE_LOCATION environment variable to point at a different key file;
# when it is unset the original local path is used.
analytics = initialize_analyticsreporting(
    os.environ.get('GA_KEY_FILE_LOCATION',
                   '/home/jian/Projects/Saatva/GA/GA_api_jubapluscc@gmail.com.json'))

def num_func_trans(df):
    """Convert the known Google Analytics metric columns of ``df`` to numbers.

    The columns 'ga:sessions', 'ga:adClicks', 'ga:impressions',
    'ga:transactions' and 'ga:users' are cast to int, and
    'ga:transactionRevenue' to float. Thousands separators (",") are removed
    from string values first. Columns that are absent are skipped and every
    other column is left untouched.

    Values that are already numeric are accepted, so calling the function
    again on its own output (e.g. re-running the notebook cell) is safe.

    Args:
    df: pandas DataFrame; it is modified in place.
    Returns:
    The same DataFrame object, with the metric columns converted.
    Raises:
    ValueError: if a value cannot be parsed as the target number type.
    """
    # Target Python type for each metric column this function knows about.
    metric_casts = (
        ('ga:sessions', int),
        ('ga:adClicks', int),
        ('ga:impressions', int),
        ('ga:transactions', int),
        ('ga:users', int),
        ('ga:transactionRevenue', float),
    )

    def _to_number(raw, caster):
        # Only strings can carry a thousands separator; numeric values are
        # passed straight to the caster.
        if isinstance(raw, str):
            raw = raw.replace(",", "")
        return caster(raw)

    for column, caster in metric_casts:
        if column in df.columns:
            df[column] = df[column].apply(_to_number, args=(caster,)).astype(caster)

    return df

In [18]:
# Download the 2018-01-01 .. 2018-12-31 report for view 122377905, paging
# through the API until every row has been fetched.
ga_data_saatva = get_report_over_10000_rows(
    analytics=analytics,
    VIEW_ID='122377905',
    start_date="2018-01-01",
    end_date="2018-12-31",
    ga_metrics=[
        'ga:sessions',
        'ga:users',
        'ga:goal11Completions',
        'ga:timeOnPage',
        'ga:transactions',
    ],
    ga_dimensions=[
        'ga:date',
        'ga:sourceMedium',
        'ga:campaign',
        'ga:keyword',
        'ga:pagePath',
    ],
)

In [19]:
# Cast the metric columns known to num_func_trans to numeric types. The frame
# is modified in place and the same object is returned. ga:goal11Completions
# and ga:timeOnPage are not in num_func_trans' column list, so this call
# leaves them as they were.
ga_data_saatva=num_func_trans(ga_data_saatva)

In [20]:
# Preview the first two rows of the converted report.
ga_data_saatva.head(2)

Unnamed: 0,ga:campaign,ga:date,ga:goal11Completions,ga:keyword,ga:pagePath,ga:sessions,ga:sourceMedium,ga:timeOnPage,ga:transactions,ga:users
0,(not set),20180101,0,(not set),/,267,(direct) / (none),14735.0,0,251
1,(not set),20180101,0,(not set),/?pt_campaign[ADL] [Co-Brand] [US] Saatva (Exa...,1,(direct) / (none),0.0,0,1


In [21]:
# Sum of ga:users over every row of the report.
# NOTE(review): rows are split by date, source/medium, campaign, keyword and
# page path, so the same user presumably appears in many rows; this total is
# likely not a count of unique users - confirm that is the intended figure.
ga_data_saatva['ga:users'].sum()

1250450