In [None]:
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd

In [None]:
# Service-account key file and the OAuth scope used to build the credentials.
KEY_FILE_LOCATION = "config/client_secrets.json"
SCOPES = ["https://www.googleapis.com/auth/analytics.readonly"]

# Date range sent with every report request.
startDate = '2023-01-01'
endDate = '2023-05-31'

# Dimensions requested from the API, in output-column order.
dimensions = [
    'ga:year',
    'ga:month',
    'ga:sourceMedium',
]

# Metric expressions requested from the API, in output-column order.
metrics = [
    'ga:users',
    'ga:newUsers',
    'ga:sessions',
    'ga:bounceRate',
    'ga:pageviewsPerSession',
    'ga:avgSessionDuration',
    'ga:transactionsPerSession',
    'ga:transactions',
    'ga:transactionRevenue',
]



In [None]:
def initialize_analyticsreporting():
    """Build the Analytics Reporting API v4 service object.

    Credentials are loaded from the JSON key file at ``KEY_FILE_LOCATION``
    for the scopes listed in ``SCOPES``.

    Returns:
        The service object returned by ``build("analyticsreporting", "v4", ...)``.
    """
    return build(
        "analyticsreporting",
        "v4",
        credentials=ServiceAccountCredentials.from_json_keyfile_name(
            KEY_FILE_LOCATION, SCOPES
        ),
    )


# Shared client used by request(); created once when this cell runs.
Analytics = initialize_analyticsreporting()

In [None]:
def request(pageToken='undefined'):
    """Run one Reporting API v4 ``batchGet`` query and return its response.

    The query is built from the module-level ``startDate``, ``endDate``,
    ``metrics`` and ``dimensions`` settings and is sent through the shared
    ``Analytics`` client.

    Args:
        pageToken: Continuation token taken from the ``nextPageToken`` field
            of a previous response. ``None`` or the legacy placeholder
            ``'undefined'`` (the default) means "first page", in which case
            no token is sent at all.

    Returns:
        Whatever ``reports().batchGet(...).execute()`` returns for the query.
    """
    report_request = {
        "viewId": '178838015',
        # The API defines dateRanges as a list of DateRange objects.
        "dateRanges": [{'startDate': startDate, 'endDate': endDate}],
        "metrics": [{'expression': expression} for expression in metrics],
        "dimensions": [{'name': name} for name in dimensions],
        "pageSize": 100000,
    }

    # Only forward a real continuation token; the placeholder default is not
    # a value the API ever handed out, so it is left out of the request.
    if pageToken not in (None, 'undefined'):
        report_request["pageToken"] = pageToken

    # TODO: Validate values and prevent false query
    return Analytics.reports().batchGet(
        body={"reportRequests": [report_request]}
    ).execute()

In [None]:
def manipulate(response):
    """Flatten a Reporting API v4 response into a DataFrame.

    Every row of every report in ``response`` becomes one record. Dimension
    values are keyed by the report's dimension headers and metric values by
    the metric header names. Values are stored exactly as they appear in the
    response (no type conversion). Only the first entry of a row's
    ``metrics`` list is read.

    Args:
        response: Mapping with an optional ``'reports'`` list, as returned by
            ``request()``.

    Returns:
        pandas.DataFrame with one row per report row; an empty DataFrame when
        the response has no reports or no rows.
    """
    # Created once, outside the report loop, so that a response without
    # reports still yields a DataFrame and rows from several reports are
    # accumulated instead of being overwritten.
    records = []

    for report in response.get('reports', []):
        header = report.get('columnHeader', {})
        dimension_names = header.get('dimensions', [])
        metric_names = [
            entry.get('name')
            for entry in header.get('metricHeader', {}).get('metricHeaderEntries', [])
        ]

        for row in report.get('data', {}).get('rows', []):
            # Fall back to an empty metric set so a row without metrics does
            # not raise IndexError.
            metric_sets = row.get('metrics') or [{}]
            record = dict(zip(dimension_names, row.get('dimensions', [])))
            record.update(zip(metric_names, metric_sets[0].get('values', [])))
            records.append(record)

    return pd.DataFrame(records)

In [None]:
def next_records():
    """Fetch every page of the report and return them as one DataFrame.

    Calls ``request()`` for the first page, then keeps calling it with the
    ``nextPageToken`` of the first report in each response until no token is
    returned. Each response is converted with ``manipulate()``; the pages are
    concatenated and the ``ga:`` prefix is stripped from the column names.

    Returns:
        pandas.DataFrame holding the rows of all pages with a fresh 0..n-1
        index.
    """
    frames = []
    page_token = None

    while True:
        # The first call relies on request()'s own default; later calls pass
        # the continuation token from the previous response.
        response = request() if page_token is None else request(page_token)
        frames.append(manipulate(response))

        # Tolerate a response without reports instead of indexing into None.
        reports = response.get("reports") or [{}]
        page_token = reports[0].get('nextPageToken')
        if not page_token:
            break

    df = pd.concat(frames).reset_index(drop=True)
    # Plain string replacement on each label: independent of the pandas
    # regex default and safe when the result has no (string) columns.
    df.columns = [
        col.replace('ga:', '') if isinstance(col, str) else col
        for col in df.columns
    ]
    return df

In [None]:

# Download all pages of the configured report into a single DataFrame.
data = next_records()