In [67]:
# recommended to use virtual env to install pip package
# see link official docs for installation reference: https://github.com/googleapis/python-analytics-data#installation
!pip install google-analytics-data

Defaulting to user installation because normal site-packages is not writeable


In [68]:
import datetime
import pandas as pd
import numpy as np

In [69]:
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    DateRange,
    Dimension,
    Metric,
    RunReportRequest,
    OrderBy,
)

In [70]:
# Adapted from the official Google quickstart for the GA4 Data API:
# https://developers.google.com/analytics/devguides/reporting/data/v1/quickstart-client-libraries

def pull_from_ga_into_df(
    dimensions,
    metrics,
    property_id="353368209",
    start_date="7daysAgo",
    end_date="today",
):
    """Run a report on a Google Analytics 4 property and return it as a DataFrame.

    Args:
        dimensions: Iterable of GA4 dimension API names (e.g. ``["city"]``).
            May be empty, in which case no dimension breakdown is requested.
        metrics: Iterable of GA4 metric API names (e.g. ``["activeUsers"]``).
        property_id: ID of the GA4 property to query.
        start_date: First day of the report window, passed straight to
            ``DateRange``. Defaults to ``"7daysAgo"``.
        end_date: Last day of the report window, passed straight to
            ``DateRange``. Defaults to ``"today"``.

    Returns:
        pandas.DataFrame with one column per dimension header followed by one
        column per metric header (in the order the response lists them) and
        one row per report row. Values are stored exactly as the response
        provides them; no numeric conversion is applied.
        NOTE(review): the Data API appears to report every value as a string,
        so cast metric columns before doing arithmetic -- confirm.
    """
    # Using a default constructor instructs the client to use the credentials
    # specified in GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = BetaAnalyticsDataClient()

    request = RunReportRequest(
        property=f"properties/{property_id}",
        dimensions=[Dimension(name=d) for d in dimensions],
        metrics=[Metric(name=m) for m in metrics],
        date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
    )
    response = client.run_report(request)

    # Column names come from the response headers: dimensions first, then
    # metrics, which is the same order the values are laid out in each row.
    columns = [
        header.name
        for header in (*response.dimension_headers, *response.metric_headers)
    ]

    # Build every row up front and construct the frame once. Values are matched
    # to columns by header position rather than by dict-key position, so a
    # repeated header name cannot shift values into the wrong column.
    records = [
        [cell.value for cell in (*row.dimension_values, *row.metric_values)]
        for row in response.rows
    ]

    return pd.DataFrame(records, columns=columns)

In [71]:
# Quick check of the helper: active users broken down by city, using the
# function's default property and report window.
pull_from_ga_into_df(dimensions=["city"], metrics=["activeUsers"])

Unnamed: 0,city,activeUsers
0,(not set),10
1,Quezon City,10
2,Cebu City,5
3,Manila,5
4,Faisalabad,4
5,Makati,4
6,Singapore,3
7,Bacoor,2
8,Prineville,2
9,,1


In [72]:
# Report definitions keyed by report name; each entry is filled in further down.
query_map = dict()

## Audience Overview

In [73]:
# Audience overview: metrics only, with an empty dimension list (no breakdown).
query_map["AudienceOverview"] = dict(
    metrics=[
        "averageSessionDuration",
        "bounceRate",
        "newUsers",
        "sessionsPerUser",
        "screenPageViewsPerUser",
        "screenPageViewsPerSession",
        "screenPageViews",
        "sessions",
        # TODO: not sure if this is what we want for the api
        "activeUsers",
    ],
    dimensions=[],
)

In [74]:
# Fetch the audience-overview report using the definition stored in query_map.
audience_overview_df = pull_from_ga_into_df(
    dimensions=query_map["AudienceOverview"]["dimensions"],
    metrics=query_map["AudienceOverview"]["metrics"],
)

In [75]:
# Display the audience-overview report (bare last expression -> rich DataFrame output).
audience_overview_df

Unnamed: 0,averageSessionDuration,bounceRate,newUsers,sessionsPerUser,screenPageViewsPerUser,screenPageViewsPerSession,screenPageViews,sessions,activeUsers
0,257.2210388493151,0.4931506849315068,48,1.3773584905660377,4.490566037735849,3.26027397260274,238,73,53
