In [49]:
# recommended to use virtual env to install pip package
# see link official docs for installation reference: https://github.com/googleapis/python-analytics-data#installation
!pip install google-analytics-data

Defaulting to user installation because normal site-packages is not writeable


In [50]:
import datetime
import pandas as pd
import numpy as np

In [71]:
# modified from official google docs for GA4: 
# https://developers.google.com/analytics/devguides/reporting/data/v1/quickstart-client-libraries

def pull_from_ga(dimensions, metrics, property_id="353368209"):
    """Runs a simple report on a Google Analytics 4 property."""
    # TODO(developer): Uncomment this variable and replace with your
    #  Google Analytics 4 property ID before running the sample.
    # property_id = "YOUR-GA4-PROPERTY-ID"

    # Using a default constructor instructs the client to use the credentials
    # specified in GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = BetaAnalyticsDataClient()

    request = RunReportRequest(
        property=f"properties/{property_id}",
        dimensions=[Dimension(name=d) for d in dimensions],
        metrics=[Metric(name=m) for m in metrics],
        date_ranges=[DateRange(start_date="2023-02-28", end_date="today")],
    )
    response = client.run_report(request)

#     print("Report result:")
#     for row in response.rows:
#         print(row.dimension_values[0].value, row.metric_values[0].value)
    print(response)
    
    return response

In [52]:
pull_from_ga(["city"], ["activeUsers"])

Report result:
(not set) 28
Quezon City 22
Columbus 20
Manila 12
Cebu City 7
Makati 7
Lulea 5
Prineville 5
Singapore 4
Bacoor 3
Faisalabad 3
Altoona 2
Ashburn 2
Calamba 2
Cheyenne 2
Dasmarinas 2
Davao City 2
Dublin 2
Marikina 2
Binangonan 1
Brambleton 1
Bursa 1
Caloocan 1
Forest City 1
Kakegawa 1
Khmelnytskyi 1
Lapu-Lapu City 1
New York 1
Nyiregyhaza 1
Paranaque 1
Pasig 1
Richardson 1
Shinjuku City 1
Sialkot 1
Taguig 1
Taytay 1
Ulyanovsk 1
dimension_headers {
  name: "city"
}
metric_headers {
  name: "activeUsers"
  type_: TYPE_INTEGER
}
rows {
  dimension_values {
    value: "(not set)"
  }
  metric_values {
    value: "28"
  }
}
rows {
  dimension_values {
    value: "Quezon City"
  }
  metric_values {
    value: "22"
  }
}
rows {
  dimension_values {
    value: "Columbus"
  }
  metric_values {
    value: "20"
  }
}
rows {
  dimension_values {
    value: "Manila"
  }
  metric_values {
    value: "12"
  }
}
rows {
  dimension_values {
    value: "Cebu City"
  }
  metric_values {
    v

dimension_headers {
  name: "city"
}
metric_headers {
  name: "activeUsers"
  type_: TYPE_INTEGER
}
rows {
  dimension_values {
    value: "(not set)"
  }
  metric_values {
    value: "28"
  }
}
rows {
  dimension_values {
    value: "Quezon City"
  }
  metric_values {
    value: "22"
  }
}
rows {
  dimension_values {
    value: "Columbus"
  }
  metric_values {
    value: "20"
  }
}
rows {
  dimension_values {
    value: "Manila"
  }
  metric_values {
    value: "12"
  }
}
rows {
  dimension_values {
    value: "Cebu City"
  }
  metric_values {
    value: "7"
  }
}
rows {
  dimension_values {
    value: "Makati"
  }
  metric_values {
    value: "7"
  }
}
rows {
  dimension_values {
    value: "Lulea"
  }
  metric_values {
    value: "5"
  }
}
rows {
  dimension_values {
    value: "Prineville"
  }
  metric_values {
    value: "5"
  }
}
rows {
  dimension_values {
    value: "Singapore"
  }
  metric_values {
    value: "4"
  }
}
rows {
  dimension_values {
    value: "Bacoor"
  }
  metr

In [53]:
query_map = {}

## Audience Overview

In [54]:
query_map["AudienceOverview"] = {
    "metrics": [
        "averageSessionDuration",
        "bounceRate",
        "newUsers",
        "sessionsPerUser",
        "screenPageViewsPerUser",
        "screenPageViewsPerSession",
        "screenPageViews",
        "sessions",
        "activeUsers", # not sure if this is what we want for the api
    ],
}

In [88]:
data = pull_from_ga(query_map["AudienceOverview"]["dimensions"], query_map["AudienceOverview"]["metrics"])

dimension_headers {
  name: "date"
}
metric_headers {
  name: "averageSessionDuration"
  type_: TYPE_SECONDS
}
metric_headers {
  name: "bounceRate"
  type_: TYPE_FLOAT
}
metric_headers {
  name: "newUsers"
  type_: TYPE_INTEGER
}
metric_headers {
  name: "sessionsPerUser"
  type_: TYPE_FLOAT
}
metric_headers {
  name: "screenPageViewsPerUser"
  type_: TYPE_FLOAT
}
metric_headers {
  name: "screenPageViewsPerSession"
  type_: TYPE_FLOAT
}
metric_headers {
  name: "screenPageViews"
  type_: TYPE_INTEGER
}
metric_headers {
  name: "sessions"
  type_: TYPE_INTEGER
}
metric_headers {
  name: "activeUsers"
  type_: TYPE_INTEGER
}
rows {
  dimension_values {
    value: "20230228"
  }
  metric_values {
    value: "669.631665"
  }
  metric_values {
    value: "0.17647058823529413"
  }
  metric_values {
    value: "12"
  }
  metric_values {
    value: "1.7894736842105263"
  }
  metric_values {
    value: "15.052631578947368"
  }
  metric_values {
    value: "8.4117647058823533"
  }
  metric_val

In [76]:
headers = []
for d in data.dimension_headers:
    headers.append(d.name)
    
for m in data.metric_headers:
    headers.append(m.name)

headers

['date',
 'averageSessionDuration',
 'bounceRate',
 'newUsers',
 'sessionsPerUser',
 'screenPageViewsPerUser',
 'screenPageViewsPerSession',
 'screenPageViews',
 'sessions',
 'activeUsers']

In [105]:
# transpose to columns
for r in data.rows:
    vals = [v.value for v in [*r.dimension_values, *r.metric_values]]
    print(vals)

['20230228', '669.631665', '0.17647058823529413', '12', '1.7894736842105263', '15.052631578947368', '8.4117647058823533', '286', '34', '19']
['20230307', '492.89098839999997', '1', '5', '1.6666666666666667', '4', '2.4', '12', '5', '3']
['20230301', '488.800945', '0.25', '8', '1.6', '14.2', '8.875', '142', '16', '10']
['20230302', '253.1425468', '0.4', '3', '1.25', '1.75', '1.4', '7', '5', '4']
['20230304', '219.40251999999998', '0.33333333333333331', '2', '2', '4', '2', '12', '6', '3']
['20230306', '154.80616195238093', '0.61904761904761907', '15', '1.3125', '1.75', '1.3333333333333333', '28', '21', '16']
['20230305', '15.566998857142858', '0.5714285714285714', '4', '1.75', '2', '1.1428571428571428', '8', '7', '4']
['20230303', '9.9476896', '0.7', '10', '1', '1.2', '1.2', '12', '10', '10']


In [102]:
audience_overview_df = pd.DataFrame(columns=headers)
audience_overview_df

Unnamed: 0,date,averageSessionDuration,bounceRate,newUsers,sessionsPerUser,screenPageViewsPerUser,screenPageViewsPerSession,screenPageViews,sessions,activeUsers
