# Accessing Google Analytics Reports Using Python

[Official Documentation](https://support.google.com/analytics/answer/9143382?hl=en#zippy=%2Cattribution%2Cdemographics%2Cecommerce%2Cevent%2Cgaming%2Cgeneral%2Cgeography%2Clink%2Cpage-screen%2Cplatform-device%2Cpublisher%2Ctime%2Ctraffic-source%2Cuser%2Cuser-lifetime%2Cvideo%2Cadvertising%2Cpredictive%2Crevenue%2Csearch-console%2Csession)

In [23]:
#!pip3 install google.analytics.data

In [14]:
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    DateRange,
    Dimension,
    Metric,
    RunReportRequest,
)
import os
import pandas as pd
import json

def sample_run_report(
    property_id="424145747",
    start_date="2024-01-01",
    end_date="today",
    credentials_path='C:/Users/Xue Tan/Box/Web and Social Media Analytics/apt-port-251804-905e08b9e9e3.json',
):
    """Run an active-users report, by city and date, on a GA4 property.

    Args:
        property_id: Google Analytics 4 property ID; it is interpolated
            into the ``properties/<id>`` resource name of the request.
        start_date: First day of the report range, forwarded to
            ``DateRange`` (e.g. ``"2024-01-01"``).
        end_date: Last day of the report range, forwarded to ``DateRange``
            (e.g. ``"today"``).
        credentials_path: Path to the service-account JSON key file.

    Returns:
        The response object returned by
        ``BetaAnalyticsDataClient.run_report``.

    Side effects:
        Sets the process-wide ``GOOGLE_APPLICATION_CREDENTIALS``
        environment variable to ``credentials_path``.
    """
    # A default-constructed client picks up the credentials named by the
    # GOOGLE_APPLICATION_CREDENTIALS environment variable, so it has to be
    # set before the client is created.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
    client = BetaAnalyticsDataClient()

    request = RunReportRequest(
        property=f"properties/{property_id}",
        # Add Dimension(name="browser") here to also break down by browser.
        dimensions=[Dimension(name="city"), Dimension(name="date")],
        metrics=[Metric(name="activeUsers")],
        date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
    )
    return client.run_report(request)



def response_to_df(response):
    """Flatten a report response into a pandas DataFrame.

    Column names are the dimension header names followed by the metric
    header names. Each response row becomes one DataFrame row holding its
    dimension values followed by its metric values, with every cell taken
    unchanged from the ``value`` attribute.

    Args:
        response: Object exposing ``dimension_headers``, ``metric_headers``
            and ``rows``; each row exposes ``dimension_values`` and
            ``metric_values``.

    Returns:
        A ``pandas.DataFrame`` with one row per entry in ``response.rows``.
    """
    header_names = [header.name for header in response.dimension_headers]
    header_names += [header.name for header in response.metric_headers]

    records = [
        [cell.value for cell in entry.dimension_values]
        + [cell.value for cell in entry.metric_values]
        for entry in response.rows
    ]
    return pd.DataFrame(records, columns=header_names)

# Fetch the report for the GA4 property and flatten it into a DataFrame.
response = sample_run_report(property_id="424145747")
df = response_to_df(response)

print(df)

               city      date activeUsers
0         (not set)  20240203           3
1            Dallas  20240221           2
2         (not set)  20240124           1
3         Abbeville  20240203           1
4         Abbeville  20240204           1
5       Bloomington  20240124           1
6            Dallas  20240216           1
7            Dallas  20240220           1
8        Richardson  20240120           1
9        Richardson  20240124           1
10       Richardson  20240130           1
11       Richardson  20240203           1
12       Richardson  20240221           1
13  University Park  20240202           1


## Statistical Tests

### Independent t-test (two-sample t-test): 
Used to compare the means of two independent groups.

### Paired t-test: 
Used to compare the means of the same group at two different times or under two different conditions.

In [20]:
from scipy import stats
# Cast the activeUsers column to integers so it can be compared numerically.
df['activeUsers'] = df['activeUsers'].astype(int)

# Group A: rows for Richardson; group B: rows for every other city.
group_a = df.loc[df['city'] == 'Richardson', 'activeUsers']
print(group_a)
group_b = df.loc[df['city'] != 'Richardson', 'activeUsers']
print(group_b)

# Independent two-sample t-test on the two groups of active-user counts.
t_stat, p_value = stats.ttest_ind(group_a, group_b)

# Report the test statistic and its p-value.
print(f"T-statistic: {t_stat}, P-value: {p_value}")

8     1
9     1
10    1
11    1
12    1
Name: activeUsers, dtype: int32
0     3
1     2
2     1
3     1
4     1
5     1
6     1
7     1
13    1
Name: activeUsers, dtype: int32
T-statistic: -1.035098339013531, P-value: 0.3210327709321358
