In [None]:
import pandas as pd
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest

# Retrieve data from Google Analytics
def get_ga_data(property_id, start_date, end_date):
    """Fetch a GA4 report and return it as a list of flat row dictionaries.

    The report is broken down by ``date``, ``pageTitle`` and ``sourceMedium``
    and carries the ``sessions``, ``bounceRate`` and ``averageSessionDuration``
    metrics.

    Args:
        property_id: GA4 property ID, without the ``properties/`` prefix.
        start_date: First day of the reporting range, as accepted by
            ``DateRange`` (e.g. ``"2023-01-01"``).
        end_date: Last day of the reporting range, same format.

    Returns:
        A list with one dict per report row, keyed by dimension/metric name
        (``"date"``, ``"pageTitle"``, ``"sourceMedium"``, ``"sessions"``,
        ``"bounceRate"``, ``"averageSessionDuration"``). Values are the raw
        strings delivered by the API; callers convert them as needed.
    """
    client = BetaAnalyticsDataClient()

    # Ask for large pages explicitly and keep requesting until every row has
    # been received; without limit/offset only the first default-sized page
    # of a large report would be returned.
    page_size = 100_000
    offset = 0
    records = []

    while True:
        request = RunReportRequest(
            property=f"properties/{property_id}",
            dimensions=[Dimension(name="date"), Dimension(name="pageTitle"), Dimension(name="sourceMedium")],
            metrics=[Metric(name="sessions"), Metric(name="bounceRate"), Metric(name="averageSessionDuration")],
            date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
            limit=page_size,
            offset=offset,
        )
        report = client.run_report(request)

        # Row values are positional; their names live in the response headers.
        column_names = [header.name for header in report.dimension_headers]
        column_names += [header.name for header in report.metric_headers]

        for row in report.rows:
            values = [item.value for item in row.dimension_values]
            values += [item.value for item in row.metric_values]
            records.append(dict(zip(column_names, values)))

        offset += len(report.rows)
        # Stop on an empty page as well, so a mismatching row_count can never
        # cause an endless loop.
        if not report.rows or offset >= report.row_count:
            break

    return records

# Analyze website traffic data
def analyze_traffic(data):
    """Summarise traffic rows per page title and source/medium.

    Args:
        data: Anything ``pd.DataFrame`` accepts (typically a list of dicts)
            with the columns ``pageTitle``, ``sourceMedium``, ``sessions``,
            ``bounceRate`` and ``averageSessionDuration``. Metric values may
            be numbers or numeric strings.

    Returns:
        A DataFrame with one row per (``pageTitle``, ``sourceMedium``) pair
        and the columns:

        * ``sessions`` - summed sessions of the group.
        * ``bounceRate`` / ``averageSessionDuration`` - plain (unweighted)
          mean of the group's input rows.
        * ``pageviews_per_session`` - the group's sessions divided by the
          total sessions over all rows, i.e. the group's share of sessions
          (the column name is kept for compatibility).
        * ``avg_session_duration_diff`` - the group's mean session duration
          minus the mean over all input rows.

        An empty input yields an empty DataFrame with these columns.
    """
    output_columns = [
        "pageTitle",
        "sourceMedium",
        "sessions",
        "bounceRate",
        "averageSessionDuration",
        "pageviews_per_session",
        "avg_session_duration_diff",
    ]

    df = pd.DataFrame(data)
    if df.empty:
        # Nothing to aggregate; the metric columns may not even exist.
        return pd.DataFrame(columns=output_columns)

    # Convert every metric up front. Left as strings, "sessions" would be
    # concatenated rather than added by the group sum below. assign() builds
    # a new frame, so a DataFrame passed in by the caller is not modified.
    df = df.assign(
        sessions=pd.to_numeric(df["sessions"]),
        bounceRate=df["bounceRate"].astype(float),
        averageSessionDuration=df["averageSessionDuration"].astype(float),
    )

    # Group by page title and source/medium
    page_stats = (
        df.groupby(["pageTitle", "sourceMedium"])
        .agg({"sessions": "sum", "bounceRate": "mean", "averageSessionDuration": "mean"})
        .reset_index()
    )

    # Calculate page-level metrics relative to the whole data set
    total_sessions = df["sessions"].sum()
    overall_duration = df["averageSessionDuration"].mean()
    page_stats["pageviews_per_session"] = page_stats["sessions"] / total_sessions
    page_stats["avg_session_duration_diff"] = page_stats["averageSessionDuration"] - overall_duration

    return page_stats

# Example usage
# Sample report parameters: the GA4 property and the reporting window.
property_id = "123456789"
start_date, end_date = "2023-01-01", "2023-12-31"

# Fetch the report rows, summarise them, and show the resulting table.
data = get_ga_data(
    property_id=property_id,
    start_date=start_date,
    end_date=end_date,
)
page_stats = analyze_traffic(data=data)
print(page_stats)