In [14]:
# !pip install japanize-matplotlib
# !pip install reportlab
# !pip install --upgrade reportlab

In [15]:
# -----------------------------------------------------------
# 1) Imports & Colab authentication
# -----------------------------------------------------------
import logging
import os
from datetime import datetime, timedelta
from typing import Dict, List

import japanize_matplotlib  # enables Japanese font rendering
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import auth, drive
from google.auth import default
import gspread
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import (
    Image as RLImage,
    PageBreak,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)

# -----------------------------------------------------------
# 2) CONFIG – tweak here for next month
# -----------------------------------------------------------
SPREADSHEET_NAME = "ga4_seo_lp_report_2025"   # Google Sheets workbook opened by load_ga4_data()
PAGE1_URL = "/"     # landingPage value of the first page in the comparison
PAGE2_URL = "/en"   # landingPage value of the second page in the comparison

# Comparison window used by add_comparison_section(), expressed as calendar
# months (1-12). When start > end the window wraps across the year end,
# e.g. 9 and 5 selects September–December plus January–May.
COMP_START_MONTH = 9   # first month of the window (9 = September)
COMP_END_MONTH   = 5   # last month of the window (5 = May)
# NOTE(review): an older comment on COMP_END_MONTH described it as "when the
# improvement went live" – the code only uses it as the window end; confirm.

# One annotation per page (year, month, label); save_bar_chart() draws it as a
# dashed vertical line with the label text.
ANNOTATIONS: Dict[str, Dict[str, object]] = {
    PAGE1_URL: {"year": 2023, "month": 12,  "text": "Tracking Issue"},
    PAGE2_URL: {"year": 2023, "month": 12, "text": "Tracking Issue"},
}

# Inclusive date bounds: they decide which monthly worksheets are requested
# and which rows are kept after loading.
DATA_START = "2023-01-01"
DATA_END   = "2025-5-31"

# Fiscal-year column label -> bar colour. The keys also define which
# fiscal-year columns monthly_table() guarantees and save_bar_chart() plots.
BAR_COLORS = {"FY23": "#BFE9DB", "FY24": "#6AC1B7", "FY25": "#264E86"}

# --- Metrics configuration (tweak these lists as needed) ---
# Each metric listed here gets its own table + chart in add_metric_section().
METRICS = [
    "Sessions",
    "Engaged Sessions",
    "New Users",
    # "Pageview",
]

# Metrics and aggregation functions for comparison tables.
# The value is the name of the pandas Series method used to aggregate
# (falls back to sum when the Series has no such attribute).
COMPARISON_METRICS: Dict[str, str] = {
    metric: "sum" for metric in METRICS
}

# Metric name -> explanation printed on the "Notes" page of the PDF.
metric_notes = {
    "Sessions": "Number of sessions initiated from Organic Search.",
    "Engaged Sessions": "Sessions with significant engagement (e.g., multiple page views or ≥10 s).",
    "New Users": "First-time users visiting the site.",
    # "Pageview": "The number of times the page was viewed by the user.",
}


In [16]:
# -----------------------------------------------------------
# 3) Start‑up (mount Drive, auth Sheets)
# -----------------------------------------------------------
# Mount Google Drive at /content/drive (force_remount=True remounts even if it
# is already mounted).
drive.mount("/content/drive", force_remount=True)
# Run the Colab user-authentication flow, then pick up the resulting default
# Google credentials (the second value returned by default() is not used).
auth.authenticate_user()
creds, _ = default()
# Authorised gspread client; load_ga4_data() uses it to open the workbook.
GC = gspread.authorize(creds)

Mounted at /content/drive


In [17]:

# -----------------------------------------------------------
# 4) Load GA4 worksheets between DATA_START and DATA_END
# -----------------------------------------------------------
# Position of each calendar month inside the fiscal year, which opens in
# September (9 -> 1) and closes in August (8 -> 12).
MONTH_ORDER = {
    calendar_month: position
    for position, calendar_month in enumerate(
        [9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8], start=1
    )
}
# GA4 export column name -> display name used throughout the report.
RENAME_MAP = dict(
    sessions="Sessions",
    engagedSessions="Engaged Sessions",
    newUsers="New Users",
    screenPageViews="Pageview",
)

def list_sheet_names(start: str, end: str) -> List[str]:
    """Return one worksheet name per calendar month from ``start`` to ``end``.

    Both bounds are parsed with ``pd.to_datetime`` and are inclusive at month
    granularity. Names follow the pattern ``df_YYYYMM``.
    """
    first_day = pd.to_datetime(start)
    last_day = pd.to_datetime(end)
    names: List[str] = []
    for month in pd.period_range(first_day, last_day, freq="M"):
        names.append("df_" + month.strftime("%Y%m"))
    return names

def load_ga4_data() -> pd.DataFrame:
    """Load the monthly GA4 worksheets and return one cleaned DataFrame.

    Opens the workbook ``SPREADSHEET_NAME`` with the authorised client ``GC``
    and reads every worksheet named by ``list_sheet_names(DATA_START, DATA_END)``.
    Worksheets that do not exist are skipped with a warning.

    Cleaning steps:
      * GA4 column names are renamed via ``RENAME_MAP``;
      * ``yearMonth`` (``YYYYMM`` text) is parsed to a datetime;
      * ``averageSessionDuration`` is dropped when present;
      * every renamed metric column that is present is coerced to ``int``
        (unparseable cells become 0); absent metric columns are skipped with
        a warning instead of raising ``KeyError``;
      * rows outside ``DATA_START``..``DATA_END`` are removed;
      * ``fiscal_year`` (September onwards counts towards the next year) and
        ``month`` (calendar month number) are added.

    Returns:
        The concatenated, cleaned DataFrame.

    Raises:
        RuntimeError: if no worksheet yielded any rows.
    """
    # The first 14 rows of each worksheet are skipped; the row that follows is
    # used as the column header and everything after it as data.
    header_offset = 14

    ss = GC.open(SPREADSHEET_NAME)
    frames: List[pd.DataFrame] = []
    for name in list_sheet_names(DATA_START, DATA_END):
        try:
            raw = ss.worksheet(name).get_all_values()[header_offset:]
        except gspread.exceptions.WorksheetNotFound:
            logging.warning("Sheet %s not found – skipped", name)
            continue
        if raw:
            frames.append(pd.DataFrame(raw[1:], columns=raw[0]))
            logging.debug("Loaded %s", name)
    if not frames:
        raise RuntimeError("No data loaded; check sheet names/date range")

    df = pd.concat(frames, ignore_index=True)
    df = (
        df.rename(columns=RENAME_MAP)
          .assign(yearMonth=pd.to_datetime(df["yearMonth"], format="%Y%m"))
          .drop(columns=["averageSessionDuration"], errors="ignore")
    )

    # Sheet cells arrive as text. Only coerce the metric columns that the
    # export actually contains, so a missing optional metric does not abort
    # the whole run.
    for col in RENAME_MAP.values():
        if col not in df.columns:
            logging.warning("Column %s missing from export – skipped", col)
            continue
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)

    # Compare against real timestamps rather than raw config strings.
    range_start = pd.to_datetime(DATA_START)
    range_end = pd.to_datetime(DATA_END)
    in_range = (df["yearMonth"] >= range_start) & (df["yearMonth"] <= range_end)
    df = df.loc[in_range].copy()

    # Fiscal year: months from September onwards belong to the following year.
    starts_next_fy = df["yearMonth"].dt.month >= 9
    df["fiscal_year"] = (df["yearMonth"].dt.year + starts_next_fy).astype(int)
    df["month"] = df["yearMonth"].dt.month
    logging.info("Data rows after clean: %s", len(df))
    return df

# -----------------------------------------------------------
# 5) Helper functions – tables & charts
# -----------------------------------------------------------

def custom_month_sort(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with its rows ordered by fiscal-year month position.

    The ``month`` column is mapped through ``MONTH_ORDER`` into a temporary
    ``sort`` column, the rows are sorted on it, and the helper column is
    removed again before returning.
    """
    ranked = df.assign(sort=df["month"].map(MONTH_ORDER))
    ordered = ranked.sort_values("sort")
    return ordered.drop(columns="sort")

def monthly_table(df_sub: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Build the month × fiscal-year table for one metric.

    Args:
        df_sub: rows for a single page; must contain ``fiscal_year``,
            ``month`` and the ``metric`` column.
        metric: name of the metric column to sum.

    Returns:
        A DataFrame indexed by calendar month (in fiscal-year order) with one
        integer column per key of ``BAR_COLORS`` (``FYxx``), plus two text
        columns:
          * ``YoY`` – FY25 relative to FY24, or ``"-"`` when FY24 is 0;
          * ``MoM`` – FY25 relative to the previous row, or ``"-"`` when the
            change is undefined (first row, or previous value 0).
    """
    tbl = (
        df_sub.groupby(["fiscal_year", "month"], as_index=False)[metric].sum()
              .pivot(index="month", columns="fiscal_year", values=metric)
              .reset_index()
    )
    tbl = custom_month_sort(tbl).set_index("month")
    # Fiscal-year labels may surface as Python ints or numpy integers.
    tbl = tbl.rename(
        columns=lambda c: f"FY{str(c)[-2:]}" if isinstance(c, (int, np.integer)) else c
    )

    # Guarantee every configured fiscal-year column exists as int. A missing
    # year becomes an all-zero column (the previous `.get(fy, 0).fillna(0)`
    # raised AttributeError because the default was a plain int).
    for fy in BAR_COLORS.keys():
        if fy in tbl.columns:
            tbl[fy] = tbl[fy].fillna(0).astype(int)
        else:
            tbl[fy] = 0

    # Built row by row with a comprehension so an empty table yields an empty
    # column instead of failing inside DataFrame.apply.
    tbl["YoY"] = [
        "-" if prev == 0 else f"{(cur / prev - 1):.0%}"
        for prev, cur in zip(tbl["FY24"], tbl["FY25"])
    ]

    # pct_change yields NaN for the first row and for 0 -> 0, and ±inf when
    # the previous value is 0; all of those are shown as "-".
    tbl["MoM"] = [
        "-" if (pd.isna(change) or np.isinf(change)) else f"{change:.0%}"
        for change in tbl["FY25"].pct_change()
    ]
    return tbl

def save_bar_chart(df_sub: pd.DataFrame, metric: str, page_url: str) -> str:
    """Draw the grouped monthly bar chart for one metric and save it as a PNG.

    One bar group per month and one bar per key of ``BAR_COLORS``. If
    ``ANNOTATIONS`` has an entry for ``page_url`` whose month appears in the
    table, a dashed vertical line and a rotated label are drawn on the bar of
    the matching fiscal year.

    Returns:
        The file name the chart was written to (relative to the working
        directory), built from the metric and the page URL.
    """
    table = monthly_table(df_sub, metric)
    fiscal_years = list(BAR_COLORS.keys())
    positions = np.arange(len(table))
    bar_width = 0.25

    fig, ax = plt.subplots(figsize=(6, 6))
    for slot, fy in enumerate(fiscal_years):
        # slot - 1 centres the middle series on the month tick.
        ax.bar(positions + (slot - 1) * bar_width, table[fy],
               width=bar_width, color=BAR_COLORS[fy], label=fy)

    # Optional per-page annotation.
    note = ANNOTATIONS.get(page_url)
    if note and note["month"] in table.index:
        # Same fiscal-year rule as the data: September onwards -> next year.
        if note["month"] >= 9:
            note_fy = note["year"] + 1
        else:
            note_fy = note["year"]
        fy_label = f"FY{str(note_fy)[-2:]}"
        shift = fiscal_years.index(fy_label) - 1 if fy_label in BAR_COLORS else 0
        x_mark = positions[table.index == note["month"]][0] + shift * bar_width
        ax.axvline(x=x_mark, color="blue", ls="--")
        label_height = table[fiscal_years].max().max() * 0.9
        ax.text(x_mark, label_height, note["text"], rotation=90)

    ax.set_xticks(positions)
    ax.set_xticklabels(table.index)
    ax.set_ylabel(metric)
    ax.set_title(f"{metric} – {page_url}")
    ax.legend()
    fig.tight_layout()

    out_name = f"{metric}_{page_url.strip('/').replace('/','_')}.png"
    fig.savefig(out_name)
    plt.close(fig)
    return out_name

# -----------------------------------------------------------
# 6) ReportLab helpers
# -----------------------------------------------------------
# Shared ReportLab sample stylesheet; the PDF helpers look up the "Title",
# "Heading2", "Heading3" and "BodyText" styles from it.
styles = getSampleStyleSheet()

def rl_table(df_tbl: pd.DataFrame) -> Table:
    """Convert a DataFrame into a styled ReportLab ``Table``.

    The index is turned into a regular column, the column names become the
    header row, and numeric cells are rendered with thousands separators.
    Non-numeric cells are passed through unchanged.

    Args:
        df_tbl: table to render (e.g. the result of ``monthly_table``).

    Returns:
        A ``Table`` with a coloured header row, centred 8 pt text and a grid.
    """
    def _format_cell(value):
        # np.integer / np.floating cover numpy scalars, which are what
        # ``DataFrame.values`` yields for an all-numeric frame; plain
        # ``int``/``float`` checks alone would leave those unformatted.
        if isinstance(value, (int, float, np.integer, np.floating)):
            return f"{value:,}"
        return value

    flat = df_tbl.reset_index()
    header = list(flat.columns)
    body = [[_format_cell(cell) for cell in row] for row in flat.values]

    tbl = Table([header] + body)
    tbl.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#517D99")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
        ("FONTSIZE", (0, 0), (-1, -1), 8),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.black),
    ]))
    return tbl

# -----------------------------------------------------------
# 7-a) Helper – comparison tables  (fixed)
# -----------------------------------------------------------
def add_comparison_section(elements,
                           df_page1: pd.DataFrame,
                           df_page2: pd.DataFrame):
    """Append the FY24-vs-FY25 comparison page to ``elements``.

    For each of the two pages a table is built with one row per entry of
    ``COMPARISON_METRICS``, aggregated over the months selected by
    ``COMP_START_MONTH``..``COMP_END_MONTH`` (the window wraps across the
    year end when start > end) for fiscal years 2024 and 2025.

    Args:
        elements: list of ReportLab flowables; extended in place.
        df_page1: rows for ``PAGE1_URL`` (needs ``month``, ``fiscal_year`` and
            the metric columns).
        df_page2: rows for ``PAGE2_URL``, same layout.
    """
    wraps_year = COMP_START_MONTH > COMP_END_MONTH

    # Heading period text
    if wraps_year:
        period_text = f"{COMP_START_MONTH}–12 & 1–{COMP_END_MONTH}"
    else:
        period_text = f"{COMP_START_MONTH}–{COMP_END_MONTH}"

    def _aggregate(series: pd.Series, agg: str):
        # Use the named Series method when it exists, otherwise fall back to
        # sum. Aggregations such as mean return NaN on an empty selection;
        # report that as 0 so the int() formatting below cannot fail.
        value = getattr(series, agg)() if hasattr(series, agg) else series.sum()
        return 0 if pd.isna(value) else value

    def _build_table(df_sub: pd.DataFrame) -> Table:
        if wraps_year:
            mask = (df_sub["month"] >= COMP_START_MONTH) | (df_sub["month"] <= COMP_END_MONTH)
        else:
            mask = (df_sub["month"] >= COMP_START_MONTH) & (df_sub["month"] <= COMP_END_MONTH)

        fy24 = df_sub[(df_sub["fiscal_year"] == 2024) & mask]
        fy25 = df_sub[(df_sub["fiscal_year"] == 2025) & mask]

        rows = [["Metrics", "FY24", "FY25", "YoY"]]
        for m, agg in COMPARISON_METRICS.items():
            v24 = _aggregate(fy24[m], agg)
            v25 = _aggregate(fy25[m], agg)
            yoy = "N/A" if v24 == 0 else f"{(v25 / v24 - 1):+.0%}"
            rows.append([m, f"{int(v24):,}", f"{int(v25):,}", yoy])

        tbl = Table(rows, colWidths=[150, 100, 100, 80])
        tbl.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#517D99")),
            ("TEXTCOLOR",  (0, 0), (-1, 0), colors.white),
            ("ALIGN",      (0, 0), (-1, -1), "CENTER"),
            ("FONTNAME",   (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE",   (0, 0), (-1, 0), 9),
            ("BACKGROUND", (0, 1), (-1, -1), colors.whitesmoke),
            ("GRID",       (0, 0), (-1, -1), 0.5, colors.black),
        ]))
        return tbl

    elements.append(PageBreak())
    elements.append(Paragraph(f"Comparison Table (Months {period_text})", styles["Heading2"]))
    elements.append(Spacer(1, 6))

    # One heading + table per page; the trailing spacer is 18 pt after the
    # first page and 24 pt after the second.
    for page_url, df_page, gap in (
        (PAGE1_URL, df_page1, 18),
        (PAGE2_URL, df_page2, 24),
    ):
        elements.append(Paragraph(f"Performance – {page_url}", styles["Heading3"]))
        elements.append(_build_table(df_page))
        elements.append(Spacer(1, gap))


# -----------------------------------------------------------
# 7) Build PDF
# -----------------------------------------------------------
def add_metric_section(elements, title: str, df_sub: pd.DataFrame, page_url: str):
    """Append the trend section for one page to ``elements``.

    Adds a section heading, then for every metric in ``METRICS`` a
    sub-heading, a two-cell row holding the monthly table next to its bar
    chart image, and a spacer.

    Args:
        elements: list of ReportLab flowables; extended in place.
        title: section heading text.
        df_sub: rows belonging to this page.
        page_url: page identifier forwarded to ``save_bar_chart``.
    """
    elements.append(Paragraph(title, styles["Heading2"]))
    for metric_name in METRICS:
        table_df = monthly_table(df_sub, metric_name)
        chart_path = save_bar_chart(df_sub, metric_name, page_url)

        heading = Paragraph(metric_name, styles["Heading3"])
        # Table on the left, chart on the right.
        side_by_side = Table(
            [[rl_table(table_df), RLImage(chart_path, width=260, height=260)]],
            colWidths=[240, 310],
        )

        elements.append(heading)
        elements.append(side_by_side)
        elements.append(Spacer(1, 37))

def build_pdf(df: pd.DataFrame):
    """Assemble the full report and write it to a dated PDF file.

    The document consists of a cover page, one performance-trend section per
    page (``PAGE1_URL`` then ``PAGE2_URL``), the comparison tables and a
    notes page listing ``metric_notes``. The file is written to the working
    directory as ``ga_lp_comparison_YYYYMMDD.pdf`` using today's date.

    Args:
        df: cleaned data as returned by ``load_ga4_data`` (must contain a
            ``landingPage`` column).
    """
    # Take one timestamp so the file name and cover month cannot disagree.
    now = datetime.now()
    pdf_filename = f"ga_lp_comparison_{now:%Y%m%d}.pdf"
    doc = SimpleDocTemplate(pdf_filename, pagesize=A4,
                            leftMargin=30, rightMargin=30)
    els = []

    # ---------- Cover ----------
    # The data is filtered to DATA_END, so the cover reports that bound
    # (normalised to YYYY-MM-DD) rather than a date derived from today.
    data_end_label = pd.to_datetime(DATA_END).strftime("%Y-%m-%d")
    els += [
        Spacer(1, A4[1]/2 - 130),
        Paragraph("Organic Search Landing Page Report", styles["Title"]),
        Spacer(1, 12),
        Paragraph(f"Comparing '{PAGE1_URL}' vs '{PAGE2_URL}'", styles["Title"]),
        Spacer(1, 12),
        Paragraph(now.strftime("%Y-%m"), styles["Title"]),
        Spacer(1, 200),
        Paragraph("Created by: Shohei", styles["BodyText"]),
        Paragraph(f"Pages: {PAGE1_URL} vs {PAGE2_URL}", styles["BodyText"]),
        Paragraph("Data Source: Google Analytics 4", styles["BodyText"]),
        Paragraph(f"Data Range: {DATA_START} – {data_end_label}", styles["BodyText"]),
        PageBreak(),
    ]

    # ---------- Performance trend pages ----------
    df_p1 = df[df["landingPage"] == PAGE1_URL]
    df_p2 = df[df["landingPage"] == PAGE2_URL]

    add_metric_section(els, f"Performance Trend – {PAGE1_URL}", df_p1, PAGE1_URL)
    els.append(PageBreak())
    add_metric_section(els, f"Performance Trend – {PAGE2_URL}", df_p2, PAGE2_URL)

    # ---------- Comparison tables ----------
    add_comparison_section(els, df_p1, df_p2)

    # ---------- Notes page ----------
    els.append(PageBreak())
    els += [
        Paragraph("Notes", styles["Heading2"]),
        Spacer(1, 6),
        Paragraph(
            "This document contains confidential information intended solely for the designated recipients.",
            styles['BodyText']),
        Paragraph(
            "Unauthorized use, disclosure, or copying of this document is strictly prohibited.",
            styles['BodyText']),
        Spacer(1, 14),
        Paragraph("Explanation of Metrics", styles["Heading3"]),
        Spacer(1, 6),
    ]

    # One bulleted line per documented metric.
    for k, v in metric_notes.items():
        els.append(Paragraph(f"<bullet>&bull;</bullet> <b>{k}</b>: {v}", styles["BodyText"]))
        els.append(Spacer(1, 6))

    # ---------- Build ----------
    doc.build(els)
    logging.info("PDF generated → %s", pdf_filename)



In [18]:
# -----------------------------------------------------------
# 8) Run end‑to‑end
# -----------------------------------------------------------
# Load the data and build the PDF. Failures are logged rather than raised so
# the cell itself does not error out.
try:
    df_all = load_ga4_data()
    build_pdf(df_all)
except Exception as err:
    # logging.exception logs at ERROR level like logging.error, but also
    # appends the traceback so the failing line can be located.
    logging.exception("Process failed: %s", err)