This notebook is designed to run on Google Colab. To prepare the data table, download the Excel file (df-ga-channel-group-report-monthly), update it as needed, then upload it and convert it to a Google Sheets spreadsheet.

In [19]:
!pip install japanize-matplotlib
!pip install reportlab
!pip install --upgrade reportlab



In [20]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import japanize_matplotlib
from datetime import datetime, timedelta

# ReportLab で PDF を生成するために使うクラスや関数
from reportlab.lib.pagesizes import A4
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image, PageBreak
)
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors
from reportlab.lib.units import inch

# Google Colab 用の認証関連
from google.colab import auth, drive
import gspread
from google.auth import default

In [21]:
#===============================================================================
# (1) Basic settings
#===============================================================================
FILENAME = "df-ga-channel-group-report-monthly"  # Google Sheets file name
SHEET_NAME = "df_monthly_channel"               # Worksheet name

# Channel groups that each get their own tables/graphs in the report
# (matched against the 'sessionDefaultChannelGroup' column).
DESIRED_CHANNELS = [
    'Organic Search',
    'Direct',
    'Organic Social',
    'Referral',
    'Unassigned'
]

# Metric columns that are aggregated and reported for every channel.
METRICS = ['sessions', 'engagedSessions']

In [22]:
#===============================================================================
# (2) Prepare to read data from Google Sheets
#===============================================================================
# Mount Google Drive at /content/drive.
drive.mount('/content/drive', force_remount=True)
# Authenticate the Colab user, then build the gspread client (`gc`) from the
# default credentials; `gc` is used by load_sheet_data below.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

def load_sheet_data(spreadsheet_name, sheet_name):
    """Load one worksheet of a Google Sheets file into a DataFrame.

    Uses the module-level gspread client ``gc``. The first row of the
    worksheet becomes the column names and every following row becomes a data
    row. Cell values are kept exactly as gspread returns them; no type
    conversion happens here.

    Args:
        spreadsheet_name: Name of the spreadsheet to open.
        sheet_name: Name of the worksheet inside that spreadsheet.

    Returns:
        pandas.DataFrame with one row per worksheet row after the header.

    Raises:
        ValueError: If the worksheet has no rows at all, so there is no
            header row to build the columns from.
    """
    worksheet = gc.open(spreadsheet_name).worksheet(sheet_name)
    rows = worksheet.get_all_values()
    if not rows:
        # Without this guard the header lookup fails with a bare IndexError.
        raise ValueError(
            f"Worksheet '{sheet_name}' in spreadsheet '{spreadsheet_name}' is empty; "
            "expected a header row followed by data rows."
        )
    header, *records = rows
    return pd.DataFrame(records, columns=header)

# Read the raw report rows from the configured spreadsheet and worksheet.
df = load_sheet_data(FILENAME, SHEET_NAME)

Mounted at /content/drive


In [23]:
#===============================================================================
# (3) 前処理用の関数
#===============================================================================
def preprocess_df(df):
    """Return a cleaned copy of the channel report data.

    Steps:
      * every column except 'yearMonth' and 'sessionDefaultChannelGroup' is
        converted to numbers (thousands separators removed, unparsable values
        become NaN);
      * 'yearMonth' is parsed as a datetime using the '%Y%m' format;
      * 'year', 'month' and 'fiscal_year' columns are derived from it.

    Fiscal-year labels: dates before 2023-06-01 -> 'FY23',
    2023-06-01..2024-05-31 -> 'FY24', 2024-06-01..2025-05-31 -> 'FY25',
    anything else -> None.

    Args:
        df: DataFrame that contains at least a 'yearMonth' column.

    Returns:
        A new DataFrame; the input frame is left unmodified.
    """
    exclude_columns = ['yearMonth', 'sessionDefaultChannelGroup']

    # Work on a copy so the caller's DataFrame is never modified in place.
    df = df.copy()

    for column in df.columns:
        if column not in exclude_columns:
            as_text = df[column].astype(str).str.replace(',', '', regex=False)
            df[column] = pd.to_numeric(as_text, errors='coerce')

    df['yearMonth'] = pd.to_datetime(df['yearMonth'], format='%Y%m')
    df['year'] = df['yearMonth'].dt.year
    df['month'] = df['yearMonth'].dt.month

    # Fiscal-year boundaries, built once instead of once per row.
    fy24_start = pd.Timestamp('2023-06-01')
    fy24_end = pd.Timestamp('2024-05-31')
    fy25_start = pd.Timestamp('2024-06-01')
    fy25_end = pd.Timestamp('2025-05-31')

    def fiscal_year_label(ts):
        # Comparisons with NaT are all False, so missing dates fall through to None.
        if ts < fy24_start:
            return 'FY23'
        if fy24_start <= ts <= fy24_end:
            return 'FY24'
        if fy25_start <= ts <= fy25_end:
            return 'FY25'
        return None

    df['fiscal_year'] = df['yearMonth'].apply(fiscal_year_label)

    return df

def custom_month_sort(df):
    """Return the rows of ``df`` ordered by fiscal month (June first, May last).

    Args:
        df: DataFrame with a 'month' column holding calendar month numbers.

    Returns:
        A new DataFrame sorted in the order 6, 7, ..., 12, 1, ..., 5. Months
        that are not in 1..12 map to a missing sort key. The input frame is
        not modified (no temporary 'sort_key' column is left behind on it).
    """
    month_order = {
        6: 1, 7: 2, 8: 3, 9: 4, 10: 5, 11: 6,
        12: 7, 1: 8, 2: 9, 3: 10, 4: 11, 5: 12
    }
    # assign() builds the helper column on a copy, so the caller's frame stays clean.
    return (
        df.assign(sort_key=df['month'].map(month_order))
        .sort_values(by='sort_key')
        .drop('sort_key', axis=1)
    )

# Clean the loaded sheet data: numeric metric columns, datetime 'yearMonth',
# and the derived 'year' / 'month' / 'fiscal_year' columns.
df = preprocess_df(df)

In [24]:
#===============================================================================
# (4) Extract the data for each channel
#===============================================================================
# One independent copy of the rows that belong to each desired channel.
df_hub = {
    ch: df[df['sessionDefaultChannelGroup'] == ch].copy()
    for ch in DESIRED_CHANNELS
}

# Also add "All Channel": the metrics summed over every channel per yearMonth,
# run through preprocess_df again so year / month / fiscal_year are derived for it.
df_allchannel = preprocess_df(
    df.groupby('yearMonth', as_index=False)[METRICS].sum()
)
df_hub["All Channel"] = df_allchannel

In [25]:
#===============================================================================
# (5) テーブル/グラフ生成用の関数
#===============================================================================
def generate_monthly_table(metric, df):
    """Build the month-by-fiscal-year table for one metric.

    The metric is summed per (fiscal_year, month), pivoted so that each fiscal
    year is a column, ordered by fiscal month via ``custom_month_sort``, and
    extended with percentage columns and a final 'Total' row.

    Args:
        metric: Name of the metric column to aggregate.
        df: DataFrame with 'fiscal_year', 'month' and the metric column.

    Returns:
        DataFrame indexed by month number plus a trailing 'Total' row, with
        the columns:
          * 'FY24', 'FY25' - integer sums (0 where there is no data; an 'FY23'
            column is dropped if present);
          * 'YoY' - FY25 vs FY24 for the same month, e.g. '12%';
          * 'MoM' - FY25 vs the previous row's FY25 value.
        Both percentage columns show '-' when either value in the comparison
        is 0 or missing, and always '-' on the 'Total' row.
    """
    df_agg = df.groupby(['fiscal_year', 'month'])[metric].sum().reset_index()
    df_pivot = df_agg.pivot_table(
        index='month',
        columns='fiscal_year',
        values=metric,
        fill_value=0
    ).reset_index()

    df_pivot = custom_month_sort(df_pivot)
    df_pivot = df_pivot.set_index('month')

    if 'FY23' in df_pivot.columns:
        df_pivot = df_pivot.drop(columns='FY23')

    for fy in ['FY24', 'FY25']:
        if fy not in df_pivot.columns:
            df_pivot[fy] = 0
        df_pivot[fy] = df_pivot[fy].astype(int)

    def pct_change_label(current, previous):
        # A zero or missing side means there is nothing meaningful to compare
        # (e.g. a month with no data yet), so show a dash instead of a
        # misleading "-100%" or a division by zero.
        if pd.isna(previous) or previous == 0 or current == 0:
            return '-'
        return f"{int(round((current / previous - 1) * 100))}%"

    fy25_values = df_pivot['FY25']
    df_pivot['YoY'] = [
        pct_change_label(current, previous)
        for current, previous in zip(fy25_values, df_pivot['FY24'])
    ]
    # shift(1) lines every month up with the FY25 value of the row before it;
    # the first row has no predecessor (NaN) and therefore gets '-'.
    df_pivot['MoM'] = [
        pct_change_label(current, previous)
        for current, previous in zip(fy25_values, fy25_values.shift(1))
    ]

    total_row = pd.DataFrame({
        'FY24': [df_pivot['FY24'].sum()],
        'FY25': [df_pivot['FY25'].sum()],
        'YoY': ['-'],
        'MoM': ['-']
    }, index=['Total'])

    return pd.concat([df_pivot, total_row])

def generate_monthly_graph(metric, df, channel_name, title):
    """Draw the FY24 vs FY25 grouped bar chart for one metric and save it as a PNG.

    Args:
        metric: Name of the metric column to plot (also used as the y-axis label).
        df: DataFrame with 'fiscal_year', 'month' and the metric column.
        channel_name: Channel name; spaces are replaced by underscores to
            form the image file name.
        title: Chart title.

    Returns:
        Path of the saved image: 'images/<channel>_<metric>.png'. The
        'images' directory is created if it does not exist.
    """
    # Sum per (fiscal_year, month) and spread the fiscal years into columns.
    monthly = (
        df.groupby(['fiscal_year', 'month'])[metric]
        .sum()
        .reset_index()
        .pivot_table(
            index='month',
            columns='fiscal_year',
            values=metric,
            fill_value=0
        )
        .reset_index()
    )
    monthly = custom_month_sort(monthly)
    monthly.set_index('month', inplace=True)

    # Both plotted fiscal years must exist as integer columns.
    for fiscal_year in ('FY24', 'FY25'):
        if fiscal_year not in monthly.columns:
            monthly[fiscal_year] = 0
        monthly[fiscal_year] = monthly[fiscal_year].astype(int)

    fig, ax = plt.subplots(figsize=(6, 6))
    bar_width = 0.35
    positions = np.arange(len(monthly.index))

    # Bar colours kept from the existing report design.
    ax.bar(positions, monthly['FY24'], bar_width, color='#6AC1B7', label='FY24')
    ax.bar(positions + bar_width, monthly['FY25'], bar_width, color='#264E86', label='FY25')

    # Plain tick labels on the y axis: no offset, no scientific notation.
    plain_numbers = ScalarFormatter(useOffset=False)
    plain_numbers.set_scientific(False)
    ax.yaxis.set_major_formatter(plain_numbers)

    ax.set_xlabel('Month')
    ax.set_ylabel(metric)
    ax.set_title(title)
    # Centre each tick between the two bars of its month.
    ax.set_xticks(positions + bar_width / 2)
    ax.set_xticklabels(monthly.index)
    ax.legend()
    ax.grid(True, linewidth=0.2, linestyle='--', axis='y')
    fig.tight_layout()

    image_dir = 'images'
    os.makedirs(image_dir, exist_ok=True)
    file_stem = channel_name.replace(" ", "_")
    image_path = os.path.join(image_dir, f'{file_stem}_{metric}.png')
    fig.savefig(image_path)
    plt.close(fig)

    return image_path

def create_table(data, image_path, styles):
    """Build the styled ReportLab table and the chart image for one report section.

    Args:
        data: List of rows (header row first). Numeric cells are rendered as
            integers with thousands separators; all other cells are passed
            through unchanged.
        image_path: Path of the chart image to place next to the table.
        styles: Style sheet; ``styles['BodyText']`` is used for the fallback
            paragraph when the image cannot be used.

    Returns:
        Tuple ``(table, img)``. ``img`` is a 260x260 Image, or a Paragraph
        reading "Image not found: ..." when the file is missing or loading
        raised an exception.
    """
    import math
    import numbers
    import os

    def format_cell(val):
        # bool is a numeric type in Python, but must not be printed as 0/1.
        if isinstance(val, bool) or not isinstance(val, numbers.Real):
            return val
        # NaN / infinity cannot be converted to int; show the same dash the
        # report uses elsewhere for "no value".
        if not math.isfinite(val):
            return '-'
        # numbers.Real also covers numpy integer/float scalars, so those get
        # the thousands separator as well.
        return f"{int(val):,}"

    formatted_data = [[format_cell(val) for val in row] for row in data]

    month_col_index = 0
    table = Table(formatted_data, repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#517D99')),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 7),
        ('LEFTPADDING', (0, 0), (-1, -1), 4),
        ('RIGHTPADDING', (0, 0), (-1, -1), 4),
        ('TOPPADDING', (0, 0), (-1, -1), 4),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.black),
        # Shade the month column (all rows below the header).
        ('BACKGROUND', (month_col_index, 1), (month_col_index, -1), colors.whitesmoke)
    ]))

    # NOTE(review): ReportLab's Image may defer opening the file until the
    # document is built, in which case the try/except below would not catch a
    # missing file - confirm. The explicit existence check makes the fallback
    # reliable either way. Non-path inputs (e.g. file objects) skip the check.
    is_path = isinstance(image_path, (str, os.PathLike))
    if is_path and not os.path.isfile(image_path):
        img = Paragraph(f"Image not found: {image_path}", styles['BodyText'])
    else:
        try:
            img = Image(image_path)
            img.drawHeight = 260
            img.drawWidth = 260
        except Exception as e:
            img = Paragraph(f"Image not found: {e}", styles['BodyText'])

    return table, img

In [26]:
#===============================================================================
# (6) Ratio列を追加する関数
#===============================================================================
def add_ratio_columns(df_table, total_table):
    """Add 'FY24 Ratio' / 'FY25 Ratio' columns: a channel's share of all channels.

    Rows are matched by index label (the month number, or 'Total'), and the
    share is ``channel value / all-channel value * 100`` rounded to a whole
    percent.

    Args:
        df_table: Monthly table of one channel; modified in place.
        total_table: Monthly table of "All Channel" for the same metric.

    Returns:
        ``df_table`` with the two ratio columns added. A cell is '-' when the
        all-channel value is 0, when the row label does not exist in
        ``total_table``, or when either value is missing. If a fiscal-year
        column is absent from either table, the whole ratio column is '-'.
    """
    def ratio_label(value, total):
        if pd.isna(value) or pd.isna(total) or total == 0:
            return '-'
        return f"{int(round((value / total) * 100))}%"

    for fy in ['FY24', 'FY25']:
        ratio_col = f"{fy} Ratio"
        if fy in df_table.columns and fy in total_table.columns:
            totals = total_table[fy]
            # Look totals up by row label; a label missing from the total
            # table yields None (-> '-') instead of raising KeyError.
            df_table[ratio_col] = [
                ratio_label(value, totals.loc[label] if label in totals.index else None)
                for label, value in df_table[fy].items()
            ]
        else:
            df_table[ratio_col] = '-'
    return df_table

In [27]:
#===============================================================================
# (7) PDF 生成関数
#===============================================================================
def generate_pdf(datasets):
    """Build the monthly channel report PDF in the current working directory.

    The document consists of a cover page, a clickable table of contents, one
    table + chart section per dataset (two sections per page), and a closing
    page with the license text and explanations of metrics and channels.

    Args:
        datasets: List of ``(table_df, image_path, title)`` tuples. It is
            iterated twice (table of contents, then data pages). ``table_df``
            is a monthly table whose index becomes the 'Month' column.

    Returns:
        None. The PDF is written to
        'ga-channel-group-report-monthly-with-allchannel-ratio-<YYYY-MM-DD>.pdf'
        and a confirmation naming that file is printed.
    """
    # Sample the clock once so every date in the report is consistent.
    now = datetime.now()
    current_date = now.strftime('%Y-%m-%d')
    output_name = f"ga-channel-group-report-monthly-with-allchannel-ratio-{current_date}.pdf"

    pdf = SimpleDocTemplate(
        output_name,
        pagesize=A4,
        leftMargin=0.8*inch,
        rightMargin=0.8*inch,
        topMargin=80,
        bottomMargin=80
    )
    styles = getSampleStyleSheet()
    elements = []

    #----------------------------------------
    # (A) Cover page
    #----------------------------------------
    first_day_this_month = datetime(now.year, now.month, 1)
    last_day_prev_month = first_day_this_month - timedelta(days=1)
    last_day_prev_month_str = last_day_prev_month.strftime('%Y-%m-%d')

    cover_title = Paragraph('GA4 Channel Report - Monthly', styles['Title'])
    cover_title_2 = Paragraph('Default Channel Group', styles['Title'])
    # The reported month is the previous calendar month. Deriving it from the
    # last day of that month is exact; "today minus 30 days" lands in the
    # current month on e.g. the 31st.
    cover_date = Paragraph(last_day_prev_month.strftime("%B %Y"), styles['Title'])

    elements.append(Spacer(1, A4[1]/2 - 180))
    elements.append(cover_title)
    elements.append(cover_title_2)
    elements.append(Spacer(1, 12))
    elements.append(cover_date)
    elements.append(Spacer(1, 200))
    elements.append(Paragraph(
        f'Created by: Shohei on {current_date}',
        styles['BodyText']
    ))
    elements.append(Paragraph(
        'Website: heysho.com',
        styles['BodyText']
    ))
    elements.append(Paragraph(
        'Data Source: GA4 - Default Channel Group',
        styles['BodyText']
    ))
    elements.append(Paragraph(
        f'Data Range: 2023-06-01 - {last_day_prev_month_str}',
        styles['BodyText']
    ))
    elements.append(PageBreak())

    #----------------------------------------
    # (B) Table of contents
    #----------------------------------------
    elements.append(Paragraph('Table of Contents', styles['Heading1']))
    elements.append(Spacer(1, 12))

    # Each entry links to the anchor emitted with the matching section heading.
    for idx, (_, _, title) in enumerate(datasets):
        anchor = f'section_{idx}'
        toc_entry = Paragraph(f'<link href="#{anchor}">{title}</link>', styles['BodyText'])
        elements.append(toc_entry)

    elements.append(Spacer(1, 20))
    elements.append(Paragraph("**Click to jump to the page", styles['Normal']))
    elements.append(PageBreak())

    #----------------------------------------
    # (C) Data pages (per-channel table & graph)
    #----------------------------------------
    for idx, (df_table, image_path, title) in enumerate(datasets):
        # Turn the month index into a regular 'Month' column for the table.
        df_table = df_table.reset_index().rename(columns={'index': 'Month'})
        data = [df_table.columns.to_list()] + df_table.values.tolist()

        table_obj, image_obj = create_table(data, image_path, styles)

        anchor = f'section_{idx}'
        elements.append(Paragraph(f'<a name="{anchor}"/>{title}', styles['Heading2']))
        elements.append(Spacer(1, 6))

        # Table on the left, chart on the right.
        col_layout = Table(
            [[table_obj, image_obj]],
            colWidths=[270, 260],
            style=[
                ('ALIGN', (0, 0), (0, 0), 'RIGHT'),
                ('ALIGN', (1, 0), (1, 0), 'LEFT')
            ]
        )
        elements.append(col_layout)
        elements.append(Spacer(1, 20))

        # Two sections per page.
        if (idx + 1) % 2 == 0:
            elements.append(PageBreak())

    #----------------------------------------
    # (D) Final page
    #----------------------------------------
    elements.append(Paragraph('Usage Rights and License', styles['Heading3']))
    elements.append(Spacer(1, 6))
    elements.append(Paragraph(
        'The use of this template is restricted to personal purposes only. Any commercial use or provision to third parties is strictly prohibited. Redistribution of the template, as well as the redistribution of any modified version or derivative works that incorporate modifications, is prohibited in all forms. The sale, transfer, or public use (including online sharing) of any part or the entirety of the template is also prohibited.',
        styles['BodyText']
    ))

    elements.append(Spacer(1, 14))
    elements.append(Paragraph('Explanation of Metrics', styles['Heading3']))
    elements.append(Spacer(1, 6))

    metrics_explanations = [
        {
            "title": "sessions",
            "description": "Number of user visits to the website during a specified period."
        },
        {
            "title": "engagedSessions",
            "description": "Number of engaged sessions that happen on the website."
        },
        {
            "title": "MoM (Month-over-Month)",
            "description": "Percentage change from one month to the previous month."
        },
        {
            "title": "YoY (Year-over-Year)",
            "description": "Percentage change compared to the same month in the previous fiscal year."
        }
    ]
    for metric in metrics_explanations:
        text = f"<bullet>&bull;</bullet> <b>{metric['title']}</b>: {metric['description']}"
        elements.append(Paragraph(text, styles['BodyText']))
        elements.append(Spacer(1, 6))

    elements.append(Spacer(1, 12))
    elements.append(Paragraph('Explanation of Channels', styles['Heading3']))
    elements.append(Spacer(1, 6))

    channels_explanations = [
        {
            "title": "All Channel",
            "description": "Represents the total aggregate of all traffic sources combined."
        },
        {
            "title": "Organic Search",
            "description": "Traffic from unpaid search engine results."
        },
        {
            "title": "Paid Search",
            "description": "Traffic from paid ads on search engines."
        },
        {
            "title": "Paid Shopping",
            "description": "Traffic driven by paid product listings (e.g., Google Shopping)."
        },
        {
            "title": "Paid Other",
            "description": "Traffic from other paid campaigns (e.g., Line Ads)."
        },
        {
            "title": "Display",
            "description": "Traffic from banner ads on third-party websites."
        },
        {
            "title": "Paid Social",
            "description": "Traffic from paid social media ads."
        },
        {
            "title": "Organic Social",
            "description": "Traffic from unpaid social media posts."
        },
        {
            "title": "Email",
            "description": "Traffic driven by email marketing campaigns."
        },
        {
            "title": "Direct",
            "description": "Traffic from users entering the URL directly or via bookmarks."
        },
        {
            "title": "Referral",
            "description": "Traffic referred from other websites."
        },
        {
            "title": "Unassigned",
            "description": "Traffic that cannot be categorized into a specific channel."
        }
    ]

    for channel in channels_explanations:
        text = f"<bullet>&bull;</bullet> <b>{channel['title']}</b>: {channel['description']}"
        elements.append(Paragraph(text, styles['BodyText']))
        elements.append(Spacer(1, 6))

    elements.append(PageBreak())

    pdf.build(elements)
    # Report the file name that was actually written.
    print(f"PDF '{output_name}' generated successfully.")


In [28]:
#===============================================================================
# (8) Main flow: add the Ratio columns and write the PDF
#===============================================================================
datasets = []
all_channel_tables = {}  # "All Channel" tables per metric, referenced by the other channels

all_channels_list = ["All Channel", *DESIRED_CHANNELS]

for idx, ch in enumerate(all_channels_list):
    df_data = df_hub.get(ch, pd.DataFrame())
    if df_data.empty:
        # No rows for this channel: nothing to report.
        continue

    # The section number follows the channel's position in the list.
    title_number = idx + 1

    for metric in METRICS:
        # Title format: <number>-<a|b>. <channel name> - <metric>
        if metric == 'sessions':
            title_suffix = 'a'
        else:
            title_suffix = 'b'
        report_title = f"{title_number}-{title_suffix}. {ch} - {metric}"

        monthly_table = generate_monthly_table(metric, df_data)
        image_path = generate_monthly_graph(metric, df_data, ch, report_title)

        if ch == "All Channel":
            # (A) Keep a copy so the individual channels can compute their
            #     ratio against it; its own ratio is always 100%.
            all_channel_tables[metric] = monthly_table.copy()
            for fy in ['FY24', 'FY25']:
                monthly_table[f"{fy} Ratio"] = "100%"
        elif metric in all_channel_tables:
            # (B) Individual channel: share of the "All Channel" table.
            monthly_table = add_ratio_columns(monthly_table, all_channel_tables[metric])
        else:
            # Fallback: no "All Channel" table is available, so show dashes.
            for fy in ['FY24', 'FY25']:
                monthly_table[f"{fy} Ratio"] = "-"

        datasets.append((monthly_table, image_path, report_title))

# Generate the PDF
generate_pdf(datasets)

PDF 'ga-channel-monthly-report-2025-04-13.pdf' generated successfully.
