# Megaton E2E Smoke Test (Manual)

**目的**
- Megaton を使った最小の end-to-end 動作確認（外部サービスにアクセスします）

**このテストが保証しないこと**
- 網羅的な正しさ・パフォーマンス・権限設定の完全性
- GA4 / Sheets / BQ のすべての組み合わせ

**安全メモ**
- Google Sheets への書き込みは **新しい一時シート** を作成して行います（既存シートは上書きしません）。作成した一時シートはこのノートブックでは削除しないため、確認後に手動で削除してください。

In [None]:
# Cell 1: Configuration (single place to edit)
import os
from datetime import datetime, timedelta, timezone

# Feature switches: set one to False to skip the corresponding smoke cell.
RUN_GA4 = True
RUN_SHEETS = True
RUN_BQ = True
RUN_GSC = False  # optional
RUN_DRIVE = False  # optional

# Take a single timezone-aware UTC timestamp so the date range and the sheet
# name are derived from the same instant. datetime.utcnow() is deprecated
# since Python 3.12, so datetime.now(timezone.utc) is used instead.
_NOW_UTC = datetime.now(timezone.utc)

# 1-day range by default (yesterday in UTC, formatted as YYYY-MM-DD)
DATE_TO = (_NOW_UTC - timedelta(days=1)).strftime("%Y-%m-%d")
DATE_FROM = DATE_TO

# Optional env-based settings (loaded after .env in Cell 3 if empty)
GS_URL = os.environ.get("SMOKE_GS_URL", "")
BQ_PROJECT = os.environ.get("SMOKE_BQ_PROJECT", "")
BQ_DATASET = os.environ.get("SMOKE_BQ_DATASET", "")
GSC_SITE_URL = os.environ.get("SMOKE_GSC_SITE_URL", "")

# Name of the throw-away sheet; minute resolution, so two runs within the
# same minute produce the same name.
TEMP_SHEET_NAME = f"_smoke_{_NOW_UTC.strftime('%Y%m%d_%H%M')}"
# Git ref used when megaton is installed from GitHub in Cell 2.
MEGATON_GIT_REF = os.environ.get("MEGATON_GIT_REF", "main")

In [None]:
# Cell 2: Environment detection + install
import sys
import subprocess

# Starts as "not importable"; the import/install step later in this cell
# sets it to True once `import megaton` succeeds.
MEGATON_AVAILABLE = False

# True when the google.colab module is already loaded in this interpreter;
# used by later cells as the signal for running on Colab.
IN_COLAB = "google.colab" in sys.modules


def _pip_install(args):
    """Run ``python -m pip <args>`` with the current interpreter.

    The full command line is printed before it is executed.

    Args:
        args: List of pip arguments, e.g. ``["install", "-U", "pkg"]``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_prefix = [sys.executable, "-m", "pip"]
    command = pip_prefix + args
    print("Running:", " ".join(command))
    subprocess.check_call(command)


# Try to import megaton; if that fails, install it (from GitHub on Colab,
# as an editable install of the current directory otherwise) and retry.
import importlib

try:
    import megaton  # noqa: F401
    MEGATON_AVAILABLE = True
except Exception:
    if IN_COLAB:
        pkg = f"git+https://github.com/mak00s/megaton@{MEGATON_GIT_REF}"
        try:
            _pip_install(["install", "-U", pkg])
        except (subprocess.CalledProcessError, OSError) as exc:
            # Report the failure instead of aborting the cell, matching the
            # handling of the local (non-Colab) branch below.
            print("Failed to install megaton:", exc)
        else:
            # Make the import system notice packages installed after the
            # interpreter started.
            importlib.invalidate_caches()
            try:
                import megaton  # noqa: F401
                MEGATON_AVAILABLE = True
            except Exception as exc:
                print("Failed to import megaton after install:", exc)
    else:
        print("megaton is not installed. Trying editable install: pip install -e .")
        try:
            _pip_install(["install", "-e", "."])
            # Same reason as above: refresh finder caches before re-importing.
            importlib.invalidate_caches()
            import megaton  # noqa: F401
            MEGATON_AVAILABLE = True
        except Exception as exc:
            print("Install failed. Please run: pip install -e .")
            print("Error:", exc)

# Summary of the detected environment for the notebook log.
print("IN_COLAB:", IN_COLAB)
print("MEGATON_AVAILABLE:", MEGATON_AVAILABLE)
print("MEGATON_GIT_REF:", MEGATON_GIT_REF)

In [None]:
# Cell 3: Load .env (simple)
# Loads .env (when python-dotenv is usable), refreshes the still-empty
# settings from Cell 1, reads the credentials, and sets READY for Cell 4.
import os

CREDS_JSON = None
READY = False

if not MEGATON_AVAILABLE:
    print("Skipped: megaton is not available.")
else:
    load_dotenv = None
    try:
        from dotenv import load_dotenv  # type: ignore
    except Exception:
        if IN_COLAB:
            # On Colab, install python-dotenv on demand.
            try:
                _pip_install(["install", "python-dotenv"])
                from dotenv import load_dotenv  # type: ignore
            except Exception as exc:
                print("Failed to install python-dotenv:", exc)
        else:
            print("python-dotenv is not installed; skipping .env load.")

    if load_dotenv:
        load_dotenv(".env")

    # Refresh config values if they were empty
    if not GS_URL:
        GS_URL = os.environ.get("SMOKE_GS_URL", "")
    if not BQ_PROJECT:
        BQ_PROJECT = os.environ.get("SMOKE_BQ_PROJECT", "")
    if not BQ_DATASET:
        BQ_DATASET = os.environ.get("SMOKE_BQ_DATASET", "")
    if not GSC_SITE_URL:
        GSC_SITE_URL = os.environ.get("SMOKE_GSC_SITE_URL", "")

    CREDS_JSON = os.environ.get("MEGATON_CREDS_JSON")

# Optional: set CREDS_JSON manually here (do not print it)
# CREDS_JSON = '{...}'

# Readiness is decided only after the optional manual override above, so that
# a manually assigned CREDS_JSON is honoured. READY stays False whenever
# megaton itself is unavailable.
if MEGATON_AVAILABLE:
    if CREDS_JSON:
        READY = True
    else:
        print("MEGATON_CREDS_JSON is not set.")
        print("Create .env and set MEGATON_CREDS_JSON to a single-line JSON string.")
        print("If you prefer multiline JSON, set CREDS_JSON manually in this cell.")
        READY = False

In [None]:
# Cell 4: Initialize Megaton
# Creates the Megaton client `mg` used by the later smoke cells; `mg` stays
# None when credentials are missing or initialization fails.
import pandas as pd

mg = None

if not READY:
    print("Skipped: credentials are not configured.")
else:
    try:
        # Imported here rather than at the top of the cell so that a missing
        # or broken megaton install is reported below instead of aborting the
        # cell before the READY check runs.
        from megaton.start import Megaton

        mg = Megaton(CREDS_JSON, headless=True)
        print("Megaton initialized.")
    except Exception as exc:
        print("Failed to initialize Megaton:", exc)
        mg = None


def _select_first_ga4_property(mg_instance):
    ga4_client = mg_instance.ga.get("4") if mg_instance else None
    if not ga4_client:
        return False
    if not ga4_client.accounts:
        return False
    account_id = ga4_client.accounts[0]["id"]
    ga4_client.account.select(account_id)
    if not ga4_client.account.properties:
        return False
    property_id = ga4_client.account.properties[0]["id"]
    ga4_client.property.select(property_id)
    print("Selected GA4 property:", property_id)
    return True

In [None]:
# Cell 5: GA4 smoke (required if RUN_GA4)
# Runs a small date x eventName report for DATE_FROM..DATE_TO and checks that
# the result is a DataFrame containing the requested columns.
if not RUN_GA4:
    print("Skipped: RUN_GA4 is False")
elif not mg:
    print("Skipped: Megaton not initialized")
elif not mg.ga.get("4"):
    print("Skipped: GA4 client is not available")
else:
    # Fall back to the first available property when none is selected yet.
    if not mg.ga["4"].property.id:
        if not _select_first_ga4_property(mg):
            print("Skipped: no GA4 properties available")
    if mg.ga["4"].property.id:
        try:
            mg.report.set_dates(DATE_FROM, DATE_TO)
            mg.report.run(
                d=["date", "eventName"],
                m=["eventCount"],
            )
            df = mg.report.data
            # Explicit checks with messages: a bare `assert` caught by the
            # handler below would print an empty failure reason.
            if not isinstance(df, pd.DataFrame):
                raise TypeError(
                    f"expected a pandas DataFrame, got {type(df).__name__}"
                )
            print("GA4 shape:", df.shape)
            display(df.head())
            missing = {"date", "eventName", "eventCount"} - set(df.columns)
            if missing:
                raise ValueError(f"missing expected columns: {sorted(missing)}")
        except Exception as exc:
            print("GA4 smoke failed:", exc)

In [None]:
# Cell 6: Sheets smoke (optional if RUN_SHEETS)
# Opens the spreadsheet at GS_URL, creates a sheet named TEMP_SHEET_NAME and
# writes a DataFrame into it.
if not RUN_SHEETS:
    print("Skipped: RUN_SHEETS is False")
elif not mg:
    print("Skipped: Megaton not initialized")
elif not GS_URL:
    print("Skipped: GS_URL not set")
else:
    try:
        mg.open.sheet(GS_URL)
        mg.gs.sheet.create(TEMP_SHEET_NAME)
        # Write the current report data when there is any; otherwise write a
        # one-cell placeholder frame.
        if mg and mg.report.data is not None:
            df = mg.report.data
        else:
            df = pd.DataFrame({"test": [1]})
        mg.save.to.sheet(TEMP_SHEET_NAME, df)
        print("Wrote to new sheet:", TEMP_SHEET_NAME)
    except Exception as exc:
        print("Sheets smoke failed:", exc)

In [None]:
# Cell 7: BigQuery smoke (optional if RUN_BQ)
# Runs a trivial query against BQ_PROJECT. When BQ_DATASET is set, it also
# builds the GA4 flatten-events SQL and prints its first 400 characters.
if not RUN_BQ:
    print("Skipped: RUN_BQ is False")
elif not mg:
    print("Skipped: Megaton not initialized")
elif not BQ_PROJECT:
    print("Skipped: BQ_PROJECT not set")
else:
    try:
        bq_client = mg.launch_bigquery(BQ_PROJECT)
        df = bq_client.run("SELECT 1 AS test", to_dataframe=True)
        print("BQ result:")
        display(df)
        if BQ_DATASET:
            bq_client.dataset.select(BQ_DATASET)
            # The dashes are stripped from the YYYY-MM-DD dates before they
            # are passed to the query builder.
            date1, date2 = (
                day.replace("-", "") for day in (DATE_FROM, DATE_TO)
            )
            sql = bq_client.ga4.get_query_to_flatten_events(
                date1,
                date2,
                event_parameters=[],
                user_properties=[],
            )
            print("Flatten SQL (preview):")
            print(sql[:400])
    except Exception as exc:
        print("BigQuery smoke failed:", exc)

In [None]:
# Cell 8: Optional services (GSC / Drive)
# Search Console: only checks that a client can be launched for GSC_SITE_URL.
if not RUN_GSC:
    print("Skipped: RUN_GSC is False")
elif not mg:
    print("Skipped: Megaton not initialized")
elif not GSC_SITE_URL:
    print("Skipped: GSC_SITE_URL not set")
else:
    try:
        sc = mg.launch_sc(GSC_SITE_URL)
        if sc:
            print("Search Console client initialized.")
    except Exception as exc:
        print("Search Console smoke failed:", exc)

# Google Drive: import the mount helper lazily and call it; any failure
# (including a failed import) is reported rather than raised.
if not RUN_DRIVE:
    print("Skipped: RUN_DRIVE is False")
else:
    try:
        from megaton import mount_google_drive

        mount_google_drive()
    except Exception as exc:
        print("Drive mount failed:", exc)