以下のセルでGSを更新したら、Excel形式でダウンロードし、ローカルのJupyter NotebookでPPTを複製し貼り付ける

In [None]:
#@title 初期設定

import pandas as pd
import pytz
import re
import calendar
from datetime import datetime, timedelta
from IPython.display import clear_output

try:
    from megaton import start
except ModuleNotFoundError:
    %pip install -U -q git+https://github.com/mak00s/megaton
    from megaton import start

def get_past_date(n_days=None, n_months=None, first_or_last=None, timezone="Asia/Tokyo"):
    """
    Return a calendar date relative to "now" in ``timezone`` as 'YYYY-MM-DD'.

    Depending on the arguments the result is:

    * no arguments (or only ``first_or_last='last'``): today's date;
    * only ``first_or_last='first'``: the first day of the current month;
    * ``n_days``: the date ``n_days`` days before today (``first_or_last`` is ignored);
    * ``n_months``: the first day (default) or the last day of the month that lies
      ``n_months`` months before the current one. Negative values address future months.

    Args:
        n_days (int, optional): The number of days ago. Default is None.
        n_months (int, optional): The number of months ago. Default is None.
        first_or_last (str, optional): 'first' or 'last' to specify the first or last day of the month. Default is None.
        timezone (str): Timezone name used to determine "now". Default is 'Asia/Tokyo'.

    Returns:
        str: The calculated date in 'YYYY-MM-DD' format.

    Raises:
        ValueError: If both 'n_days' and 'n_months' are provided, or if 'first_or_last' is invalid.
    """
    if n_days is not None and n_months is not None:
        raise ValueError("Specify either 'n_days' or 'n_months', but not both.")
    if first_or_last and first_or_last not in ['first', 'last']:
        raise ValueError("Invalid value for 'first_or_last'. Use 'first' or 'last'.")

    # Current datetime in the specified timezone
    now = datetime.now(pytz.timezone(timezone))

    if n_days is not None:
        result_date = now - timedelta(days=n_days)
    elif n_months is not None:
        # Count months on one linear axis so divmod normalises the (year, month)
        # pair in both directions; the previous loop only handled month <= 0.
        year, month_index = divmod(now.year * 12 + (now.month - 1) - n_months, 12)
        month = month_index + 1
        day = calendar.monthrange(year, month)[1] if first_or_last == 'last' else 1
        # Only the calendar date is formatted below, so a naive datetime suffices.
        # (A pytz zone must not be passed through the datetime constructor's tzinfo= argument.)
        result_date = datetime(year, month, day)
    elif first_or_last == 'first':
        result_date = now.replace(day=1)
    else:
        result_date = now

    return result_date.strftime('%Y-%m-%d')

def save_to_google_sheet(gs_url, sheet_name, df, sort_by=None):
    """
    Write a DataFrame to one worksheet of a Google Sheets document.

    The document at ``gs_url`` is opened through the module-level ``mg`` client.
    A worksheet named ``sheet_name`` is created; if creation fails (treated as
    "the sheet already exists"), the existing sheet is selected and cleared instead.
    After the data is written, every column is resized to fit its widest value
    (clamped to 50-500 px) and the first row is frozen.

    Args:
        gs_url (str): The URL of the Google Sheets document.
        sheet_name (str): The name of the sheet to save the data.
        df (pd.DataFrame): The DataFrame to save.
        sort_by (str or list, optional): Column(s) to sort by, in descending
            order, before saving. No sorting when omitted.

    Returns:
        None. Success or failure is reported with ``print``.
    """
    min_width, max_width = 50, 500

    def calculate_pixel_size(value, single_byte_multiplier=7, multi_byte_multiplier=14):
        """
        Estimate the rendered width of a value in pixels.

        Args:
            value: The value to measure; it is converted with ``str``.
            single_byte_multiplier (int): Width added per character with code point < 128.
            multi_byte_multiplier (int): Width added per any other character.

        Returns:
            int: The estimated pixel width.
        """
        return sum(
            single_byte_multiplier if ord(char) < 128 else multi_byte_multiplier
            for char in str(value)
        )

    # Sort the DataFrame if specified
    if sort_by:
        df = df.sort_values(by=sort_by, ascending=False)

    if not mg.open.sheet(gs_url):
        print("Failed to open the Google Sheets document.")
        return

    try:
        # Try to create a new sheet
        mg.gs._driver.add_worksheet(title=sheet_name, rows=10, cols=10)
    except Exception:
        # Deliberate best effort: any failure is taken to mean the sheet
        # already exists, so it is selected and emptied instead.
        mg.gs.sheet.select(sheet_name)
        mg.gs.sheet.clear()

    # Save the DataFrame to the sheet
    mg.save.to.sheet(df=df, sheet_name=sheet_name)

    # Get the sheet object and its ID
    sheet = mg.gs._driver.worksheet(sheet_name)
    sheet_id = sheet.id

    # Column widths as plain Python ints. Measuring with a list (header included)
    # keeps an empty DataFrame from producing NaN and avoids numpy scalars in
    # the request payload.
    column_widths = []
    for col_name in df.columns:
        candidates = [calculate_pixel_size(col_name)]
        candidates.extend(calculate_pixel_size(value) for value in df[col_name].astype(str))
        column_widths.append(max(min(max(candidates), max_width), min_width))

    # One resize request per column ...
    requests = [
        {
            "updateDimensionProperties": {
                "range": {
                    "sheetId": sheet_id,
                    "dimension": "COLUMNS",
                    "startIndex": index,
                    "endIndex": index + 1
                },
                "properties": {"pixelSize": width},
                "fields": "pixelSize"
            }
        }
        for index, width in enumerate(column_widths)
    ]

    # ... plus one request that freezes the header row
    requests.append({
        "updateSheetProperties": {
            "properties": {
                "sheetId": sheet_id,
                "gridProperties": {"frozenRowCount": 1}
            },
            "fields": "gridProperties.frozenRowCount"
        }
    })

    # Execute batch update
    mg.gs._driver.batch_update({"requests": requests})
    print(f"Data successfully saved to the sheet: {sheet_name}, column widths adjusted, and first row frozen.")

def update_sheets_cells(cells_to_update):
    """
    Write individual cell values into one or more worksheets of the open document.

    Sheets are processed in dictionary order through the module-level ``mg``
    client. Any exception stops the remaining updates and is reported with
    ``print`` instead of being raised.

    Args:
        cells_to_update (dict): Maps a sheet name to a dictionary of
            ``{cell address: value}`` pairs, for example::

                {
                    "Page": {"I1": From, "K1": __To},
                    "Summary": {"A1": "Report Start", "B1": From}
                }

    Returns:
        None
    """
    def _write_cells(sheet_name, updates):
        # Select the sheet first; the cell writes below go to the selected sheet
        mg.gs.sheet.select(sheet_name)
        for address, new_value in updates.items():
            mg.gs.sheet._driver.update_acell(address, new_value)
        print(f"Successfully updated sheet '{sheet_name}' with updates: {updates}")

    try:
        for sheet_name, updates in cells_to_update.items():
            _write_cells(sheet_name, updates)
    except Exception as e:
        print(f"An error occurred while updating the sheets: {e}")

# Service-account key file handed to Megaton
CREDS_PATH = "/nbs/key/sa-shiseido-corp-dts.json"
mg = start.Megaton(CREDS_PATH, use_ga3=False)

# GA4 account and property the reports are pulled from
# NOTE(review): these lines (and DOMAIN_PATTERN below) carry "#param" rather than
# "#@param" like the other settings — confirm whether that is intentional.
GA4_ACCOUNT = '151965783'  #param {type:"string"}
GA4_PROPERTY = '334854563'  #param {type:"string"}
mg.ga['4'].account.select(GA4_ACCOUNT)
mg.ga['4'].property.select(GA4_PROPERTY)

clear_output()

# Site definition: both values are interpolated into the "=~" report filters of later cells
#@markdown サイト定義
DOMAIN_PATTERN = "corp.shiseido.com" #param {type:"string"}
PAGE_PATTERN = "/deilab/" #@param {type:"string"}

# URL of the Google Sheets document that receives the results
#@markdown 書き込むGoogle SheetsのURL
GS_URL = "https://docs.google.com/spreadsheets/d/1F-TpvVDEX_g8n7aREggkXOUvREd9q7Yv-0EtbObKqes/" #@param {type:"string"}

# Report period: first and last day of the previous month
# now = datetime.now(pytz.timezone("Asia/Tokyo"))

# From = (now.replace(day=1) - timedelta(days=1)).replace(day=1).strftime('%Y-%m-%d')
From = get_past_date(n_months=1, first_or_last='first')
__To = get_past_date(n_months=1, first_or_last='last')

# From = "2024-03-07"  # uncomment to override the period
# __To = "2025-09-30"  #@param {type:"date"}

print(f"対象期間：{From}〜{__To}")
# Make the period the default date range for subsequent reports
mg.report.set_dates(From, __To)

print("準備ができました。次のセルへ進んでください。")

対象期間：2025-11-01〜2025-11-30
準備ができました。次のセルへ進んでください。


## GA4データ抽出

### ページ

In [None]:
#@title ページ別の指標 →df_p

# Build df_p: one row per page with users, sessions, page views and the counts
# of several events, then write it to the sheet named in `Sheet`.
mg.report.set_dates(From, __To)

# Users, sessions and page views per page (base table; later results are left-joined onto it)
event_name, metrics_name = ("page_view", "pv")
mg.report.run(
    d = [("pagePath", "page")],
    m = [("totalUsers", "uu"), "sessions", ("eventCount", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName=={event_name}",
    sort = "-sessions"
)
_df = mg.report.data


# footer_view events per page (column "footer_views"), used for the read-through rate
event_name = "footer_view"
metrics_name = event_name + "s"
mg.report.run(
    d = [("pagePath", "page")],
    m = [("eventCount", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName=={event_name}",
    sort = "-eventCount"
)
_df = _df.merge(mg.report.data, on='page', how='left')
# Pages without the event get 0 instead of NaN
_df[metrics_name] = _df[metrics_name].fillna(0).astype(int)


# PDF clicks per page
event_name, metrics_name = ("pdf_click", "pdf_views")
mg.report.run(
    d = [
        ("pagePath", "page"),
        # "linkUrl"
    ], m = [("eventCount", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName=={event_name}",
    sort = "-eventCount"
)
try:
    _df = _df.merge(mg.report.data, on='page', how='left')
    _df[metrics_name] = _df[metrics_name].fillna(0).astype(int)
except TypeError:
    # Presumably raised by merge when the report returned no data — TODO confirm.
    # The column is then filled with zeros.
    _df[metrics_name] = 0


# YouTube plays: video_start events per page
event_name, metrics_name = ("video_start", "video_views")
mg.report.run(
    d = [
        ("pagePath", "page"),
        # "videoTitle"
    ], m = [("eventCount", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName=={event_name}",
    sort = "-eventCount"
)
try:
    _df = _df.merge(mg.report.data, on='page', how='left')
    _df[metrics_name] = _df[metrics_name].fillna(0).astype(int)
except TypeError:
    # Same fallback as above: no usable report data means a zero column
    _df[metrics_name] = 0

# video_complete events per page (the column keeps the event name)
event_name = "video_complete"
metrics_name = event_name
mg.report.run(
    d = [
        ("pagePath", "page"),
        # "videoTitle"
    ], m = [("eventCount", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName=={event_name}",
    sort = "-eventCount"
)
try:
    _df = _df.merge(mg.report.data, on='page', how='left')
    _df[metrics_name] = _df[metrics_name].fillna(0).astype(int)
except TypeError:
    # Same fallback as above: no usable report data means a zero column
    _df[metrics_name] = 0


clear_output()
df_p = _df
# mg.show.table(df_p, rows=10)

# Save to Google Sheets

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_page'  #@param {type:"string"}

save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_p)

# Stamp the report period into cells J1 / L1 of the "Page" sheet
update_sheets_cells({"Page": {"J1": From, "L1": __To}})

Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_page」シートを選択しました。
データを「_page」シートへ反映しました。
Data successfully saved to the sheet: _page, column widths adjusted, and first row frozen.
Successfully updated sheet 'Page' with updates: {'J1': '2025-11-01', 'L1': '2025-11-30'}


In [None]:
#@title Pageの日別指標 →df_d

# Pin the report period explicitly (as the page-level cell does) so the result
# does not depend on which cell last changed the dates.
mg.report.set_dates(From, __To)

# Daily users, sessions and page views per page -> df_d
mg.report.run(
    d = ["date", ("pagePath", "page")],
    m = [("totalUsers", "uu"), "sessions", ("eventCount", "pv")],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName==page_view",
    sort = "date"
)
df_d = mg.report.data

# mg.show.table(df_d, rows=10)

# Save to Google Sheets

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_page-d'  #@param {type:"string"}

save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_d)

Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_page-d」シートを選択しました。
データを「_page-d」シートへ反映しました。
Data successfully saved to the sheet: _page-d, column widths adjusted, and first row frozen.


In [None]:
#@title Pageの月別指標 →df_m

# Monthly trend per page -> df_m. The period is widened to start at 2024-03-07
# for this query only.
mg.report.set_dates("2024-03-07", __To)
try:
    mg.report.run(
        d = ["yearMonth", ("pagePath", "page")],
        m = [("totalUsers", "uu"), "sessions", ("eventCount", "pv")],
        filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};eventName==page_view",
        sort = "yearMonth"
    )
    df_m = mg.report.data
finally:
    # Always restore the regular report period, even when the query fails,
    # so later cells do not silently run on the widened range.
    mg.report.set_dates(From, __To)

# Convert yearMonth ("YYYYMM") to the first day of the month as 'YYYY-MM-DD'
df_m['yearMonth'] = pd.to_datetime(df_m['yearMonth'], format='%Y%m').dt.strftime('%Y-%m-%d')

# mg.show.table(df_m, rows=5)

# Overwrite the Google Sheets worksheet

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_page-m'  #@param {type:"string"}

save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_m)

Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_page-m」シートを選択しました。
データを「_page-m」シートへ反映しました。
Data successfully saved to the sheet: _page-m, column widths adjusted, and first row frozen.


### 流入元

In [None]:
#@title 流入元 →df_c

# Pin the report period explicitly (as the page-level cell does) so the result
# does not depend on which cell last changed the dates.
mg.report.set_dates(From, __To)

# Sessions per channel / source / landing page -> df_c
mg.report.run(
    d = [
        ("sessionDefaultChannelGroup", "channel"),
        ("sessionSource", "source"),
        "landingPage",
        # "pageReferrer",
    ], m = [
        ("sessions", "entrances"),
        # ("eventCount", "pv")
    ],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};landingPage=~{PAGE_PATTERN};eventName==page_view",
    sort = "-sessions"
)
df_c = mg.report.data
# Give every landing page a trailing slash unless it already ends with '/' or '.html'
df_c['landingPage'] = df_c['landingPage'].apply(lambda x: x if x.endswith(('.html', '/')) else x + '/')

def classify_source_channel(row):
    """
    Re-map one traffic row to a normalised ``(source, channel)`` pair.

    The row's ``source`` (lower-cased, with "www." removed) is checked against
    the rules below in order; the first match wins:

    1. AI assistants (also matched against ``medium``) -> ``(<name>, "AI")``
    2. Internal hosts -> ``(source, "Shiseido Internal")``
    3. Search hosts -> ``(source, "Organic Search")``
    4. Social networks -> ``(<network>, "Organic Social")``
    5. Anything else -> the original source and channel, unchanged

    Args:
        row: A mapping-like object with a ``get`` method (a DataFrame row or a
            dict) that provides "channel", "source" and optionally "medium".

    Returns:
        tuple: ``(source, channel)``.
    """
    ch = str(row.get("channel", ""))
    med = str(row.get("medium", "")).lower()
    src = str(row.get("source", "")).lower().replace("www.", "")

    # --- AI channel ---
    ai_patterns = {
        "ChatGPT": r"(chatgpt|chat\.openai\.com)",
        "Copilot": r"(copilot|bing\.com|microsoftcopilot)",
        "Gemini": r"(gemini|bard|aistudio\.google\.com|makersuite\.google\.com)",
        "Claude": r"(claude|anthropic\.com)",
        "Perplexity": r"(perplexity|pplx\.ai)"
    }
    for ai_name, pattern in ai_patterns.items():
        if re.search(pattern, src) or re.search(pattern, med):
            # NOTE: capitalize() lower-cases everything after the first letter,
            # so "ChatGPT" is reported as "Chatgpt"; kept so labels stay stable.
            return ai_name.capitalize(), "AI"

    # --- Internal access ---
    if re.search(r"(extra\.shiseido\.co\.jp|(spark|international|intra).shiseido.co.jp|office\.net|sharepoint|teams|basement\.jp|yammer)", src):
        return src, "Shiseido Internal"

    # --- Organic Search ---
    # Any source containing "search" matches here, whatever its channel, so no
    # separate "Referral containing search" rule is needed further down.
    if re.search(r"(service\.smt\.docomo\.ne\.jp|search|jp\.hao123\.com|\.jword\.jp)", src):
        return src, "Organic Search"

    # --- Social networks (Organic Social) ---
    # "t.co" must be the whole host (or its final labels); an unanchored match
    # would also hit every host containing "t.co", e.g. "pinterest.com".
    if re.search(r"((?:^|\.)t\.co$|twitter)", src):
        return "Twitter", "Organic Social"
    social_networks = (
        ("instagram", "Instagram"),
        ("facebook", "Facebook"),
        ("threads", "Threads"),
        ("tiktok", "TikTok"),
    )
    for needle, label in social_networks:
        if needle in src:
            return label, "Organic Social"

    # fallback: keep the reported values
    return row.get("source", ""), ch

# Replace source/channel of every row with the re-classified pair;
# result_type="expand" spreads the returned tuple over the two columns.
df_c[["source", "channel"]] = df_c.apply(classify_source_channel, axis=1, result_type="expand")

mg.show.table(df_c, rows=10)

Unnamed: 0,channel,source,landingPage,entrances
0,Organic Search,google,/deilab/jp/actions/shiseidoshock1/,3461
1,Organic Search,yahoo,/deilab/jp/actions/shiseidoshock1/,464
2,Organic Search,google,/deilab/jp/actions/shiseidoshock2/,291
3,Organic Search,bing,/deilab/jp/actions/shiseidoshock1/,149
4,Organic Social,Twitter,/deilab/jp/actions/shiseidoshock1/,133
...,...,...,...,...
148,Shiseido Internal,statics.teams.cdn.office.net,/deilab/en/research/,1
149,Unassigned,(not set),/deilab/jp/,1
150,AI,Chatgpt,/deilab/jp/actions/shiseidoshock1/,1
151,AI,Chatgpt,/deilab/jp/research/bias/,1


In [None]:
#@title Google Sheetsへ保存する

#@markdown 結果を保存するGoogle Sheetsのシート名
# Worksheet that receives the channel table (df_c)
Sheet = '_ch'  #@param {type:"string"}

# if mg.open.sheet(GS_URL):
#     mg.save.to.sheet(df=df_c, sheet_name=Sheet)  # overwrite
save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_c)

Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_ch」シートを選択しました。
データを「_ch」シートへ反映しました。
Data successfully saved to the sheet: _ch, column widths adjusted, and first row frozen.


In [None]:
#@title Referralの内訳 →df_r
# Sessions per source for the "Referral" channel, largest first -> df_r.
# The aggregation is named as the string 'sum'; passing the builtin `sum`
# triggers a pandas warning.
df_r = (
    df_c[df_c['channel'] == 'Referral']
    .groupby(['source'])
    .agg({'entrances': 'sum'})
    .reset_index()
    .sort_values(by='entrances', ascending=False)
)

# mg.show.table(df_r, rows=5)

# Save to Google Sheets

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_ref'  #@param {type:"string"}

save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_r)

  df_r = df_c[df_c['channel']=='Referral'].groupby(['source']).agg({'entrances': sum}).reset_index().sort_values(by='entrances', ascending=False)


Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_ref」シートを選択しました。
データを「_ref」シートへ反映しました。
Data successfully saved to the sheet: _ref, column widths adjusted, and first row frozen.


In [None]:
#@title 流入元の月別 →ch_m

# Monthly users and sessions per channel / source. The period is widened to
# start at 2024-03-07 for this query only.
# NOTE: the result is stored in df_m, replacing the monthly page table of the same name.
mg.report.set_dates("2024-03-07", __To)
try:
    mg.report.run(
        d = [
            "yearMonth",
            ("sessionDefaultChannelGroup", "channel"),
            ("sessionSource", "source"),
        ], m = [
            ("totalUsers", "uu"), "sessions"
        ],
        filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};landingPage=~{PAGE_PATTERN};eventName==page_view",
        sort = "yearMonth"
    )
    df_m = mg.report.data
finally:
    # Always restore the regular report period, even when the query fails
    mg.report.set_dates(From, __To)

# Convert yearMonth ("YYYYMM") to the first day of the month as 'YYYY-MM-DD'.
# The values must come from df_m itself; df_c has no yearMonth column.
df_m['yearMonth'] = pd.to_datetime(df_m['yearMonth'], format='%Y%m').dt.strftime('%Y-%m-%d')

# Re-classify source/channel the same way as the channel table
df_m[["source", "channel"]] = df_m.apply(classify_source_channel, axis=1, result_type="expand")

# Save to Google Sheets

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_ch-m'  #@param {type:"string"}
save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_m)

Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_ch-m」シートを選択しました。
データを「_ch-m」シートへ反映しました。
Data successfully saved to the sheet: _ch-m, column widths adjusted, and first row frozen.


### LP

In [None]:
#@title 流入 →df_lp

# 流入後 →df_lpm
# Landing-page table -> df_lp: sessions per landing page, the events that
# followed, and page views outside PAGE_PATTERN in the same sessions.
# NOTE: this cell reads df_c as the channel table (columns landingPage and
# entrances); the "Click" cell reuses the name df_c, so run this cell first.

# Events counted per landing page (the names are joined into one regex filter)
events = [
    # "pdf_click",
    "footer_view",
    # "video_start",
    # "video_complete"
]


def _normalize_landing_page(path):
    """Append a trailing slash unless the path already ends with '/' or '.html'."""
    return path if path.endswith(('.html', '/')) else path + '/'


# Pin the report period explicitly so the result does not depend on which cell
# last changed the dates.
mg.report.set_dates(From, __To)

mg.report.run(
    d = [
        ("landingPage", "page"),
        ("eventName", "event"),
    ], m = [("eventCount", "count")],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath=~{PAGE_PATTERN};landingPage=~{PAGE_PATTERN};eventName=~{'|'.join(events)}",
    sort = "-eventCount"
)
_df = mg.report.data
_df['page'] = _df['page'].apply(_normalize_landing_page)

# One column per event. pivot_table (sum) is used because two raw paths can
# collapse onto the same page after normalisation, and pivot() raises on
# duplicate (page, event) pairs.
_df2 = _df.pivot_table(index='page', columns='event', values='count', aggfunc='sum').reset_index()
for event in events:
    if event not in _df2.columns:
        # The event did not occur at all in the period
        _df2[event] = 0
_df2 = _df2.fillna(0).astype({event: int for event in events})

# mg.show.table(_df2, rows=10)


# Sessions per landing page, summed over channel and source
_df = df_c.groupby('landingPage', as_index=False)['entrances'].sum()
_df.rename(columns={'landingPage': 'page'}, inplace=True)
column_order = _df.columns.to_list() + events
df_lpm = _df.merge(_df2, on='page', how='left')[column_order]
df_lpm = df_lpm.fillna(0).astype({event: int for event in events})

# mg.show.table(df_lpm, rows=10)


# Page views outside PAGE_PATTERN for sessions that landed inside it
metrics_name = "corp_pv"
mg.report.run(
    d = [
        ("landingPage", "page"),
        # ("pagePath", "page"),
    ], m = [("eventCount", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};landingPage=~{PAGE_PATTERN};pagePath!~{PAGE_PATTERN};eventName==page_view",
    sort = "-eventCount"
)
_df = mg.report.data
_df['page'] = _df['page'].apply(_normalize_landing_page)
# Merge pages that became identical after normalisation so the join below
# cannot duplicate landing-page rows
_df = _df.groupby('page', as_index=False)[metrics_name].sum()

df_lp = df_lpm.merge(_df, on='page', how='left')
df_lp[metrics_name] = df_lp[metrics_name].fillna(0).astype(int)


mg.show.table(df_lp, rows=10)

Unnamed: 0,page,entrances,footer_view,corp_pv
0,/deilab/en/,14,6,8
1,/deilab/en/actions/,3,4,24
2,/deilab/en/actions/nlw/,4,1,2
3,/deilab/en/actions/session/,5,0,0
4,/deilab/en/actions/shiseidoshock1/,3,1,0
5,/deilab/en/actions/shiseidoshock2/,11,4,13
6,/deilab/en/actions/wetforce/,2,1,0
7,/deilab/en/news/,2,1,0
8,/deilab/en/research/,2,1,0
9,/deilab/en/research/bias/,4,0,0


In [None]:
#@title Google Sheetsへ保存する

#@markdown 結果を保存するGoogle Sheetsのシート名
# Worksheet that receives the landing-page table (df_lp)
Sheet = '_lp'  #@param {type:"string"}

# if mg.open.sheet(GS_URL):
#     mg.save.to.sheet(df=df_lp, sheet_name=Sheet)  # overwrite
save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_lp)

Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_lp」シートを選択しました。
データを「_lp」シートへ反映しました。
Data successfully saved to the sheet: _lp, column widths adjusted, and first row frozen.


### Click

In [None]:
#@title 次のページ →df_c
# Next pages: page views outside PAGE_PATTERN whose referrer is inside it.
# NOTE: the result is stored in df_c, replacing the channel table of the same name.
event_name, metrics_name = ("page_view", "click")

# Pin the report period explicitly so the result does not depend on which cell
# last changed the dates.
mg.report.set_dates(From, __To)

mg.report.run(
    d = [("pageReferrer", "referrer"), ("pagePath", "page")],
    m = [("totalUsers", "uu"), ("sessions", metrics_name)],
    filter_d = f"hostName=~{DOMAIN_PATTERN};pagePath!~{PAGE_PATTERN};pageReferrer=~{PAGE_PATTERN};eventName=={event_name}",
    sort = "-sessions"
)
mg.report.prep({
    "referrer": {  # column to clean
        # Regex fragments to cut (several may be listed). The first one is a
        # raw string so "\?" is not an invalid escape sequence.
        "cut": [r"\?.*$", "^https://corp.shiseido.com"],
    },
})
# Rows that became identical after the cut are summed together
df_c = mg.report.data.groupby(
    ['referrer','page']
).sum().reset_index().sort_values(by="referrer")

mg.show.table(df_c, rows=10)

# Save to Google Sheets

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_click'  #@param {type:"string"}

# if mg.open.sheet(GS_URL):
#     mg.save.to.sheet(df=df_c, sheet_name=Sheet)  # overwrite
save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df_c)

  "cut": ["\?.*$", "^https://corp.shiseido.com"],  # 正規表現でカット（複数並べられる）


Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_click」シートを選択しました。
データを「_click」シートへ反映しました。
Data successfully saved to the sheet: _click, column widths adjusted, and first row frozen.


# Search Consoleデータ抽出

In [None]:
#@title GSCからデータを取得して加工

try:
    import jaconv
except ModuleNotFoundError:
    %pip install -q jaconv
    import jaconv
    clear_output()

from google.oauth2 import service_account
from googleapiclient.discovery import build
import logging

# Logging setup
# Root logger at INFO level; `logger` is used by the Search Console helpers below
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Connect Google Search Console
def connect_to_gsc_OLD(credentials_path):
    """
    Build a Google Search Console client from a service-account key file.

    Superseded by ``connect_to_gsc``; always requests the
    'https://www.googleapis.com/auth/webmasters' scope.

    Args:
        credentials_path (str): Path to the service account key file.

    Returns:
        object: The 'searchconsole' v1 service object.
    """
    return build(
        'searchconsole',
        'v1',
        credentials=service_account.Credentials.from_service_account_file(
            credentials_path,
            scopes=['https://www.googleapis.com/auth/webmasters'],
        ),
    )

def connect_to_gsc(credentials_path, scopes=None):
    """
    Build a Google Search Console client from a service-account key file.

    Args:
        credentials_path (str): Path to the service account key file.
        scopes (list, optional): OAuth scopes to request. When omitted or
            empty, 'https://www.googleapis.com/auth/webmasters' is used.

    Returns:
        object: The 'searchconsole' v1 service object.

    Raises:
        Exception: Any error raised while loading the credentials or building
            the client is logged and re-raised unchanged.
    """
    if not scopes:
        scopes = ['https://www.googleapis.com/auth/webmasters']
    try:
        gsc_credentials = service_account.Credentials.from_service_account_file(
            credentials_path, scopes=scopes
        )
        gsc_service = build('searchconsole', 'v1', credentials=gsc_credentials)
    except Exception as e:
        logger.error(f"Failed to connect to GSC: {e}")
        raise
    return gsc_service

def execute_gsc_query_OLD(service, site_url, payload):
    """
    Run a single Search Analytics query and return its rows as a DataFrame.

    Superseded by ``execute_gsc_query``; only one request is sent, so no
    paging takes place.

    Args:
        service (object): GSC service object.
        site_url (str): The site property URL in Google Search Console.
        payload (dict): The query payload; its 'dimensions' list names the
            columns built from each row's 'keys'.

    Returns:
        pd.DataFrame: One row per result with the dimension columns plus
        'clicks' and 'impressions'. Empty when the call fails or no rows
        are returned (a message is printed in both cases).
    """
    try:
        api_response = service.searchanalytics().query(siteUrl=site_url, body=payload).execute()
    except Exception as e:
        print(f"Error during API call: {e}")
        return pd.DataFrame()  # best effort: callers receive an empty frame

    response_rows = api_response.get('rows', [])
    if not response_rows:
        print("No data found in the response.")
        return pd.DataFrame()

    # Flatten every API row: keys become dimension columns, then the metrics
    records = [
        {
            **{name: entry['keys'][position] for position, name in enumerate(payload['dimensions'])},
            'clicks': entry['clicks'],
            'impressions': entry['impressions'],
        }
        for entry in response_rows
    ]
    return pd.DataFrame(records)

# Execute a GSC query
def execute_gsc_query(service, site_url, payload):
    """
    Run a Search Analytics query page by page and return all rows as a DataFrame.

    The query is repeated with an increasing 'startRow' (always beginning at 0)
    until a response contains no rows. The caller's ``payload`` is left
    untouched; each request uses a copy.

    Args:
        service (object): GSC service object.
        site_url (str): The site property URL in Google Search Console.
        payload (dict): The query payload; its 'dimensions' list names the
            columns built from each row's 'keys'.

    Returns:
        pd.DataFrame: One row per result with the dimension columns plus
        'clicks' and 'impressions' (0 when a row lacks them). When the query
        yields no rows, the frame is empty but still has these columns. When
        any error occurs, it is logged and a completely empty DataFrame (no
        columns) is returned.
    """
    try:
        dimensions = payload["dimensions"]
        results, start_row = [], 0

        while True:
            # Copy instead of writing startRow into the caller's dict
            request_body = {**payload, "startRow": start_row}
            response = service.searchanalytics().query(siteUrl=site_url, body=request_body).execute()
            rows = response.get('rows', [])
            if not rows:
                break
            results.extend(rows)
            start_row += len(rows)

        # Process rows into a DataFrame
        data = [
            {**{dim: row['keys'][i] for i, dim in enumerate(dimensions)},
             "clicks": row.get("clicks", 0),
             "impressions": row.get("impressions", 0)}
            for row in results
        ]
        # Naming the columns keeps 'clicks'/'impressions' addressable even
        # when there are no rows; dict.fromkeys drops accidental duplicates.
        columns = list(dict.fromkeys([*dimensions, "clicks", "impressions"]))
        return pd.DataFrame(data, columns=columns)
    except Exception as e:
        logger.error(f"Error during API call: {e}")
        return pd.DataFrame()

# Group similar queries with normalization
def group_similar_queries(df, query_column="query", metric_columns=None, additional_dimensions=None, normalize_columns=None):
    """
    Merge rows whose queries differ only in character width, spacing or order.

    The input DataFrame is not modified; all work happens on a copy.

    Steps:
        1. Values in ``normalize_columns`` are width-normalised: half-width
           kana become full-width, full-width ASCII letters and digits become
           half-width.
        2. A grouping key is built from each query by removing half- and
           full-width spaces and sorting the remaining characters, so queries
           made of the same characters in any order share one key.
        3. Rows are grouped by that key plus ``additional_dimensions``; metrics
           are summed and the first query of each group is kept as its label.

    Args:
        df (pd.DataFrame): The input data.
        query_column (str): Name of the column holding the query text.
        metric_columns (list, optional): Columns to sum. Defaults to all
            numeric columns.
        additional_dimensions (list, optional): Extra grouping columns.
            Defaults to every column that is neither a metric nor the query
            column.
        normalize_columns (list, optional): Columns to width-normalise.
            Defaults to ``[query_column]``; names missing from ``df`` are skipped.

    Returns:
        pd.DataFrame: The grouped data (grouping dimensions, query column and
        summed metrics).

    Raises:
        ValueError: If ``query_column`` is not a column of ``df``.
    """
    if query_column not in df.columns:
        raise ValueError(f"Column '{query_column}' not found in the DataFrame.")

    # Work on a copy so the caller's DataFrame is neither rewritten nor left
    # with the temporary "normalized_query" column.
    df = df.copy()

    def normalize_width(text):
        if pd.isnull(text):
            return text
        widened = jaconv.h2z(text, kana=True)
        return jaconv.z2h(widened, kana=False, ascii=True, digit=True)

    # Normalize specified columns
    normalize_columns = normalize_columns or [query_column]
    for column in normalize_columns:
        if column in df.columns:
            df[column] = df[column].apply(normalize_width)

    # Metric and additional dimensions setup
    metric_columns = metric_columns or df.select_dtypes(include=["number"]).columns.tolist()
    additional_dimensions = additional_dimensions or [col for col in df.columns if col not in metric_columns + [query_column]]

    # Grouping key: spaces removed, characters sorted
    def normalize_query(query):
        return "".join(sorted(query.replace(" ", "").replace("　", ""))) if pd.notnull(query) else query

    df["normalized_query"] = df[query_column].apply(normalize_query)

    # Aggregate metrics
    agg_rules = {query_column: "first", **{col: "sum" for col in metric_columns}}
    grouped = df.groupby(["normalized_query"] + additional_dimensions, as_index=False).agg(agg_rules)
    return grouped.drop(columns=["normalized_query"])

def normalize_query_column_OLD(df, query_columns):
    """
    Width-normalise text columns in place.

    Half-width kana are converted to full-width, then full-width ASCII letters
    and digits are converted to half-width. Null values are left as they are,
    and column names missing from ``df`` are skipped.

    Args:
        df (pd.DataFrame): The input DataFrame; it is modified in place.
        query_columns (list): The names of the columns to normalize.

    Returns:
        pd.DataFrame: The same DataFrame object, with the columns normalized.
    """
    def _kana_to_full_width(text):
        # half-width kana -> full-width kana
        return text if not pd.notnull(text) else jaconv.h2z(text, kana=True)

    def _alnum_to_half_width(text):
        # full-width letters and digits -> half-width
        return text if not pd.notnull(text) else jaconv.z2h(text, kana=False, ascii=True, digit=True)

    for name in query_columns:
        if name not in df.columns:
            continue
        df[name] = df[name].apply(_kana_to_full_width)
        df[name] = df[name].apply(_alnum_to_half_width)
    return df

def query_web_gsc_OLD(credentials_path, site_url, filter_pattern, start_date, end_date, dimensions=("query", "page")):
    """
    Query Search Console web results for matching pages, then normalise and aggregate them.

    Superseded by ``query_web_gsc``, which returns the raw rows without
    normalisation or aggregation.

    Args:
        credentials_path (str): Path to the service account key file.
        site_url (str): The site property URL in Google Search Console.
        filter_pattern (str): Regular expression the page must match.
        start_date (str): Query start date (YYYY-MM-DD).
        end_date (str): Query end date (YYYY-MM-DD).
        dimensions (tuple): Dimensions to include in the query (default: ('query', 'page')).

    Returns:
        pd.DataFrame: Clicks and impressions summed per dimension combination,
        sorted by clicks and then impressions, both descending. Empty when the
        query returned nothing.
    """
    # Step 1: Connect to GSC
    service = connect_to_gsc(credentials_path)

    # Step 2: Prepare payload
    payload = {
        'startDate': start_date,
        'endDate': end_date,
        'dimensions': list(dimensions),
        'type': 'web',
        "dimensionFilterGroups": [
            {"filters": [{
                "dimension": "page",
                "operator": "includingRegex", #contains/equals/notContains/notEquals/includingRegex/excludingRegex
                "expression": filter_pattern
            }]}
        ],
        "rowLimit": 25000,
        "startRow": 0,
    }

    # Step 3: Execute the query and process results
    df = execute_gsc_query(service, site_url, payload)

    # Step 4: Width-normalise the text columns. The helper is named
    # normalize_query_column_OLD; the unsuffixed name does not exist.
    normalize_columns = ["query", "page"]  # Specify the columns to normalize
    df = normalize_query_column_OLD(df, normalize_columns)

    # Step 5: Aggregate and sort results
    if not df.empty:
        df = (
            df.groupby(list(dimensions), as_index=False)
            .agg({'clicks': 'sum', 'impressions': 'sum'})
            .sort_values(by=['clicks', 'impressions'], ascending=[False, False])
        )

    return df

def query_web_gsc(credentials_path, site_url, filter_pattern, start_date, end_date, dimensions=("query", "page"), scopes=None):
    """
    Query Search Console web results for pages matching a regular expression.

    Args:
        credentials_path (str): Path to the service account key file.
        site_url (str): The site property URL in Google Search Console.
        filter_pattern (str): Regular expression the page must match.
        start_date (str): Query start date (YYYY-MM-DD).
        end_date (str): Query end date (YYYY-MM-DD).
        dimensions (sequence): Dimensions to include in the query
            (default: 'query' and 'page'). Any list or tuple is accepted.
        scopes (list, optional): OAuth scopes passed on to ``connect_to_gsc``.

    Returns:
        pd.DataFrame: Whatever ``execute_gsc_query`` returns for the request.
    """
    # Step 1: Connect to GSC
    service = connect_to_gsc(credentials_path, scopes)

    # Step 2: Prepare payload
    payload = {
        'startDate': start_date,
        'endDate': end_date,
        # Copied into a fresh list so neither the immutable default nor a
        # caller-supplied sequence is shared with the request payload
        'dimensions': list(dimensions),
        'type': 'web',
        "dimensionFilterGroups": [
            {"filters": [{
                "dimension": "page",
                "operator": "includingRegex", #contains/equals/notContains/notEquals/includingRegex/excludingRegex
                "expression": filter_pattern
            }]}
        ],
        "rowLimit": 25000,
        "startRow": 0,
    }

    # Step 3: Execute the query and process results
    return execute_gsc_query(service, site_url, payload)

print(f"対象期間：{From}〜{__To}")

GSC_PROPERTY = "https://corp.shiseido.com"

# Regular expression the page URL must match
FILTER_PATTERN = "/deilab/"
df = query_web_gsc(CREDS_PATH, GSC_PROPERTY, FILTER_PATTERN, From, __To)

# Name of the worksheet that receives the result
#@markdown 結果を保存するGoogle Sheetsのシート名
Sheet = '_gsc'  #@param {type:"string"}

if df.empty:
    # An empty result may have no 'clicks' column at all (for example after an
    # API error), so filtering would raise; the sheet is left as it is.
    print("No Search Console data was returned; the sheet was left unchanged.")
else:
    # Only queries that produced at least one click are written
    save_to_google_sheet(gs_url=GS_URL, sheet_name=Sheet, df=df[df['clicks'] > 0])

    # Stamp the report period into cells G1 / I1 of the "入口とKW" sheet
    update_sheets_cells({"入口とKW": {"G1": From, "I1": __To}})

対象期間：2025-11-01〜2025-11-30
Googleスプレッドシート「DE&IラボGAレポート」を開きました。
「_gsc」シートを選択しました。
データを「_gsc」シートへ反映しました。
Data successfully saved to the sheet: _gsc, column widths adjusted, and first row frozen.
Successfully updated sheet '入口とKW' with updates: {'G1': '2025-11-01', 'I1': '2025-11-30'}
