In [1]:
import requests
from pprint import pprint

# AWS product-directory search endpoint. The query string asks for the
# "aws-products" directory, sorted by lowercase product name (ascending),
# up to 1000 items, Japanese locale (ja_JP), restricted to items tagged as
# type "service" or type "feature" (%23 is "#", %7C is "|").
# NOTE(review): only a single request is made — confirm the directory never
# exceeds size=1000 entries, otherwise later items are silently missing.
services_api = "https://aws.amazon.com/api/dirs/items/search?item.directoryId=aws-products&sort_by=item.additionalFields.productNameLowercase&sort_order=asc&size=1000&item.locale=ja_JP&tags.id=aws-products%23type%23service%7Caws-products%23type%23feature"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON-decoding or KeyError failure in a later cell.
services_response = requests.get(services_api, timeout=30)
services_response.raise_for_status()
services_json = services_response.json()

In [None]:
# Install the third-party packages that later cells import
# (`notion_client` and `dateutil`) into the kernel's environment.
# NOTE(review): the `notion_client` module is normally distributed on PyPI as
# `notion-client`; confirm that `notion-sdk-py` installs the intended package.
# NOTE(review): versions are unpinned, so a re-run may pull different releases.
%pip install -U notion-sdk-py
%pip install python-dateutil

In [3]:
from dateutil.parser import parse


def _is_date(string, fuzzy=False):
    """Return True when ``string`` can be parsed as a date by dateutil.

    Args:
        string: Candidate text to test.
        fuzzy: Passed straight through to ``dateutil.parser.parse``.

    Returns:
        bool: True if ``parse`` succeeds, False if it rejects the input.
    """
    try:
        parse(string, fuzzy=fuzzy)
    except (ValueError, OverflowError, TypeError):
        # ValueError: the text is not a recognisable date.
        # OverflowError: the parsed value does not fit the platform's integers.
        # TypeError: the input was not a string/stream (e.g. None or a float).
        # A yes/no predicate should answer False for all of these, not raise.
        return False
    return True

In [4]:
import pandas as pd

# Each search hit keeps its product attributes under item -> additionalFields;
# gather those dicts so every attribute becomes a DataFrame column.
service_fields = [
    entry["item"]["additionalFields"] for entry in services_json["items"]
]
services_df = pd.DataFrame(service_fields)

# Preview the first five rows.
display(services_df.head(n=5))

Unnamed: 0,pricingUrl,freeTierAvailability,productSummary,launchDate,productUrl,productName,productNameLowercase,productCategory,featureFlag,featuredRank,subHeadline
0,https://aws.amazon.com/iot-core/pricing/?did=a...,<p>&#160;</p>,デバイスをクラウドに接続,2015-12-18,https://aws.amazon.com/iot-core/?did=ap_card&t...,AWS IoT Core,1 aws iot core,IoT,,,
1,https://aws.amazon.com/iot-fleetwise/pricing/?...,<p>&#160;</p>,車両データをほぼリアルタイムで簡単に収集し、変換して、クラウドに転送,2021-11-30,https://aws.amazon.com/iot-fleetwise/?did=ap_c...,AWS IoT FleetWise,2 aws iot fleetwise,IoT,,,
2,https://aws.amazon.com/iot-sitewise/pricing/?d...,<p>&#160;</p>,IoT データコレクターおよびインタプリタ,2020-07-09,https://aws.amazon.com/iot-sitewise/?did=ap_ca...,AWS IoT SiteWise,3 aws iot sitewise,IoT,,,
3,https://aws.amazon.com/iot-twinmaker/pricing/?...,<p>&#160;</p>,実世界システムのデジタルツインを簡単に作成して業務を最適化,2021-11-30,https://aws.amazon.com/iot-twinmaker/?did=ap_c...,AWS IoT TwinMaker,4 aws iot twinmaker,IoT,,,
4,https://aws.amazon.com/greengrass/pricing/?did...,12 か月間無料,デバイスのローカルでのコンピューティング、メッセージング、同期,2017-06-07,https://aws.amazon.com/greengrass/?did=ap_card...,AWS IoT Greengrass,5 aws iot greengrass,IoT,,,


In [7]:
import os
from notion_client import Client
import numpy as np

# Read credentials from the environment so real secrets never have to be
# written into (and committed with) the notebook. The original placeholder
# strings remain as fallbacks when the variables are not set.
TOKEN = os.environ.get("NOTION_TOKEN", "changeme-notion-token")
DATABASE_ID = os.environ.get("NOTION_DATABASE_ID", "changeme-database-id")

client = Client(auth=TOKEN)

# Initial query of the target database; `results` holds the pages returned by
# this single call.
# NOTE(review): `res`/`results` are not referenced again in the visible cells —
# confirm whether this query is still needed beyond checking connectivity.
res = client.databases.query(database_id=DATABASE_ID)
results = res["results"]


def _is_exist_page(database_id, service_name, launch_date):
    """Report whether the Notion database already contains a matching page.

    The database is queried for pages whose "Service Name" title equals
    ``service_name``. When ``str(launch_date)`` passes ``_is_date``, the
    "Launch Date" property must equal ``launch_date`` as well.

    Args:
        database_id: ID of the Notion database to query.
        service_name: Title value to match exactly.
        launch_date: Launch date to match; ignored when it is not a date.

    Returns:
        bool: True if the query returns at least one page, otherwise False.
    """
    title_clause = {"property": "Service Name", "title": {"equals": service_name}}
    clauses = [title_clause]

    # Only constrain on the date when the value actually looks like one.
    if _is_date(str(launch_date)):
        date_clause = {"property": "Launch Date", "date": {"equals": launch_date}}
        clauses.append(date_clause)

    response = client.databases.query(
        database_id=database_id, filter={"and": clauses}
    )
    return len(response["results"]) > 0


def _is_missing(value):
    """Return True when ``value`` is a scalar that pandas treats as missing.

    Covers ``None`` and NaN. Non-scalar values are reported as not missing so
    the result is always a plain bool.
    """
    return pd.api.types.is_scalar(value) and bool(pd.isna(value))


# Create one Notion page per AWS service that is not already in the database.
for index, row in services_df.iterrows():
    product_name = row["productName"]
    launch_date = row["launchDate"]
    product_category = row["productCategory"]
    product_url = row["productUrl"]
    product_summary = row["productSummary"]

    # A page needs a title. NaN is truthy, so a plain `not product_name`
    # check would let rows with a missing name through; test for it explicitly.
    if _is_missing(product_name) or not product_name:
        continue

    # Substitute defaults for absent optional fields.
    if _is_missing(product_category):
        product_category = "その他"
    if _is_missing(product_url):
        # NOTE(review): confirm Notion accepts "" for a url property; it may
        # require None to leave the field blank.
        product_url = ""
    if _is_missing(product_summary):
        product_summary = ""

    # Skip services that already have a page (same name and, when known, date).
    if _is_exist_page(DATABASE_ID, product_name, launch_date):
        continue

    prop = {
        "Service Name": {"title": [{"text": {"content": product_name}}]},
        "Category": {"select": {"name": product_category}},
        "Official Page": {"url": product_url},
        "Summary": {"rich_text": [{"text": {"content": product_summary}}]},
    }

    # Only send a launch date when the value parses as one.
    if _is_date(str(launch_date)):
        prop["Launch Date"] = {"date": {"start": launch_date}}

    res = client.pages.create(parent={"database_id": DATABASE_ID}, properties=prop)