In [8]:
import requests
from keys import NOTION_TOKEN, DATABASE_ID
import numpy as np
import pandas as pd
import os


In [9]:
# HTTP headers attached to every Notion API request made in this notebook.
headers = {}
headers["Authorization"] = "Bearer " + NOTION_TOKEN  # bearer-token authentication
headers["Content-Type"] = "application/json"
headers["Notion-Version"] = "2022-06-28"  # pin the API version the code was written against

In [10]:
def get_pages(num_pages=None):
    """Query the Notion database and return its page objects.

    Args:
        num_pages: Maximum number of pages to return. ``None`` (the default)
            fetches every page in the database. Values above 100 are served
            by following the pagination cursor, because a single request is
            limited to 100 results. Values <= 0 return an empty list without
            contacting the API.

    Returns:
        A list of the raw page dicts taken from the ``"results"`` field of
        each response, in the order the API returned them.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = f"https://api.notion.com/v1/databases/{DATABASE_ID}/query"
    max_page_size = 100  # largest page_size accepted per request

    get_all = num_pages is None
    if not get_all and num_pages <= 0:
        return []

    results = []
    payload = {}
    while True:
        if get_all:
            payload["page_size"] = max_page_size
        else:
            # Never ask for more than is still needed, nor more than the cap.
            payload["page_size"] = min(max_page_size, num_pages - len(results))

        response = requests.post(url, json=payload, headers=headers)
        # Fail loudly on an error status instead of indexing an error payload.
        response.raise_for_status()
        data = response.json()

        # Uncomment this to dump the current response to a file
        # import json
        # with open('db.json', 'w', encoding='utf8') as f:
        #    json.dump(data, f, ensure_ascii=False, indent=4)

        results.extend(data["results"])

        if not data.get("has_more"):
            break
        if not get_all and len(results) >= num_pages:
            break
        # Continue from where the previous response stopped.
        payload["start_cursor"] = data["next_cursor"]

    return results

In [11]:
# Cache of user id -> name so each user is requested from the API at most once.
existing_names = {}


def get_name(user_id: str) -> str:
    """Return the name of a Notion user, caching results by user id.

    Args:
        user_id: Identifier of the user to look up.

    Returns:
        The ``'name'`` field of the user object returned by the API.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        KeyError: if the returned object has no ``'name'`` field.
    """
    if user_id in existing_names:
        return existing_names[user_id]
    url = f'https://api.notion.com/v1/users/{user_id}'
    response = requests.get(url, headers=headers)
    # Surface HTTP failures directly rather than as a KeyError on the payload.
    response.raise_for_status()
    name = response.json()['name']
    # Only successful lookups are cached; failures are retried on the next call.
    existing_names[user_id] = name
    return name

In [12]:
def get_sourcer_name(props):
    """Return the name of the first person in the 'Sourcer Name' property.

    The user id is read from ``props['Sourcer Name']['people'][0]['id']`` and
    resolved through ``get_name``. This is a best-effort lookup: if the
    property is missing or empty, or if resolving the name fails for any
    ordinary reason, ``None`` is returned.
    """
    try:
        return get_name(props['Sourcer Name']['people'][0]['id'])
    except Exception:
        # Deliberately broad so lookup/network failures yield None, but unlike
        # a bare ``except`` it lets KeyboardInterrupt/SystemExit propagate.
        return None

def get_sourcing_week(props):
    """Return ``props['Sourcing Week']['select']['name']``, or ``None``.

    ``None`` is returned when any level of that path is missing or is not
    subscriptable (for example when the select value is ``None``).
    """
    try:
        return props['Sourcing Week']['select']['name']
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are treated as "no value"; anything else
        # (including KeyboardInterrupt) propagates.
        return None

def get_contacts(props):
    """Return ``props['Contacts']['number']``, or ``None``.

    ``None`` is returned when the property is missing or is not
    subscriptable; a stored value of ``None`` is returned as-is.
    """
    try:
        return props['Contacts']['number']
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are treated as "no value"; anything else
        # (including KeyboardInterrupt) propagates.
        return None

def get_company(props):
    """Return ``props['Company']['title'][0]['text']['content']``, or ``None``.

    ``None`` is returned when any level of that path is missing, when the
    title list is empty, or when a level is not subscriptable.
    """
    try:
        return props['Company']['title'][0]['text']['content']
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are treated as "no value"; anything else
        # (including KeyboardInterrupt) propagates.
        return None

In [15]:
# Fetch every page of the database, then flatten each page into one record
# keyed by its page id.
pages = get_pages()

data = {}
for page in pages:
    page_id = page["id"]
    props = page["properties"]
    # Each getter yields None when its property is absent.
    data[page_id] = {
        'Name': get_sourcer_name(props),
        'Week': get_sourcing_week(props),
        'Number': get_contacts(props),
        'Company': get_company(props),
    }

In [16]:
# One row per page id, one column per extracted field.
df = pd.DataFrame.from_dict(data, orient='index')

# Choose the first file name that is not taken yet (data.csv, data1.csv,
# data2.csv, ...) so an earlier export is never overwritten.
counter = 0
file = 'data.csv'
while True:
    if not os.path.isfile(file):
        break
    counter += 1
    file = f'data{counter}.csv'

df.to_csv(file)