# Paperpile to Notion

[API Reference](https://developers.notion.com/)

`% conda activate api`

`% conda install -c conda-forge jsonpath-ng`


In [None]:
import requests
import secret
import json

In [None]:
# Query a database: POST to the database's /query endpoint.

base_url = 'https://api.notion.com/v1/databases/'

# NOTE(review): the key is sent as-is; presumably secret.NOTION_API_KEY already
# carries any scheme prefix the API expects - confirm.
header = {
    'Authorization': secret.NOTION_API_KEY,
    'Notion-Version': '2021-08-16',
    'Content-Type': 'application/json',
}

# Filter on the "Sync" checkbox property being ticked ...
query = dict(filter=dict(property='Sync', checkbox=dict(equals=True)))
# ... which is replaced straight away by an empty body, so no filter is sent.
query = dict()

response = requests.post(
    ''.join([base_url, secret.NOTION_DATABASE_ID, '/query']),
    headers=header,
    json=query,
)
response


In [None]:
# Create a Page in the database with three hard-coded property values.

base_url = 'https://api.notion.com/v1/pages'

header = {
    'Authorization': secret.NOTION_API_KEY,
    'Notion-Version': '2021-08-16',
    'Content-Type': 'application/json',
}

# NOTE(review): the prop* helpers defined later in this notebook wrap each
# value in a typed object ({"title": [...]}, {"number": ...}); the bare values
# used here are sent unchanged - confirm the API accepts this shape.
query = dict(
    parent=dict(database_id=secret.NOTION_DATABASE_ID),
    properties=dict(
        Number=401,
        Name=[dict(type='text', text=dict(content='from Python'))],
        Column=[dict(type='text', text=dict(content="Tag 1"))],
    ),
)

response = requests.post(base_url, headers=header, json=query)
response


In [None]:
# Paperpile JSON: load the export and pull a few fields out of its last entry.

fn = secret.PAPERPILE_JSON
with open(fn) as f:
    papers = json.loads(f.read())

# Every lookup indexes directly, so a missing field raises KeyError.
title, url, doi = (papers[-1][field] for field in ('title', 'url', 'doi'))
published = papers[-1]['published']['year']
abstract, author = (papers[-1][field] for field in ('abstract', 'author'))
(title, url, doi, published, abstract, author)


In [None]:
def decode(x):
    """Return the items produced by iterating ``x`` as a new list.

    For a dict this yields its keys, in iteration order.
    """
    return list(x)

# Show what iterating the last Paperpile entry yields (its keys, if it is a dict).
decode(papers[-1])

In [None]:
# Create a Page from the title / published / doi values set by an earlier cell.

base_url = 'https://api.notion.com/v1/pages'

header = {
    'Authorization': secret.NOTION_API_KEY,
    'Notion-Version': '2021-08-16',
    'Content-Type': 'application/json',
}

# Only Title, Published and doi are sent; Author and URL are not included yet.
query = dict(
    parent=dict(database_id=secret.NOTION_DATABASE_ID),
    properties={
        name: [dict(type='text', text=dict(content=value))]
        for name, value in (
            ('Title', title),
            ('Published', published),
            ('doi', doi),
        )
    },
)

response = requests.post(base_url, headers=header, json=query)
response


In [None]:
def propTitle(_text):
    """Build a ``{"title": [...]}`` value holding one text object for ``_text``."""
    text_object = dict(type="text", text=dict(content=_text))
    return dict(title=[text_object])

def propRichText(_text):
    """Build a ``{"rich_text": [...]}`` value holding one text object for ``_text``."""
    text_object = dict(type="text", text=dict(content=_text))
    return dict(rich_text=[text_object])

def propText(_text):
    """Build a ``{"text": [...]}`` value holding one text object for ``_text``."""
    text_object = dict(type="text", text=dict(content=_text))
    return dict(text=[text_object])

def propNumber(_number):
    """Wrap ``_number`` under the ``"number"`` key."""
    return dict(number=_number)

def propURL(_url):
    """Wrap ``_url`` under the ``"url"`` key."""
    return dict(url=_url)


In [None]:
# Wrap the abstract (set by an earlier cell) in a paragraph block and put it
# into a one-element list of child blocks.
paragraph = {
    'object': 'block',
    'type': 'paragraph',
    'paragraph': propText(abstract),
}

children = [paragraph]

children


In [None]:
# Paperpile JSON to Notion Database

import requests
import secret
import json


def propTitle(_text):
    """Build a ``{"title": [...]}`` value holding one text object for ``_text``."""
    content = {"content": _text}
    return {"title": [dict(type="text", text=content)]}


def propRichText(_text):
    """Build a ``{"rich_text": [...]}`` value holding one text object for ``_text``."""
    content = {"content": _text}
    return {"rich_text": [dict(type="text", text=content)]}


def propText(_text):
    """Build a ``{"text": [...]}`` value holding one text object for ``_text``."""
    content = {"content": _text}
    return {"text": [dict(type="text", text=content)]}


def propNumber(_number):
    """Wrap ``_number`` under the ``"number"`` key."""
    wrapped = {}
    wrapped["number"] = _number
    return wrapped


def propURL(_url):
    """Wrap ``_url`` under the ``"url"`` key."""
    wrapped = {}
    wrapped["url"] = _url
    return wrapped
    


# Notion limits the content of a single rich-text object to 2000 characters
# (see the API's request limits), so long abstracts are sent as several
# paragraph blocks instead of one oversized block.
# NOTE(review): Python counts code points here; confirm Notion counts the same way.
NOTION_TEXT_LIMIT = 2000


def _chunk_text(text, size=NOTION_TEXT_LIMIT):
    """Split ``text`` into consecutive pieces of at most ``size`` characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


# The endpoint and headers are the same for every paper, so build them once.
base_url = 'https://api.notion.com/v1/pages'
header = {'Authorization': secret.NOTION_API_KEY,
          'Notion-Version': '2021-08-16', 'Content-Type': 'application/json'}

fn = secret.PAPERPILE_JSON
# JSON is UTF-8 by specification; do not depend on the platform default.
with open(fn, encoding='utf-8') as f:
    papers = json.load(f)

for paper in papers:
    # Missing fields come back as None and are simply left out of the page.
    title = paper.get('title')
    url = paper.get('url')
    doi = paper.get('doi')
    abstract = paper.get('abstract')
    author = paper.get('author')  # read for inspection; not sent to Notion yet

    # 'published' may be absent; look the year up without raising so that one
    # missing field does not discard the fields that follow it.
    published_info = paper.get('published')
    published = (published_info.get('year')
                 if isinstance(published_info, dict) else None)

    # Create a Page

    query = dict(
        parent=dict(database_id=secret.NOTION_DATABASE_ID),
        properties=dict())
    properties = query['properties']

    if title is not None:
        properties['Title'] = propTitle(title)

    if published is not None:
        # Text content must be a string even if the export stores a number.
        properties['Published'] = propRichText(str(published))

    if doi is not None:
        properties['doi'] = propRichText(doi)

    if url:
        # The loop originally took url[0], i.e. the first entry of a list;
        # a plain string is passed through whole, and an empty value is skipped.
        first_url = url if isinstance(url, str) else url[0]
        properties['URL'] = propURL(first_url)

    if abstract:
        # One paragraph block per chunk keeps every block within the limit.
        children = [
            dict(object='block', type='paragraph', paragraph=propText(chunk))
            for chunk in _chunk_text(abstract)
        ]
        query['children'] = children

    response = requests.post(base_url, headers=header, json=query)
    if not response.ok:
        # Report the failure instead of dropping it silently, then carry on
        # with the remaining papers.
        print('----- Request failed -----')
        print(response.status_code, title)
        print(response.text)


In [None]:
# Decode the body of the response left in `response` by the last request as JSON.
response.json()

# 論文の引用関係をグラフで可視化

[https://qiita.com/iwashi-kun/items/ed7c3e19ceea6383eda5](https://qiita.com/iwashi-kun/items/ed7c3e19ceea6383eda5)


In [None]:
import pandas as pd
import numpy as np
import math
from tqdm import tqdm
from crossref.restful import Works
import pprint

# Presumably registers the progress_apply method that is called on
# df_paperpile later in this notebook - confirm against the tqdm docs.
tqdm.pandas()

In [None]:
# Load the Paperpile export as a DataFrame. Missing DOIs are replaced with ""
# so that makeGraph can compare row["doi"] against the empty string.
df_paperpile = pd.read_json(secret.PAPERPILE_JSON)
df_paperpile = df_paperpile.assign(doi=df_paperpile["doi"].fillna(""))
df_paperpile.head()

In [None]:
# Node payloads keyed by DOI (or by a "no-doi:<row label>" key for papers
# without one), and adjacency lists: citing DOI -> list of cited DOIs.
graph_nodes = {}
graph_edges = {}


def _count_or_unknown(record, key):
    """Return ``record[key]`` as an int, or -1 when the key is absent."""
    return int(record[key]) if key in record else -1


def _paperpile_node(row, cited_by_count=-1, reference_count=-1):
    """Build the node payload for a paper that comes from the Paperpile export."""
    return {
        "title": row["title"],
        "author": row["author"],
        "citedByLink": row["citedByLink"],
        "citedByCount": cited_by_count,
        "referenceCount": reference_count,
        "inPaperPile": True,
    }


def makeGraph(row):
    """Add one Paperpile paper, and the papers it cites, to the module-level graph.

    Args:
        row: A mapping-like row with "doi", "title", "author" and
            "citedByLink" entries; "doi" is "" when the paper has none.

    Side effects:
        Writes into ``graph_nodes`` and ``graph_edges``. For a paper with a
        DOI, the record is looked up through ``Works().doi``; each reference
        that carries a "DOI" is looked up too unless it is already a node.
        Counts that cannot be determined are stored as -1.

    Returns:
        None.
    """
    doi = row["doi"]

    if doi == "":
        # No DOI: give every such paper its own key. Using "" for all of them
        # would make each one overwrite the previous.
        node_key = "no-doi:{}".format(getattr(row, "name", row["title"]))
        graph_nodes[node_key] = _paperpile_node(row)
        return

    # One client is enough for the paper and all of its references.
    works = Works()
    paper = works.doi('https://doi.org/' + doi)

    if paper is None:
        # The DOI could not be resolved; keep the paper with unknown counts.
        graph_nodes[doi] = _paperpile_node(row)
        return

    graph_nodes[doi] = _paperpile_node(
        row,
        _count_or_unknown(paper, "is-referenced-by-count"),
        _count_or_unknown(paper, "reference-count"),
    )

    if "reference" not in paper:
        return

    for ref in tqdm(paper["reference"]):
        if "DOI" not in ref:
            continue
        ref_doi = ref["DOI"]

        if ref_doi not in graph_nodes:
            refpaper = works.doi('https://doi.org/' + ref_doi)
            if refpaper is not None:
                graph_nodes[ref_doi] = {
                    "title": refpaper["title"] if "title" in refpaper else "",
                    "author": refpaper["author"] if "author" in refpaper else "",
                    "citedByLink": "",
                    "citedByCount": _count_or_unknown(refpaper, "is-referenced-by-count"),
                    "referenceCount": _count_or_unknown(refpaper, "reference-count"),
                    "inPaperPile": False,
                }

        # Record the citation even when the cited paper was already a node;
        # otherwise links to papers seen earlier (including other Paperpile
        # papers) would be lost.
        graph_edges.setdefault(doi, []).append(ref_doi)

# Call makeGraph once per row (axis=1). The useful output is what makeGraph
# writes into graph_nodes / graph_edges, not the value returned here.
df_paperpile.progress_apply(makeGraph, axis=1)

In [None]:
import pickle

# Save both graph dictionaries under data/ ...
with open('data/graph_nodes.pkl', 'wb') as nodes:
    nodes.write(pickle.dumps(graph_nodes))

with open('data/graph_edges.pkl', 'wb') as edges:
    edges.write(pickle.dumps(graph_edges))

# ... and read them straight back. Unpickling can execute arbitrary code, so
# only load files that this notebook produced itself.
with open('data/graph_nodes.pkl', 'rb') as nodes:
    graph_nodes = pickle.loads(nodes.read())

with open('data/graph_edges.pkl', 'rb') as edges:
    graph_edges = pickle.loads(edges.read())


In [13]:
import networkx as nx
import plotly.graph_objects as go

def visualizeGraph(isOnlyInPaperpile=False):
    """Draw the citation graph held in ``graph_nodes`` / ``graph_edges`` with Plotly.

    Args:
        isOnlyInPaperpile: When true, only nodes whose ``inPaperPile`` flag is
            set are drawn, together with the edges that join two such nodes.
            When false (the default) every node is drawn.

    Marker size is ``citedByCount + 10`` capped at 40; marker colour is 1 for
    Paperpile papers and 0 for the others. The figure is shown with
    ``fig.show()`` and nothing is returned.
    """
    G = nx.Graph()

    # Nodes: copy the stored payload onto each node as its "info" attribute.
    for key, node in graph_nodes.items():
        if isOnlyInPaperpile and not node["inPaperPile"]:
            continue
        G.add_node(key, info={
            "title": str(node["title"]),
            "author": str(node["author"]),
            "citedByLink": str(node["citedByLink"]),
            "citedByCount": int(node["citedByCount"]),
            "referenceCount": int(node["referenceCount"]),
            "inPaperPile": node["inPaperPile"],
        })

    # Edges: only between nodes that are actually in the graph. Testing
    # against G rather than graph_nodes stops add_edge from silently creating
    # filtered-out nodes that would have no "info" attribute.
    for source in list(G.nodes):
        for target in graph_edges.get(source, ()):
            if target in G:
                G.add_edge(source, target)

    # Automatic layout; the fixed seed makes the picture reproducible.
    pos = nx.spring_layout(G, k=0.3, seed=1)

    # Each edge contributes start, end and a None separator so that a single
    # line trace can draw all segments.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    # Per-node data, all collected in one pass over G so that positions,
    # hover text, sizes and colours are guaranteed to line up.
    node_x = []
    node_y = []
    node_info = []
    impactSize = []
    isExistColor = []
    for key in G.nodes():
        info = G.nodes[key]["info"]
        x, y = pos[key]
        node_x.append(x)
        node_y.append(y)
        # The author entry is left out of the hover text.
        node_info.append("<br>".join(
            name + ": " + str(value)
            for name, value in info.items() if name != "author"))
        impactSize.append(min(info["citedByCount"] + 10, 40))
        isExistColor.append(1 if info["inPaperPile"] else 0)

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        mode='lines')

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        text=node_info,
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            reversescale=False,
            color=isExistColor,
            size=impactSize,
            colorbar=dict(
                thickness=1,
                # The colour values are 1 and 0, so the legend says so. The
                # nested title form replaces the deprecated `titleside`.
                title=dict(text='1: in paperpile, 0: not in paperpile',
                           side='right'),
                xanchor='left',
            ),
            line_width=2))

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # The nested title form replaces the deprecated `titlefont_size`.
            title=dict(text='Visualize Paperpile paper', font=dict(size=16)),
            showlegend=False,
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
    fig.update_layout(height=1000)
    fig.show()

# Draw every node (isOnlyInPaperpile keeps its default of False).
visualizeGraph()