This is how to manually add a paper to the database.

In [None]:
from backend.database import Paper, insert_paper, get_paper, update_paper
from backend.api import query_arxiv, query_serp, query_biorxiv
from backend.guess_category import Guesser

In [None]:
# Single Guesser instance created once for the notebook; add_paper uses it to
# fill in a category for papers that do not have one.
CATEGORY_GUESSER = Guesser()


def find_paper(title: str) -> dict:
    """Search for a paper by title and return the top search result.

    Nothing is written to the database here; the returned dict is meant to be
    checked by hand before it is stored.

    Args:
        title: Title of the paper to look up.

    Returns:
        The first entry of the ``formatted_results`` list returned by
        ``query_serp``.

    Raises:
        ValueError: If ``get_paper`` already finds a paper with this title.
        IndexError: If the search returned no results.
    """
    # Explicit raise instead of ``assert`` so the duplicate check still runs
    # when Python is started with -O, which strips assert statements.
    if get_paper(title) is not None:
        raise ValueError(f"Paper already exists in the database: {title!r}")

    response = query_serp(term=title, historical=True)
    results = response["formatted_results"]

    print(len(results))
    if not results:
        # Same exception type as indexing an empty list, but with a message
        # that says which title produced no hits.
        raise IndexError(f"No search results found for: {title!r}")

    # The top result is expected to be the paper of interest.
    selected_result = results[0]

    print(f"Selected: {selected_result['title']}")
    return selected_result


def add_paper(result: dict) -> None:
    """Enrich a search result with source metadata and store it as a Paper.

    For arXiv and bioRxiv results the matching lookup is queried and any
    returned fields are merged into ``result`` (the dict is updated in place).
    The merged dict is then turned into a ``Paper`` and inserted; if the paper
    has no category, one is guessed and the stored paper is updated.

    Args:
        result: Search result dict; must contain ``"journal"`` and, for the
            supported sources, the identifier key used for the lookup.
    """
    # (journal host, key holding the identifier, lookup function), checked in
    # this order; the journal is re-read each time because a merge may change it.
    sources = (
        ("arxiv.org", "arxiv_id", query_arxiv),
        ("biorxiv.org", "doi", query_biorxiv),
    )
    for journal, id_key, fetch in sources:
        if result["journal"] != journal:
            continue
        extra = fetch(result[id_key])
        if extra is not None:
            result.update(extra)

    # Going through the Paper class is required: it sanitizes the fields, and
    # the correct title cannot be obtained otherwise.
    paper = Paper(**result)
    insert_paper(paper)

    # Only papers without a category need a guessed one.
    if paper.category is not None:
        return
    paper.category = CATEGORY_GUESSER.guess(paper.id, paper.title)
    update_paper(paper)

In [None]:
# Titles of papers to add manually.
# Every entry is commented out here, so the list is empty as committed;
# uncomment or add at least one title before indexing into it.

titles = [
    # "Optimally-weighted Estimators of the Maximum Mean Discrepancy for Likelihood-Free Inference",
    # "Approximate Bayesian Computation with Domain Expert in the Loop",
    # "Approximate Bayesian neural Doppler imaging",
    # "Bayesian Stokes inversion with normalizing flows",
    # "Sequential Neural Score Estimation: Likelihood-Free Inference with Conditional Score Based Diffusion Models"
]

Given that the title might not align perfectly, especially for those containing LaTeX, it would be advisable to manually double-check for certainty.

In [None]:
# Search for the first title in `titles`; the trailing bare `paper` expression
# is there so the notebook shows the selected result for manual inspection.
paper = find_paper(title=titles[0])
paper

If it looks good, then we can add it to the database.

In [None]:
# Enrich the reviewed search result and write it to the database.
add_paper(paper)