File for just viewing lines of the entire table.

In [1]:
import logging
import numpy as np

import os
os.chdir("..")

import pandas as pd

from src.db import Database
from src.tags import Tags

LOGGER = logging.getLogger(__name__)
logging.basicConfig(level = logging.INFO)

In [1210]:
def reload_metadata(db, filter=True):
    # Initialize Tag class
    metadata = Tags()

    # Load the tag file and turn into a dataframe
    metadata.load("data/tag_files/tags.yaml")
    md = pd.DataFrame(metadata.tags["tagged_papers"])
    
    # Merge with database
    mddb = db.merge(md, on="id", how="outer")
    
    # List current progress out of total.
    total_papers = mddb.shape[0]
    tagged_papers = sum(~mddb["tag"].isnull())
    percent = np.round((tagged_papers/total_papers) * 100, 2)
    
    LOGGER.info(f"Currently {tagged_papers}/{total_papers} ({percent}%) papers tagged.")
    LOGGER.info(f"{total_papers-tagged_papers} papers remaining.")
    
    # Filter out already tagged papers
    if filter:
        mddb = mddb[mddb['tag'].isnull()]
        
    mddb = mddb.reset_index()
        
    return mddb

def print_paper(mddb, idx=0):
    row = mddb.loc[idx]
    id = row["id"]
    title = row["title"]
    authors = row["authors"]
    publication_year = row["publication_year"]
    
    LOGGER.info(f"ID: {id} \n")
    LOGGER.info(f"Title: {title} \n")
    LOGGER.info(f"Authors: {authors} \n")
    LOGGER.info(f"Publication year: {publication_year} \n")

## Load

In [3]:
# Set tables
tables = ["active_inference", "bayesian_mechanics", "free_energy", "friston", "karl_friston", "predictive_coding", "predictive_processing"]

# Load database
database = Database()
database.load(tables=tables)
db = database.db

# Load metadata
mddb = reload_metadata(db, filter=False)


INFO:src.db:Checking tables...
INFO:src.db:Loading tables...
INFO:src.db:Tables downloaded from PubMed on Thursday, Sept. 14, 2023.
INFO:src.tags:YAML tag file successfully loaded from data/tag_files/tags.yaml.
INFO:__main__:Currently 2337/3579 (65.3%) papers tagged.


## View table

In [1875]:
# View paper and
print_paper(mddb)

KeyError: 0

In [1874]:
# Load/reload metadata
mddb = reload_metadata(db)

INFO:src.tags:YAML tag file successfully loaded from data/tag_files/tags.yaml.
INFO:__main__:Currently 3585/3585 (100.0%) papers tagged.
INFO:__main__:0 papers remaining.
