## Example 1: Using the `Database` class

This notebook demonstrates how to use the `Database` class to load/save existing database or create new database.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.chdir("..")

from src.db import Database

TABLE_DIR = "data/tables/2023_09_14"
TAG_PATH = "data/tags/tags.yaml"

## Create a database, attach tags, and save the database

In [6]:
database = Database()
database.create(table_dir=TABLE_DIR)
database.attach_tags(tag_path=TAG_PATH)
database.save()

INFO:src.db:Loading tables...
INFO:src.db:Database created at 2025-03-21 17:12:35.196730.
INFO:src.db:Loading tags...


INFO:src.tags:YAML tag file successfully loaded from data/tags/tags.yaml.
INFO:src.db:Adding tags to database...
INFO:src.db:Database saved to data/databases/database__2025-03-21__17:12:35.196730.pkl.


## Load a database

In [69]:
database = Database()
database.load("data/databases/database__2025-03-21__17:12:35.196730.pkl")
database.db

INFO:src.db:Database loeaded from data/databases/database__2025-03-21__17:12:35.196730.pkl.


Unnamed: 0,title,authors,where_published,year,doi,tag
0,Friston's free energy principle: new life for ...,Holmes J.,BJPsych Bull,2022,10.1192/bjb.2021.6,"[psychoanalysis, psychotherapy]"
1,Friston's theory of everything,McCrone J.,Lancet Neurol,2022,10.1016/S1474-4422(22)00137-5,[editorial]
2,Voxel-based morphometry--the methods,"Ashburner J, Friston KJ.",Neuroimage,2000,10.1006/nimg.2000.0582,"[review, neuroimaging]"
3,Scientific realism about Friston blankets with...,"Kiverstein J, Kirchhoff M.",Behav Brain Sci,2022,10.1017/S0140525X22000267,"[Markov blankets, philosophy, comment / response]"
4,Structural and functional brain networks: from...,"Park HJ, Friston K.",Science,2013,10.1126/science.1238411,"[review, network analysis]"
...,...,...,...,...,...,...
3580,Changes in both top-down and bottom-up effecti...,"Thomas GEC, Zeidman P, Sultana T, Zarkali A, R...",Brain Commun,2022,10.1093/braincomms/fcac329,[predictive processing]
3581,Spectral-temporal EEG dynamics of speech discr...,"Gilley PM, Uhler K, Watson K, Yoshinaga-Itano C.",BMC Neurosci,2017,10.1186/s12868-017-0353-4,[predictive processing]
3582,A robot or a dumper truck? Facilitating play-b...,"Paldam E, Roepstorff A, Steensgaard R, Lundsga...",Autism Dev Lang Impair,2022,10.1177/23969415221086714,[predictive processing]
3583,The neurophenomenology of early psychosis: An ...,"Nelson B, Lavoie S, Gawęda Ł, Li E, Sass LA, K...",Conscious Cogn,2020,10.1016/j.concog.2019.102845,[predictive processing]


## Detaching a database

In [5]:
database.detach()

INFO:src.db:Database detached.


## Remove papers from database

In [4]:
papers_to_remove = ["10.1192/bjb.2021.6", "10.1016/S1474-4422(22)00137-5"]
database.remove(doi_list=papers_to_remove)

database.db

Unnamed: 0,title,authors,where_published,year,doi,tag
0,Voxel-based morphometry--the methods,"Ashburner J, Friston KJ.",Neuroimage,2000,10.1006/nimg.2000.0582,"[review, neuroimaging]"
1,Scientific realism about Friston blankets with...,"Kiverstein J, Kirchhoff M.",Behav Brain Sci,2022,10.1017/S0140525X22000267,"[Markov blankets, philosophy, comment / response]"
2,Structural and functional brain networks: from...,"Park HJ, Friston K.",Science,2013,10.1126/science.1238411,"[review, network analysis]"
3,Functional and effective connectivity: a review,Friston KJ.,Brain Connect,2011,10.1089/brain.2011.0008,"[review, neuroanatomy, neuroimaging, network a..."
4,Bayesian Dysconnections,Friston KJ.,Am J Psychiatry,2020,10.1176/appi.ajp.2020.20091421,"[editorial, schizophrenia, psychopathology]"
...,...,...,...,...,...,...
3578,Changes in both top-down and bottom-up effecti...,"Thomas GEC, Zeidman P, Sultana T, Zarkali A, R...",Brain Commun,2022,10.1093/braincomms/fcac329,[predictive processing]
3579,Spectral-temporal EEG dynamics of speech discr...,"Gilley PM, Uhler K, Watson K, Yoshinaga-Itano C.",BMC Neurosci,2017,10.1186/s12868-017-0353-4,[predictive processing]
3580,A robot or a dumper truck? Facilitating play-b...,"Paldam E, Roepstorff A, Steensgaard R, Lundsga...",Autism Dev Lang Impair,2022,10.1177/23969415221086714,[predictive processing]
3581,The neurophenomenology of early psychosis: An ...,"Nelson B, Lavoie S, Gawęda Ł, Li E, Sass LA, K...",Conscious Cogn,2020,10.1016/j.concog.2019.102845,[predictive processing]


## Update database from dict

In [50]:
papers = [{
    "doi"    : "https://doi.org/10.48550/arXiv.2409.15532",
    "authors": [
        "Lancelot Da Costa", "Nathael Da Costa", "Conor Heins", "Johan Medrano", "Grigorios A. Pavliotis", "Thomas Parr", "Ajith Anil Meera", "Karl Friston"],
    "where_published": "ArXiv",
    "year": 2024,
    "title": "A theory of generalised coordinates for stochastic differential equations"
},
{
    "doi" : "https://doi.org/10.48550/arXiv.2503.13223",
    "authors" : ["Allahkaram Shafiei", "Hozefa Jesawada", "Karl Friston", "Giovanni Russo"],
    "where_published": "ArXiv",
    "year": 2025,
    "title": "Robust Decision-Making Via Free Energy Minimization"
}]

In [51]:
database.update_from_dicts_list(entries=papers)
database.db

In [70]:
database.update_from_CSV(csv_path="/home/sanjeev/Documents/git_repos/aif-fep-db/examples/test_entry.csv")
database.db

Unnamed: 0,title,authors,where_published,year,doi,tag
0,Friston's free energy principle: new life for ...,Holmes J.,BJPsych Bull,2022,10.1192/bjb.2021.6,"[psychoanalysis, psychotherapy]"
1,Friston's theory of everything,McCrone J.,Lancet Neurol,2022,10.1016/S1474-4422(22)00137-5,[editorial]
2,Voxel-based morphometry--the methods,"Ashburner J, Friston KJ.",Neuroimage,2000,10.1006/nimg.2000.0582,"[review, neuroimaging]"
3,Scientific realism about Friston blankets with...,"Kiverstein J, Kirchhoff M.",Behav Brain Sci,2022,10.1017/S0140525X22000267,"[Markov blankets, philosophy, comment / response]"
4,Structural and functional brain networks: from...,"Park HJ, Friston K.",Science,2013,10.1126/science.1238411,"[review, network analysis]"
...,...,...,...,...,...,...
3582,A robot or a dumper truck? Facilitating play-b...,"Paldam E, Roepstorff A, Steensgaard R, Lundsga...",Autism Dev Lang Impair,2022,10.1177/23969415221086714,[predictive processing]
3583,The neurophenomenology of early psychosis: An ...,"Nelson B, Lavoie S, Gawęda Ł, Li E, Sass LA, K...",Conscious Cogn,2020,10.1016/j.concog.2019.102845,[predictive processing]
3584,Noradrenergic deficits contribute to apathy in...,"Hezemans FH, Wolpe N, O'Callaghan C, Ye R, Rua...",PLoS Comput Biol,2022,10.1371/journal.pcbi.1010079,[predictive processing]
3585,A theory of generalised coordinates for stocha...,"[Lancelot Da Costa, Nathael Da Costa, Conor He...",ArXiv,2024,10.48550/arXiv.2409.15532,[untagged]


In [None]:
import pandas as pd
from ast import literal_eval

csv_path = "/home/sanjeev/Documents/git_repos/aif-fep-db/examples/test_entry.csv"
papers = pd.read_csv(csv_path, converters={"authors": literal_eval, "tag": literal_eval})
papers = papers.to_dict(orient="records")

In [None]:
database.db.iloc[-2:, :].to_csv("test_entry.csv", index=False)

In [None]:
import re

DOI_REGEX_PATTERN = r"^10\.\d{4,9}/[-._;()/:A-Z0-9]+$"
doi = "https://doi.org/10.48550/arXiv.2409.15532"
doi = doi.strip("https://doi.org/")

match = re.match(DOI_REGEX_PATTERN, doi, re.IGNORECASE)

if not bool(match):
    print(f"Warning: {doi} failed DOI validation!")

In [5]:
from src.utils import load_tables

table_dir = "data/tables/2024_07_22"
table_names = os.listdir(table_dir)
table_paths = [table_dir + "/" + table_name for table_name in table_names]
table_list = load_tables(table_dir)


for idx, table in enumerate(table_list):
    table.drop(["PMID", "Citation", "First Author", "Create Date", "PMCID", "NIHMS ID"], axis=1, inplace=True)
    table.columns = ["title", "authors", "where_published", "year", "doi"]
    table.to_csv(table_paths[idx])

KeyError: "['PMID', 'Citation', 'First Author', 'Create Date', 'PMCID', 'NIHMS ID'] not found in axis"

In [11]:
import pandas as pd

db_list = []
table_dir = "data/tables/2023_09_14"
table_names = os.listdir(table_dir)
table_paths = [table_dir + "/" + table_name for table_name in table_names]

for table_path in table_paths:
    table = pd.read_csv(table_path)
    db_list.append(table)

datetime.datetime