# Notebook to compile PubMed, arXiv, PsyArXiv, and bioRxiv papers into a single database

In [68]:
import pandas as pd

from src.db import Database

In [69]:
# Load the PubMed records through the project's Database wrapper
pubmed_tables_dir = "data/tables/2024_07_22"
database = Database()
database.create(pubmed_tables_dir)

# Keep a handle on the table the wrapper exposes as `database.db`, then preview it.
# `database` itself is reused further down to attach tags and save the result.
pubmed = database.db
pubmed

INFO:src.db:Loading tables...


INFO:src.db:Database created at 2025-04-15 16:45:04.248461.


Unnamed: 0,title,authors,where_published,year,doi
0,Reliability of dynamic causal modelling of res...,"Jafarian A, Assem MK, Kocagoncu E, Lanskey JH,...",Hum Brain Mapp,2024,10.1002/hbm.26782
1,Global neural self-disturbance in schizophreni...,"Sabbah SG, Northoff G.",Schizophr Res,2024,10.1016/j.schres.2024.05.015
2,'Snakes and ladders' in paleoanthropology: Fro...,"Manrique HM, Friston KJ, Walker MJ.",Phys Life Rev,2024,10.1016/j.plrev.2024.01.004
3,Two distinct networks for encoding goals and f...,"Di Cesare G, Lombardi G, Zeidman P, Urgen BA, ...",Proc Natl Acad Sci U S A,2024,10.1073/pnas.2402282121
4,The Universal Optimism of the Self-Evidencing ...,"Fisher EL, Hohwy J.",Entropy (Basel),2024,10.3390/e26060518
...,...,...,...,...,...
758,Extrapolative delay compensation through facil...,"Lim H, Choe Y.",IEEE Trans Neural Netw,2008,10.1109/TNN.2008.2001002
759,New architecture for MPEG video streaming syst...,"Fu CH, Chan YL, Ip TP, Siu WC.",IEEE Trans Image Process,2007,10.1109/tip.2007.902330
760,A basis for generating expectancies for verbs ...,"McRae K, Hare M, Elman JL, Ferretti T.",Mem Cognit,2005,10.3758/bf03193221
761,Mental state inference using visual control pa...,"Oztop E, Wolpert D, Kawato M.",Brain Res Cogn Brain Res,2005,10.1016/j.cogbrainres.2004.08.004


In [70]:
# Import arXiv and PsyArXiv
arxiv_psyarxiv = pd.read_csv("arxiv_psyarxiv_papers.csv")

# Reduce DOI URLs to the bare DOI by removing a leading resolver prefix.
# Do NOT use `str.strip("https://doi.org/")` for this: the argument of `strip`
# is a *set of characters* removed from both ends, so any DOI that ends in one
# of "h t p s : / d o i . r g" would silently lose its trailing characters.
# The anchored pattern below only removes a literal prefix at the start of the
# value (http/https, optional "dx." host) and leaves missing values as NaN
# instead of raising.
arxiv_psyarxiv["doi"] = arxiv_psyarxiv["doi"].str.replace(
    r"^https?://(?:dx\.)?doi\.org/", "", regex=True
)
arxiv_psyarxiv

Unnamed: 0,title,authors,where_published,year,doi
0,Navigating Autonomous Vehicle on Unmarked Road...,"Yufei Huang, Yulin Li, Andrea Matta, Mohsen Ja...",arxiv,2024,10.48550/arXiv.2406.00211
1,An Active Inference Agent for Simulating Human...,Michael Carl,arxiv,2024,10.48550/arXiv.2405.03111
2,The emergence of the width of subjective tempo...,Jan Erik Bellingrath,arxiv,2024,10.48550/arXiv.2404.12895
3,Development of Compositionality and Generaliza...,"Prasanna Vijayaraghavan, Jeffrey Frederic Quei...",arxiv,2024,10.48550/arXiv.2403.19995
4,Self-Supervised Path Planning in UAV-aided Wir...,"Ali Krayani, Khalid Khan, Lucio Marcenaro, Mar...",arxiv,2024,10.48550/arXiv.2403.13827
...,...,...,...,...,...
299,From aliens to invisible limbs: The transition...,Jaan Aru,psyarxiv,2018,10.31234/osf.io/rbauz
300,Being a beast machine: The somatic basis of se...,"Manos Tsakiris, Anil Seth",psyarxiv,2018,10.31234/osf.io/6snfm
301,Interoceptive Accuracy Moderates the Glycaemic...,"David Benton, Hayley Young",psyarxiv,2018,10.31234/osf.io/x3uyq
302,Being a beast machine: The origins of selfhood...,Anil Seth,psyarxiv,2018,10.31234/osf.io/vg5da


In [71]:
# Import bioRxiv
bioarxiv_raw = pd.read_csv("bioarxiv_papers.csv")

# Keep only the rows whose `remove` flag is False, restricted to the five
# columns listed below, and renumber the surviving rows from 0.
keep_columns = ["title", "authors", "where_published", "year", "doi"]
not_removed = bioarxiv_raw["remove"].eq(False)
bioarxiv = bioarxiv_raw.loc[not_removed, keep_columns].reset_index(drop=True)
bioarxiv

Unnamed: 0,title,authors,where_published,year,doi
0,Embodied decisions as active inference,"Priorelli, Matteo and Stoianov, Ivilin Peev an...",bioRxiv,2024,10.1101/2024.05.28.596181
1,The sense of agency from active causal inference,"Chang, Acer Yu-Chan and Oi, Hiroki and Maeda, ...",bioRxiv,2024,10.1101/2024.01.29.577723
2,Evidence of optimal control theory over active...,"Brandt, I.M. and Gr{\""u}nbaum, T. and Christen...",bioRxiv,2024,10.1101/2024.05.06.592675
3,Predictive Coding Networks for Temporal Predic...,"Millidge, Beren and Tang, Mufeng and Osanlouy,...",bioRxiv,2024,10.1101/2023.05.15.540906
4,Learning probability distributions of sensory ...,"Oliviers, Gaspard and Bogacz, Rafal and Meulem...",bioRxiv,2024,10.1101/2024.02.29.581455
...,...,...,...,...,...
93,Biological Self-organisation and Markov blankets,"Palacios, Ensor Rafael and Razi, Adeel and Par...",bioRxiv,2017,10.1101/227181
94,"Dopamine, Inference, and Uncertainty","Gershman, Samuel J.",bioRxiv,2017,10.1101/149849
95,Cortical Entropy Values Correlate with Brain S...,"Tozzi, Arturo and Peters, James F. and {\c C}a...",bioRxiv,2017,10.1101/147405
96,The Bayesian-Laplacian Brain,"Zeki, Semir and Ch{\'e}n, Oliver Y.",bioRxiv,2019,10.1101/094516


In [72]:
# Compile: stack the three sources into a single table.
# `ignore_index=True` discards each frame's own row labels and numbers the
# combined rows 0..n-1.
sources = [pubmed, arxiv_psyarxiv, bioarxiv]
db = pd.concat(sources, ignore_index=True)
db

Unnamed: 0,title,authors,where_published,year,doi
0,Reliability of dynamic causal modelling of res...,"Jafarian A, Assem MK, Kocagoncu E, Lanskey JH,...",Hum Brain Mapp,2024,10.1002/hbm.26782
1,Global neural self-disturbance in schizophreni...,"Sabbah SG, Northoff G.",Schizophr Res,2024,10.1016/j.schres.2024.05.015
2,'Snakes and ladders' in paleoanthropology: Fro...,"Manrique HM, Friston KJ, Walker MJ.",Phys Life Rev,2024,10.1016/j.plrev.2024.01.004
3,Two distinct networks for encoding goals and f...,"Di Cesare G, Lombardi G, Zeidman P, Urgen BA, ...",Proc Natl Acad Sci U S A,2024,10.1073/pnas.2402282121
4,The Universal Optimism of the Self-Evidencing ...,"Fisher EL, Hohwy J.",Entropy (Basel),2024,10.3390/e26060518
...,...,...,...,...,...
4378,Biological Self-organisation and Markov blankets,"Palacios, Ensor Rafael and Razi, Adeel and Par...",bioRxiv,2017,10.1101/227181
4379,"Dopamine, Inference, and Uncertainty","Gershman, Samuel J.",bioRxiv,2017,10.1101/149849
4380,Cortical Entropy Values Correlate with Brain S...,"Tozzi, Arturo and Peters, James F. and {\c C}a...",bioRxiv,2017,10.1101/147405
4381,The Bayesian-Laplacian Brain,"Zeki, Semir and Ch{\'e}n, Oliver Y.",bioRxiv,2019,10.1101/094516


In [73]:
# Attach tags
def _wrap_untagged(tag):
    """Replace the bare "untagged" marker with ["untagged"]; return anything else unchanged."""
    if tag == "untagged":
        return ["untagged"]
    return tag


# Hand the combined table to the Database wrapper and let it add the `tag` column.
database.db = db
database.attach_tags(tag_path="data/tags/tags.yaml")

# Presumably tagged rows already hold lists, so wrapping the marker string makes
# the column uniformly list-valued -- TODO confirm against `attach_tags`.
database.db["tag"] = database.db["tag"].apply(_wrap_untagged)

INFO:src.db:Loading tags...
INFO:src.tags:YAML tag file successfully loaded from data/tags/tags.yaml.
INFO:src.db:Adding tags to database...
INFO:src.db:871 papers are currently untagged.


In [74]:
# Save the tagged database together with a human-readable description
description = "Active inference, predictive processing, and the free energy principle database."
database.save(database_description=description)

INFO:src.db:Database saved to data/databases/database__2025-04-15__16:45:04.248461.pkl.
