### Configuring the relational DB

In [4]:
from pandas import read_csv, Series

# Load the publications table from the CSV file.
# - keep_default_na=False disables pandas' default missing-value markers, so
#   empty cells are not converted to NaN.
# - The keys of 'dtype' must be spelled exactly like the column names of the
#   CSV header (e.g. "publication_year", "venue_type"); a key that matches no
#   column has no effect on the loaded data.
publications = read_csv(
    "import/relational_publications.csv",
    keep_default_na=False,
    dtype={
        "id": "string",
        "title": "string",
        "type": "string",
        "publication_year": "int",
        "issue": "string",
        "volume": "string",
        "chapter": "string",
        "publication_venue": "string",
        "venue_type": "string",
        "publisher": "string",
        "event": "string",
    },
)

# Generate one internal identifier per publication, derived from the row
# index label ("publication-0", "publication-1", ...). Only the index is
# needed, so there is no reason to iterate over full rows.
publications_internal_id = ["publication-" + str(idx) for idx in publications.index]

# New data frame that includes only the column "id". The explicit copy makes
# it independent from 'publications', so adding a column below cannot touch
# (or warn about) the original frame.
publications_ids = publications[["id"]].copy()

# Add the internal identifiers as the first column. The Series carries the
# same index as the frame so values line up row by row.
publications_ids.insert(
    0,
    "publicationId",
    Series(publications_internal_id, dtype="string", index=publications_ids.index),
)

# Show the new data frame on screen
publications_ids

Unnamed: 0,publicationId,id
0,publication-0,doi:10.1162/qss_a_00023
1,publication-1,doi:10.1007/s11192-019-03217-6
2,publication-2,doi:10.1007/s11192-019-03311-9
3,publication-3,doi:10.1038/sdata.2016.18
4,publication-4,doi:10.1371/journal.pbio.3000385
...,...,...
495,publication-495,doi:10.3390/admsci10030069
496,publication-496,doi:10.1186/s12888-020-02825-4
497,publication-497,doi:10.1080/00472778.2020.1776578
498,publication-498,doi:10.1007/s11301-020-00196-4


In [9]:
# Select the publications whose venue type is 'journal'
journals = publications.loc[publications["venue_type"] == "journal"]
journals  # Display the selected rows

Unnamed: 0,id,title,type,publication_year,issue,volume,chapter,publication_venue,venue_type,publisher,event
0,doi:10.1162/qss_a_00023,"Opencitations, An Infrastructure Organization ...",journal-article,2020,1,1,,Quantitative Science Studies,journal,crossref:281,
1,doi:10.1007/s11192-019-03217-6,"Software Review: Coci, The Opencitations Index...",journal-article,2019,2,121,,Scientometrics,journal,crossref:297,
2,doi:10.1007/s11192-019-03311-9,Nine Million Book Items And Eleven Million Cit...,journal-article,2019,2,122,,Scientometrics,journal,crossref:297,
3,doi:10.1038/sdata.2016.18,The Fair Guiding Principles For Scientific Dat...,journal-article,2016,1,3,,Scientific Data,journal,crossref:297,
4,doi:10.1371/journal.pbio.3000385,The Nih Open Citation Collection: A Public Acc...,journal-article,2019,10,17,,Plos Biology,journal,crossref:340,
...,...,...,...,...,...,...,...,...,...,...,...
495,doi:10.3390/admsci10030069,Performance Analysis And Science Mapping Of In...,journal-article,2020,3,10,,Administrative Sciences,journal,crossref:1968,
496,doi:10.1186/s12888-020-02825-4,Mapping The Literature On Parents With Mental ...,journal-article,2020,1,20,,Bmc Psychiatry,journal,crossref:297,
497,doi:10.1080/00472778.2020.1776578,Evolution Of The Entrepreneurship And Innovati...,journal-article,2020,,,,Journal Of Small Business Management,journal,crossref:301,
498,doi:10.1007/s11301-020-00196-4,Intellectual Structure Of Management Innovatio...,journal-article,2020,3,71,,Management Review Quarterly,journal,crossref:297,


Looking for publications by other criteria: a different venue type, and the year of publication.

In [13]:
# Data frame of the publications whose venue type is 'book'
books = publications.query("venue_type == 'book'")
books  # Showing the data frame

Unnamed: 0,id,title,type,publication_year,issue,volume,chapter,publication_venue,venue_type,publisher,event
8,doi:10.1007/978-3-030-61244-3_16,Researchflow: Understanding The Knowledge Flow...,book-chapter,2020,,,1,Lecture Notes In Computer Science - Knowledge ...,book,crossref:297,
9,doi:10.1007/978-3-030-61244-3_6,Ontologies Supporting Research-Related Informa...,book-chapter,2020,,,1,Lecture Notes In Computer Science - Knowledge ...,book,crossref:297,
10,doi:10.1007/978-3-030-54956-5_2,Question Answering On Scholarly Knowledge Graphs,book-chapter,2020,,,1,Digital Libraries For Open Knowledge - Lecture...,book,crossref:297,
11,doi:10.1007/978-3-030-55814-7_15,Dingo: An Ontology For Projects And Grants Lin...,book-chapter,2020,,,1,"Adbis, Tpdl And Eda 2020 Common Workshops And ...",book,crossref:297,
13,doi:10.1007/978-3-030-62466-8_28,The Opencitations Data Model,book-chapter,2020,,,1,Lecture Notes In Computer Science - The Semant...,book,crossref:297,
15,doi:10.1007/978-3-030-77385-4_37,Kgbench: A Collection Of Knowledge Graph Datas...,book-chapter,2021,,,1,The Semantic Web - Lecture Notes In Computer S...,book,crossref:297,
20,doi:10.1007/978-3-030-84825-5_11,Lobd: Linked Data Dashboard For Marine Biodive...,book-chapter,2021,,,1,Communications In Computer And Information Sci...,book,crossref:297,
26,doi:10.1007/978-3-030-16187-3_20,Using The Spar Ontology Network To Represent T...,book-chapter,2019,,,1,Advances In Intelligent Systems And Computing ...,book,crossref:297,
29,doi:10.1007/978-3-030-59194-6_37,Construction And Leverage Scientific Knowledge...,book-chapter,2020,,,1,Systems And Information Sciences - Advances In...,book,crossref:297,
30,doi:10.1007/978-3-030-61244-3_7,A Unified Nanopublication Model For Effective ...,book-chapter,2020,,,1,Lecture Notes In Computer Science - Knowledge ...,book,crossref:297,


In [15]:
# Data frame of the publications published before 2020
# (strictly earlier: publications from 2020 itself are excluded)
years = publications.query("publication_year < 2020")
years  # Showing the data frame

Unnamed: 0,id,title,type,publication_year,issue,volume,chapter,publication_venue,venue_type,publisher,event
1,doi:10.1007/s11192-019-03217-6,"Software Review: Coci, The Opencitations Index...",journal-article,2019,2,121,,Scientometrics,journal,crossref:297,
2,doi:10.1007/s11192-019-03311-9,Nine Million Book Items And Eleven Million Cit...,journal-article,2019,2,122,,Scientometrics,journal,crossref:297,
3,doi:10.1038/sdata.2016.18,The Fair Guiding Principles For Scientific Dat...,journal-article,2016,1,3,,Scientific Data,journal,crossref:297,
4,doi:10.1371/journal.pbio.3000385,The Nih Open Citation Collection: A Public Acc...,journal-article,2019,10,17,,Plos Biology,journal,crossref:340,
5,doi:10.3233/ds-190016,Enabling Text Search On Sparql Endpoints Throu...,journal-article,2019,1-2,2,,Data Science,journal,crossref:7437,
...,...,...,...,...,...,...,...,...,...,...,...
353,doi:10.3390/ijerph16010029,Emerging Trends And New Developments In Disast...,journal-article,2018,1,16,,International Journal Of Environmental Researc...,journal,crossref:1968,
354,doi:10.3390/ijerph16111928,Analysis Of Global Research On Malaria And Pla...,journal-article,2019,11,16,,International Journal Of Environmental Researc...,journal,crossref:1968,
355,doi:10.3390/ijerph16152788,Application Of The Theory Of Planned Behavior ...,journal-article,2019,15,16,,International Journal Of Environmental Researc...,journal,crossref:1968,
361,doi:10.1021/acs.analchem.9b05454,Ambient Ionization Mass Spectrometry Today And...,journal-article,2019,3,92,,Analytical Chemistry,journal,crossref:316,
