## Interacting with data

In [2]:
from sqlite3 import connect
from pandas import read_sql

# Open the SQLite database that stores the publication data.
# NOTE(review): "/publications.db" is an absolute path at the filesystem root,
# and the output recorded for this cell is "unable to open database file" --
# confirm where the database file is really located.
con = connect("/publications.db")
try:
    # Retrieve the title of every row in the JournalArticle table.
    query = "SELECT title FROM JournalArticle"
    df_sql = read_sql(query, con)
finally:
    # Using a sqlite3 connection as a context manager only commits or rolls
    # back the transaction; it does not close the connection, so it is closed
    # explicitly here even when the query fails.
    con.close()

df_sql  # show the content of the result of the query

OperationalError: unable to open database file

In [3]:
from sparql_dataframe import get

# SPARQL endpoint of the Blazegraph instance reachable on the local machine.
endpoint = "http://127.0.0.1:9999/blazegraph/sparql"

# Select every resource typed as schema:ScholarlyArticle together with its name.
query = """
PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX schema: <https://schema.org/>

SELECT ?journal_article ?title
WHERE {
    ?journal_article rdf:type schema:ScholarlyArticle .
    ?journal_article schema:name ?title .
}
"""

# The third argument of sparql_dataframe's `get` is its `post` flag; it is
# spelled out by keyword so the bare `True` is self-explanatory.
df_sparql = get(endpoint, query, post=True)
df_sparql

Unnamed: 0,journal_article,title


In [6]:
# Retrieve the scholarly articles and chapters having at least one author
# whose name contains "silvio" (case-insensitive match).
#
# Two details of the query matter for the correctness of the result:
# * DISTINCT: the author variables are used only for filtering and are not
#   projected, so a publication with several matching authors would otherwise
#   be returned once per matching author.
# * ?issue and ?volume live in two separate OPTIONAL blocks: when both triple
#   patterns share one OPTIONAL group, a publication that has only one of the
#   two values gets neither of them bound.
publication_query = """
PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX schema: <https://schema.org/>

SELECT DISTINCT ?internalId ?doi ?publicationYear ?title ?issue ?volume ?publicationVenue
WHERE {
    VALUES ?type {
        schema:ScholarlyArticle
        schema:Chapter
    }

    ?internalId rdf:type ?type .
    ?internalId schema:identifier ?doi .
    ?internalId schema:datePublished ?publicationYear .
    ?internalId schema:name ?title .
    ?internalId schema:isPartOf ?publicationVenue .

    # Filter by author name
    ?internalId schema:author ?author .
    ?author schema:name ?authorName .
    FILTER (CONTAINS(LCASE(?authorName), "silvio"))

    # Issue and volume are independent of each other
    OPTIONAL { ?internalId schema:issueNumber ?issue . }
    OPTIONAL { ?internalId schema:volumeNumber ?volume . }
}
"""

# Run the query against the SPARQL endpoint; the third argument (True) is kept
# exactly as in the other SPARQL cell of this notebook.
df_publications_sparql = get(endpoint, publication_query, True)
df_publications_sparql

Unnamed: 0,internalId,doi,publicationYear,title,issue,volume,publicationVenue


In [7]:
# Show the dtype of each column of the query result before any explicit cast.
df_publications_sparql.dtypes

internalId          object
doi                 object
publicationYear     object
title               object
issue               object
volume              object
publicationVenue    object
dtype: object

In [12]:
# Cast every column to an explicit dtype with a single `astype` call driven by
# a column -> dtype mapping, instead of one in-place assignment per column.
# Columns not listed in the mapping are left untouched.
# NOTE(review): the plain `int` cast raises if publicationYear contains a
# missing value -- confirm the query always binds ?publicationYear.
df_publications_sparql = df_publications_sparql.astype(
    {
        "internalId": "string",
        "doi": "string",
        "publicationYear": int,
        "title": "string",
        "issue": "string",
        "volume": "string",
        "publicationVenue": "string",
    }
)

# Display the dtypes again to verify the result of the cast.
df_publications_sparql.dtypes

internalId          string
doi                 string
publicationYear      int64
title               string
issue               string
volume              string
publicationVenue    string
dtype: object

In [13]:
# Replace every missing value left in the frame with an empty string, then
# display the resulting frame.
df_publications_sparql = df_publications_sparql.fillna(value="")

df_publications_sparql

Unnamed: 0,internalId,doi,publicationYear,title,issue,volume,publicationVenue
