In [1]:
pip install SPARQLWrapper

Note: you may need to restart the kernel to use updated packages.


In [51]:
import csv
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

In [64]:
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "http://onur-MacBook-Pro.local:7200/repositories/bacaksiz"
sparql = SPARQLWrapper(endpoint_url)

## 1. Find all Authors.

In [8]:
# Define the SPARQL query
query1 = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?author ?name
WHERE {
  ?author a ex:Author .
  ?author foaf:name ?name .
}
"""

# Set the query and format
sparql.setQuery(query1)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Print the results directly
print("Author URI, Name")
for result in results["results"]["bindings"]:
    author = result["author"]["value"]
    name = result["name"]["value"]
    print(f"{author}, {name}")

print("Results have been printed.")

Author URI, Name
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_2075376111, Yu Bao
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_2249588356, Guosun Zeng
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_1736546, Doina Caragea
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_1932524, Xinming Ou
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_48691553, Su Zhang
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_143634612, G. Yin
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_2109336475, Zhimin Zhang
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_38295334, Q. Song
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_144625576, M. Barni
http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_1715407, A. Abrardo
http://www.semanticweb.org/kocak/ontologies/202

## 2. Find all properties whose domain is Author.

In [11]:

# Define the SPARQL query
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

SELECT DISTINCT (strafter(str(?property), "#") AS ?propertyName)
WHERE {
  ?property rdfs:domain ex:Author .
}
"""

# Set the query and format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Print the results directly
print("Properties with Domain as Author:")
for result in results["results"]["bindings"]:
    propertyName = result["propertyName"]["value"]
    print(propertyName)

print("Results have been printed.")

Properties with Domain as Author:
affiliated_with
author_email
author_id
author_name
Results have been printed.


## 3. Find all properties whose domain is either Conference or Journal.

In [44]:
# Define the SPARQL query to find properties whose domain is either Conference or Journal
query_properties = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>

SELECT DISTINCT ?property
WHERE {
  ?property rdfs:domain ?domain .
  FILTER (?domain IN (ex:Conference, ex:Journal))
}
"""

# Set the query and format
sparql.setQuery(query_properties)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Print the results directly
print("Properties with domain as Conference or Journal:")
prefix_to_remove = "http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#"
if not results["results"]["bindings"]:
    print("No properties found for Conference or Journal.")
else:
    for result in results["results"]["bindings"]:
        property_uri = result["property"]["value"]
        property_name = property_uri.replace(prefix_to_remove, "")
        print(f"Property: {property_name}")

Properties with domain as Conference or Journal:
Property: city
Property: edition
Property: conference_year
Property: conference_id
Property: conference_name
Property: conference_url
Property: volume
Property: journal_year
Property: issn
Property: journal_id
Property: journal_name
Property: journal_url


## 4. Find all the papers written by a given author that where published in database conferences.

In [45]:

# Define the SPARQL query to list all authors and their names
query_authors = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?author ?name
WHERE {
  ?author a ex:Author .
  ?author foaf:name ?name .
}
"""

# Set the query and format
sparql.setQuery(query_authors)
sparql.setReturnFormat(JSON)

# Execute the query and process results
results = sparql.query().convert()

# Print the results directly
print("List of Authors and their URIs:")
if not results["results"]["bindings"]:
    print("No authors found.")
else:
    for result in results["results"]["bindings"]:
        author_uri = result["author"]["value"]
        author_name = result["name"]["value"]
        print(f"Author URI: {author_uri}, Name: {author_name}")

List of Authors and their URIs:
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_2075376111, Name: Yu Bao
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_2249588356, Name: Guosun Zeng
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_1736546, Name: Doina Caragea
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_1932524, Name: Xinming Ou
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_48691553, Name: Su Zhang
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_143634612, Name: G. Yin
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_2109336475, Name: Zhimin Zhang
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#Author_38295334, Name: Q. Song
Author URI: http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPap

## Additional Query: 5. Authors with Most Publications Across Different Conferences and Journals

In [65]:
query1 = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?authorName (COUNT(?paper) AS ?totalPapers)
WHERE {
  ?paper ex:written_by ?author .
  ?author foaf:name ?authorName .
}
GROUP BY ?authorName
ORDER BY DESC(?totalPapers)
LIMIT 10
"""

# Function to run the query
def run_query(query):
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return pd.json_normalize(results['results']['bindings'])

# Function to format the results
def format_results(df):
    # Extract necessary fields
    df = df[['authorName.value', 'totalPapers.value']]
    # Rename columns
    df.columns = ['Author Name', 'Total Papers']
    # Convert values to integers
    df['Total Papers'] = df['Total Papers'].astype(int)
    return df

# Execute the query and get the results
df1 = run_query(query1)

# Format the results
formatted_df1 = format_results(df1)

# Print the results in a readable format
print("Top 10 Authors by Total Publications:")
print(formatted_df1.to_string(index=False))

Top 10 Authors by Total Publications:
     Author Name  Total Papers
   A. Bensoussan            18
          S. Yam            18
Bradford W. Mott            18
      N. Vieille            18
 James C. Lester            18
          Yu Bao             9
     Guosun Zeng             9
   Doina Caragea             9
      Xinming Ou             9
        Su Zhang             9


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Total Papers'] = df['Total Papers'].astype(int)


## Additional Query: 6. Trend Analysis: Growth in Paper Publications Over Years by Topic

In [66]:
query2 = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?year (SAMPLE(?keyword) AS ?sampleKeyword) (COUNT(?paper) AS ?totalPapers)
WHERE {
  ?paper ex:year ?year ;
         ex:keywords ?keywords .
  BIND(STRAFTER(?keywords, ",") AS ?keyword)  # Assuming keywords are separated by commas
}
GROUP BY ?year ?keyword
ORDER BY ?year
"""

# Function to run the query
def run_query(query):
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return pd.json_normalize(results['results']['bindings'])

# Function to format the results
def format_results(df):
    # Extract necessary fields
    df = df[['year.value', 'sampleKeyword.value', 'totalPapers.value']]
    # Rename columns
    df.columns = ['Year', 'Sample Keyword', 'Total Papers']
    # Convert values to appropriate data types
    df['Year'] = df['Year'].astype(int)
    df['Total Papers'] = df['Total Papers'].astype(int)
    return df

# Execute the query and get the results
df2 = run_query(query2)

# Format the results
formatted_df2 = format_results(df2)

# Print the results in a readable format
print("Publications by Year with Sample Keywords:")
print(formatted_df2.to_string(index=False))

Publications by Year with Sample Keywords:
 Year                                                                                                                                                                               Sample Keyword  Total Papers
 1978                                                                                                                                                                                                          9
 1986                                                                                                                                                                                                          9
 1987                                     main,database,systems,performance,recovery,declining,cost,need,high,recently,spired,research,massive,amounts,ability,store,complete,databases,recognized             9
 1987                         learning,team,problems,elements,competition,cooperation,present,game,stochastic,automata,ad

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Year'] = df['Year'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Total Papers'] = df['Total Papers'].astype(int)
