In [2]:
from rdflib import Graph

# Turtle sources that are merged, in this order, into a single graph.
ttl_files = [
    "turtle/journals_enriched.ttl",
    "turtle/conferences_enriched.ttl",
    "turtle/papers_details_enriched.ttl",
    "turtle/citations.ttl",
    "turtle/review_on.ttl",
    "turtle/reviews.ttl",
    "turtle/reviewed_by.ttl",
    "turtle/published_in_enriched_v2.ttl",
    "turtle/affiliated_with.ttl",
    "turtle/affiliations.ttl",
    "turtle/written_by_enriched.ttl",
    "turtle/authors.ttl",
]

# Start from an empty graph and accumulate the triples of every input file.
combined_graph = Graph()
for file_name in ttl_files:
    combined_graph.parse(source=file_name, format="ttl")

# Write the merged graph out as RDF/XML.
combined_graph.serialize(
    destination="combined_graph.rdf",
    format="xml",
)

print("All TTL files have been combined and saved as 'combined_graph.rdf' for final ontology")

All TTL files have been combined and saved as 'combined_graph.rdf' for final ontology


In [9]:
from rdflib import Graph, URIRef
from urllib.parse import urlparse

def is_valid_iri(iri):
    """Return True when ``iri`` looks like a usable absolute IRI.

    The check is intentionally lightweight (it is not a full RFC 3987
    validator). An IRI is accepted when:

    * it is non-empty and contains no control characters, spaces, or any of
      the characters ``< > " { } | ^ ` \\`` (these are not allowed inside an
      IRI reference in Turtle/SPARQL),
    * ``urlparse`` can split it without raising, and
    * it has a scheme plus either an authority (netloc) or a path.

    Requiring only "netloc or path" means IRIs such as ``http://example.org``
    (empty path) and ``urn:isbn:0451450523`` (no authority) are accepted.

    Args:
        iri: The IRI text to check.

    Returns:
        bool: True if the IRI passes the checks above, False otherwise.
    """
    if not iri:
        return False

    # Reject characters that cannot appear in a serialized IRI reference.
    if any(ch <= " " or ch in '<>"{}|^`\\' for ch in iri):
        return False

    try:
        parsed = urlparse(iri)
    except ValueError:
        # urlparse raises on malformed authorities (e.g. an unbalanced '[').
        return False

    return bool(parsed.scheme) and bool(parsed.netloc or parsed.path)

# Load the combined RDF/XML file back in and report every URI term
# (subject, predicate or object) that fails the is_valid_iri check.
g = Graph()
g.parse(source="combined_graph.rdf", format="xml")

for s, p, o in g:
    for term in (s, p, o):
        # Literals and blank nodes are skipped; only URIRef terms are checked.
        if not isinstance(term, URIRef):
            continue
        if is_valid_iri(str(term)):
            continue
        print(f"Invalid IRI found: {term}")


In [3]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Configuration
# The endpoint can be overridden with the SPARQL_ENDPOINT_URL environment
# variable so the notebook is not tied to a single machine's hostname. When
# the variable is unset or empty, the original repository address is used.
import os

endpoint_url = (
    os.environ.get("SPARQL_ENDPOINT_URL")
    or "http://onur-MacBook-Pro.local:7200/repositories/bacaksiz"
)

# Initialize the SPARQL wrapper with your endpoint
sparql = SPARQLWrapper(endpoint_url)

def run_query(query):
    """Send ``query`` to the module-level ``sparql`` wrapper and return the converted JSON result.

    Args:
        query: SPARQL query text.

    Returns:
        Whatever ``sparql.query().convert()`` yields with the return format
        set to JSON.
    """
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    response = sparql.query()
    return response.convert()

# Namespace declarations that are prepended to every statistics query.
prefixes = """
PREFIX ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
"""

# Query bodies (without prefixes), keyed by the heading printed with their results.
_query_bodies = {
    "Number of Classes": "SELECT (COUNT(DISTINCT ?class) AS ?count) WHERE {[] a ?class.}",
    "Number of Properties": "SELECT (COUNT(DISTINCT ?property) AS ?count) WHERE {[] ?property [].}",
    "Number of Instances for Main Classes": """
        SELECT ?class (COUNT(?instance) AS ?count)
        WHERE {
            ?instance a ?class.
            FILTER(?class IN (ex:Paper, ex:Author, ex:Affiliation, ex:Review, ex:Citation))
        }
        GROUP BY ?class
    """,
    "Total Number of Triples": "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o. }",
}

# Full query text = shared prefixes + body; insertion order is preserved.
queries = {heading: prefixes + body for heading, body in _query_bodies.items()}

# Execute each statistics query and print its heading followed by one line per result row.
for description, query in queries.items():
    results = run_query(query)
    print(description)
    for binding in results["results"]["bindings"]:
        if 'class' in binding:
            # Keep only the local name after the '#' of the class IRI.
            class_label = binding['class']['value'].rsplit('#', 1)[-1]
        else:
            # Rows without a ?class binding are single aggregate totals.
            class_label = 'Total'
        count_value = binding['count']['value']
        print(f"  {class_label}: {count_value}")
    print()


Number of Classes
  Total: 18

Number of Properties
  Total: 37

Number of Instances for Main Classes
  Paper: 1566
  Author: 518
  Affiliation: 32
  Review: 180

Total Number of Triples
  Total: 30218

