In [1]:
from rdflib import Graph, URIRef, BNode, RDF, RDFS, OWL, Namespace, Literal

# Turtle snippet for sio:Drug: one named superclass plus an anonymous OWL restriction
turtle_data = """
@prefix sio: <http://semanticscience.org/resource/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix bio: <http://data.bioontology.org/metadata/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<http://semanticscience.org/resource/Drug> a owl:Class ;
    rdfs:subClassOf <http://semanticscience.org/resource/ChemicalSubstance>, [
        rdf:type owl:Restriction ;
        owl:onProperty <http://semanticscience.org/resource/hasCapability> ;
        owl:someValuesFrom [
            rdf:type owl:Class ;
            owl:intersectionOf (
                <http://semanticscience.org/resource/ToRegulate>
                [
                    rdf:type owl:Restriction ;
                    owl:onProperty <http://semanticscience.org/resource/inRelationTo> ;
                    owl:someValuesFrom <http://semanticscience.org/resource/BiologicalEntity> ;
                ]
            ) ;
        ] ;
    ] ;
    bio:prefixIRI "sio:Drug" ;
    dcterms:description "A drug is a chemical substance that contains one or more active ingredients that regulate one or more biological processes."@en ;
    rdfs:isDefinedBy <http://semanticscience.org/ontology/sio/v1.53/sio-subset-labels.owl> ;
    rdfs:label "drug"@en .
"""

# Load the snippet into a fresh graph
g = Graph()
g.parse(data=turtle_data, format="turtle")

# Namespace objects used by the lookups further down in this cell
SIO = Namespace("http://semanticscience.org/resource/")
DC = Namespace("http://purl.org/dc/terms/")
BIO = Namespace("http://data.bioontology.org/metadata/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

# Register the prefixes on the graph (optional, but handy for serialization and querying)
for prefix, namespace in (
    ("sio", SIO),
    ("dcterms", DC),
    ("bio", BIO),
    ("owl", OWL),
    ("rdfs", RDFS),
    ("rdf", RDF),
):
    g.bind(prefix, namespace)

# The class whose statements are inspected below
drug_uri = URIRef("http://semanticscience.org/resource/Drug")

# Function to recursively print blank nodes
def print_blank_node(bnode, _ancestors=frozenset()):
    """Print every predicate/object pair reachable from a blank node.

    Objects that are themselves blank nodes are announced and then expanded
    recursively, so nested structures are printed depth-first.

    Args:
        bnode: Node whose outgoing statements are read from the module-level
            graph ``g``.
        _ancestors: Internal. Blank nodes already being expanded on the current
            path; callers should leave it at its default.
    """
    # Track the current path so a blank node that (indirectly) points back to
    # itself is reported but not expanded again, which would recurse forever.
    path = _ancestors | {bnode}
    for p, o in g.predicate_objects(bnode):
        if isinstance(o, BNode):
            print(f"Blank Node Predicate: {p}, Object is another Blank Node")
            if o not in path:
                print_blank_node(o, path)
        else:
            print(f"Blank Node Predicate: {p}, Object: {o}")

# Report the rdf:type value(s) declared for the drug class
for class_term in g.objects(drug_uri, RDF.type):
    print(f"Class: {class_term}")

# Report each superclass; anonymous ones are expanded property by property
for parent in g.objects(drug_uri, RDFS.subClassOf):
    if not isinstance(parent, BNode):
        print(f"SubClass: {parent}")
        continue
    print("SubClass is a blank node with the following properties:")
    print_blank_node(parent)

# Report everything else attached directly to the drug class
already_reported = {RDF.type, RDFS.subClassOf}
for predicate, value in g.predicate_objects(drug_uri):
    if predicate in already_reported:
        continue
    print(f"Predicate: {predicate}, Object: {value}")



Class: http://www.w3.org/2002/07/owl#Class
SubClass: http://semanticscience.org/resource/ChemicalSubstance
SubClass is a blank node with the following properties:
Blank Node Predicate: http://www.w3.org/1999/02/22-rdf-syntax-ns#type, Object: http://www.w3.org/2002/07/owl#Restriction
Blank Node Predicate: http://www.w3.org/2002/07/owl#onProperty, Object: http://semanticscience.org/resource/hasCapability
Blank Node Predicate: http://www.w3.org/2002/07/owl#someValuesFrom, Object is another Blank Node
Blank Node Predicate: http://www.w3.org/1999/02/22-rdf-syntax-ns#type, Object: http://www.w3.org/2002/07/owl#Class
Blank Node Predicate: http://www.w3.org/2002/07/owl#intersectionOf, Object is another Blank Node
Blank Node Predicate: http://www.w3.org/1999/02/22-rdf-syntax-ns#first, Object: http://semanticscience.org/resource/ToRegulate
Blank Node Predicate: http://www.w3.org/1999/02/22-rdf-syntax-ns#rest, Object is another Blank Node
Blank Node Predicate: http://www.w3.org/1999/02/22-rdf-syn

In [2]:
import csv
from rdflib import Graph, URIRef, BNode, RDF, RDFS, OWL, Namespace

# Turtle snippet for sio:Drug: one named superclass plus an anonymous OWL restriction
turtle_data = """
@prefix sio: <http://semanticscience.org/resource/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix bio: <http://data.bioontology.org/metadata/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<http://semanticscience.org/resource/Drug> a owl:Class ;
    rdfs:subClassOf <http://semanticscience.org/resource/ChemicalSubstance>, [
        rdf:type owl:Restriction ;
        owl:onProperty <http://semanticscience.org/resource/hasCapability> ;
        owl:someValuesFrom [
            rdf:type owl:Class ;
            owl:intersectionOf (
                <http://semanticscience.org/resource/ToRegulate>
                [
                    rdf:type owl:Restriction ;
                    owl:onProperty <http://semanticscience.org/resource/inRelationTo> ;
                    owl:someValuesFrom <http://semanticscience.org/resource/BiologicalEntity> ;
                ]
            ) ;
        ] ;
    ] ;
    bio:prefixIRI "sio:Drug" ;
    dcterms:description "A drug is a chemical substance that contains one or more active ingredients that regulate one or more biological processes."@en ;
    rdfs:isDefinedBy <http://semanticscience.org/ontology/sio/v1.53/sio-subset-labels.owl> ;
    rdfs:label "drug"@en .
"""

# Load the snippet into a fresh graph
g = Graph()
g.parse(data=turtle_data, format="turtle")

# Namespace objects used by the lookups further down in this cell
SIO = Namespace("http://semanticscience.org/resource/")
DC = Namespace("http://purl.org/dc/terms/")
BIO = Namespace("http://data.bioontology.org/metadata/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

# Register the prefixes on the graph (optional, but handy for serialization and querying)
for prefix, namespace in (
    ("sio", SIO),
    ("dcterms", DC),
    ("bio", BIO),
    ("owl", OWL),
    ("rdfs", RDFS),
    ("rdf", RDF),
):
    g.bind(prefix, namespace)

# The class whose statements are exported below
drug_uri = URIRef("http://semanticscience.org/resource/Drug")

# Function to recursively extract blank node information
def extract_blank_node(bnode, _ancestors=frozenset()):
    """Collect the statements reachable from a blank node as three-column rows.

    Each row is ``(subject, predicate, object)`` so it lines up with a
    Subject/Predicate/Object CSV header. The subject is always the placeholder
    ``"Blank Node"``; an object that is itself a blank node is also written as
    ``"Blank Node"`` and its own statements follow immediately (depth-first).

    Args:
        bnode: Node whose outgoing statements are read from the module-level
            graph ``g``.
        _ancestors: Internal. Blank nodes already being expanded on the current
            path; callers should leave it at its default.

    Returns:
        list of 3-tuples, in traversal order.
    """
    rows = []
    # Track the current path so a blank node that (indirectly) points back to
    # itself is recorded but not expanded again, which would recurse forever.
    path = _ancestors | {bnode}
    for p, o in g.predicate_objects(bnode):
        if isinstance(o, BNode):
            rows.append(("Blank Node", p, "Blank Node"))
            if o not in path:
                rows.extend(extract_blank_node(o, path))
        else:
            rows.append(("Blank Node", p, o))
    return rows

# Rows for the CSV file; the first row is the header
csv_data = [["Subject", "Predicate", "Object"]]

# rdf:type statements of the drug class
csv_data.extend(
    [drug_uri, RDF.type, class_term]
    for class_term in g.objects(drug_uri, RDF.type)
)

# rdfs:subClassOf statements; anonymous superclasses are expanded in place
for parent in g.objects(drug_uri, RDFS.subClassOf):
    if not isinstance(parent, BNode):
        csv_data.append([drug_uri, RDFS.subClassOf, parent])
        continue
    csv_data.append([drug_uri, RDFS.subClassOf, "Blank Node"])
    csv_data.extend(extract_blank_node(parent))

# Remaining statements about the drug class
handled_predicates = {RDF.type, RDFS.subClassOf}
csv_data.extend(
    [drug_uri, predicate, value]
    for predicate, value in g.predicate_objects(drug_uri)
    if predicate not in handled_predicates
)

# Persist the collected rows
with open("drug_info.csv", mode="w", newline="", encoding="utf-8") as file:
    csv.writer(file).writerows(csv_data)

print("Data saved to drug_info.csv")


Data saved to drug_info.csv


In [3]:
import csv
from rdflib import Graph, URIRef, BNode, RDF, RDFS, Namespace

# Turtle snippet for sio:Drug: one named superclass plus an anonymous OWL restriction
turtle_data = """
@prefix sio: <http://semanticscience.org/resource/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix bio: <http://data.bioontology.org/metadata/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<http://semanticscience.org/resource/Drug> a owl:Class ;
    rdfs:subClassOf <http://semanticscience.org/resource/ChemicalSubstance>, [
        rdf:type owl:Restriction ;
        owl:onProperty <http://semanticscience.org/resource/hasCapability> ;
        owl:someValuesFrom [
            rdf:type owl:Class ;
            owl:intersectionOf (
                <http://semanticscience.org/resource/ToRegulate>
                [
                    rdf:type owl:Restriction ;
                    owl:onProperty <http://semanticscience.org/resource/inRelationTo> ;
                    owl:someValuesFrom <http://semanticscience.org/resource/BiologicalEntity> ;
                ]
            ) ;
        ] ;
    ] ;
    bio:prefixIRI "sio:Drug" ;
    dcterms:description "A drug is a chemical substance that contains one or more active ingredients that regulate one or more biological processes."@en ;
    rdfs:isDefinedBy <http://semanticscience.org/ontology/sio/v1.53/sio-subset-labels.owl> ;
    rdfs:label "drug"@en .
"""

# Load the snippet into a fresh graph
g = Graph()
g.parse(data=turtle_data, format="turtle")

# Namespace objects used by the lookups further down in this cell
SIO = Namespace("http://semanticscience.org/resource/")
DC = Namespace("http://purl.org/dc/terms/")
BIO = Namespace("http://data.bioontology.org/metadata/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

# Register the prefixes on the graph; the SPARQL label lookup relies on "rdfs"
for prefix, namespace in (
    ("sio", SIO),
    ("dcterms", DC),
    ("bio", BIO),
    ("owl", OWL),
    ("rdfs", RDFS),
    ("rdf", RDF),
):
    g.bind(prefix, namespace)

# The class whose statements are exported below
drug_uri = URIRef("http://semanticscience.org/resource/Drug")

# Function to get label or URI parts
def get_label_and_uri(uri):
    """Return ``(label, identifier)`` for an RDF term.

    ``identifier`` is always ``str(uri)``. The label is chosen as follows:

    * a term that is neither a ``URIRef`` nor a ``BNode`` (in practice a
      literal) is returned verbatim, so text containing ``/`` or ``#`` is not
      cut as if it were a URI;
    * otherwise an ``rdfs:label`` found in the module-level graph ``g`` is used;
    * otherwise the fragment after the last ``/`` and ``#`` is used, falling
      back to the whole identifier when that fragment is empty.

    Args:
        uri: The term to describe.

    Returns:
        tuple of two strings: ``(label, identifier)``.
    """
    identifier = str(uri)
    if not isinstance(uri, (URIRef, BNode)):
        return identifier, identifier

    # Direct index lookup; avoids compiling a SPARQL query for every term.
    label = g.value(uri, RDFS.label)
    if label is not None:
        return str(label), identifier

    local_name = identifier.split('/')[-1].split('#')[-1]
    return (local_name or identifier), identifier

# Function to recursively extract blank node information
def extract_blank_node(bnode, _ancestors=frozenset()):
    """Collect the statements reachable from a blank node as six-column rows.

    Every row follows the CSV header order
    ``(Subject, Subject URI, Predicate, Predicate URI, Object, Object URI)``.
    The subject columns always hold the placeholder ``"Blank Node"``. An object
    that is itself a blank node is written as ``"Blank Node"`` in both object
    columns and its own statements follow immediately (depth-first).

    Args:
        bnode: Node whose outgoing statements are read from the module-level
            graph ``g``.
        _ancestors: Internal. Blank nodes already being expanded on the current
            path; callers should leave it at its default.

    Returns:
        list of 6-tuples of strings, in traversal order.
    """
    rows = []
    # Track the current path so a blank node that (indirectly) points back to
    # itself is recorded but not expanded again, which would recurse forever.
    path = _ancestors | {bnode}
    for p, o in g.predicate_objects(bnode):
        pred_label, pred_uri = get_label_and_uri(p)
        if isinstance(o, BNode):
            rows.append(("Blank Node", "Blank Node", pred_label, pred_uri, "Blank Node", "Blank Node"))
            if o not in path:
                rows.extend(extract_blank_node(o, path))
        else:
            obj_label, obj_uri = get_label_and_uri(o)
            rows.append(("Blank Node", "Blank Node", pred_label, pred_uri, obj_label, obj_uri))
    return rows

# Rows for the CSV file; the first row is the header
csv_data = [["Subject", "Subject URI", "Predicate", "Predicate URI", "Object", "Object URI"]]

# Every row below has the drug class as subject, so resolve its label once
# instead of repeating the lookup for each triple.
subj_label, subj_uri = get_label_and_uri(drug_uri)

# rdf:type statements (the predicate label is fixed to "type")
for class_term in g.objects(drug_uri, RDF.type):
    obj_label, obj_uri = get_label_and_uri(class_term)
    csv_data.append([subj_label, subj_uri, "type", str(RDF.type), obj_label, obj_uri])

# rdfs:subClassOf statements; the predicate label is the same for all of them
pred_label, pred_uri = get_label_and_uri(RDFS.subClassOf)
for parent in g.objects(drug_uri, RDFS.subClassOf):
    if isinstance(parent, BNode):
        # Anonymous superclass: write a placeholder row, then its nested statements
        csv_data.append([subj_label, subj_uri, pred_label, pred_uri, "Blank Node", "Blank Node"])
        csv_data.extend(extract_blank_node(parent))
    else:
        obj_label, obj_uri = get_label_and_uri(parent)
        csv_data.append([subj_label, subj_uri, pred_label, pred_uri, obj_label, obj_uri])

# Remaining statements about the drug class
handled_predicates = {RDF.type, RDFS.subClassOf}
for p, o in g.predicate_objects(drug_uri):
    if p in handled_predicates:
        continue
    pred_label, pred_uri = get_label_and_uri(p)
    obj_label, obj_uri = get_label_and_uri(o)
    csv_data.append([subj_label, subj_uri, pred_label, pred_uri, obj_label, obj_uri])

# Write data to CSV
with open("drug_info.csv", mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(csv_data)

print("Data saved to drug_info.csv")


Data saved to drug_info.csv


In [4]:
import csv
from rdflib import Graph, URIRef, BNode, RDF, RDFS, Namespace

# Location of the ontology that this cell exports
turtle_file_path = "../Ontologies/materialsmine_converted.ttl"

# Load the whole file into a fresh graph
g = Graph()
g.parse(turtle_file_path, format="turtle")

# Namespace objects registered as prefixes below
SIO = Namespace("http://semanticscience.org/resource/")
DC = Namespace("http://purl.org/dc/terms/")
BIO = Namespace("http://data.bioontology.org/metadata/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

# Register the prefixes on the graph; the SPARQL label lookup relies on "rdfs"
for prefix, namespace in (
    ("sio", SIO),
    ("dcterms", DC),
    ("bio", BIO),
    ("owl", OWL),
    ("rdfs", RDFS),
    ("rdf", RDF),
):
    g.bind(prefix, namespace)

# Function to get label or URI parts
def get_label_and_uri(uri):
    """Return ``(label, identifier)`` for an RDF term.

    ``identifier`` is always ``str(uri)``. The label is chosen as follows:

    * a term that is neither a ``URIRef`` nor a ``BNode`` (in practice a
      literal) is returned verbatim, so text containing ``/`` or ``#`` is not
      cut as if it were a URI;
    * otherwise an ``rdfs:label`` found in the module-level graph ``g`` is used;
    * otherwise the fragment after the last ``/`` and ``#`` is used, falling
      back to the whole identifier when that fragment is empty.

    Args:
        uri: The term to describe.

    Returns:
        tuple of two strings: ``(label, identifier)``.
    """
    identifier = str(uri)
    if not isinstance(uri, (URIRef, BNode)):
        return identifier, identifier

    # Direct index lookup; avoids compiling a SPARQL query for every term,
    # which matters when this runs for each triple of a full ontology.
    label = g.value(uri, RDFS.label)
    if label is not None:
        return str(label), identifier

    local_name = identifier.split('/')[-1].split('#')[-1]
    return (local_name or identifier), identifier

# Function to recursively extract blank node information
def extract_blank_node(bnode, _ancestors=frozenset()):
    """Collect the statements reachable from a blank node as six-column rows.

    Every row follows the CSV header order
    ``(Subject, Subject URI, Predicate, Predicate URI, Object, Object URI)``.
    The subject columns always hold the placeholder ``"Blank Node"``. An object
    that is itself a blank node is written as ``"Blank Node"`` in both object
    columns and its own statements follow immediately (depth-first).

    Args:
        bnode: Node whose outgoing statements are read from the module-level
            graph ``g``.
        _ancestors: Internal. Blank nodes already being expanded on the current
            path; callers should leave it at its default.

    Returns:
        list of 6-tuples of strings, in traversal order.
    """
    rows = []
    # Track the current path so a blank node that (indirectly) points back to
    # itself is recorded but not expanded again, which would recurse forever.
    path = _ancestors | {bnode}
    for p, o in g.predicate_objects(bnode):
        pred_label, pred_uri = get_label_and_uri(p)
        if isinstance(o, BNode):
            rows.append(("Blank Node", "Blank Node", pred_label, pred_uri, "Blank Node", "Blank Node"))
            if o not in path:
                rows.extend(extract_blank_node(o, path))
        else:
            obj_label, obj_uri = get_label_and_uri(o)
            rows.append(("Blank Node", "Blank Node", pred_label, pred_uri, obj_label, obj_uri))
    return rows

# Rows for the CSV file; the first row is the header
csv_data = [["Subject", "Subject URI", "Predicate", "Predicate URI", "Object", "Object URI"]]

# Walk every statement in the graph; blank-node objects are expanded in place
for subject, predicate, value in g:
    subj_label, subj_uri = get_label_and_uri(subject)
    pred_label, pred_uri = get_label_and_uri(predicate)

    if not isinstance(value, BNode):
        obj_label, obj_uri = get_label_and_uri(value)
        csv_data.append([subj_label, subj_uri, pred_label, pred_uri, obj_label, obj_uri])
        continue

    # Placeholder row for the anonymous object, followed by its nested statements
    csv_data.append([subj_label, subj_uri, pred_label, pred_uri, "Blank Node", "Blank Node"])
    csv_data.extend(extract_blank_node(value))

# Persist the collected rows
with open("all_info.csv", mode="w", newline="", encoding="utf-8") as file:
    csv.writer(file).writerows(csv_data)

print("Data saved to all_info.csv")


Data saved to all_info.csv


In [5]:
import csv
import re
from rdflib import Graph, URIRef, BNode, RDF, RDFS, Namespace

class RDFGraphHandler:
    """Load a Turtle file and export label-resolved triples of selected classes to CSV.

    Constructing an instance binds a fixed set of prefixes and parses the file
    straight away. ``save_to_csv`` then writes every triple whose subject label
    matches one of the requested class names.
    """

    # Placeholder written in place of a blank node's label and identifier.
    BLANK_NODE = "Blank Node"

    def __init__(self, turtle_file_path):
        """Create the graph, bind the prefixes and parse ``turtle_file_path``.

        Args:
            turtle_file_path: Location of the Turtle document; it is handed to
                ``Graph.parse`` unchanged.
        """
        self.turtle_file_path = turtle_file_path
        self.graph = Graph()
        self.SIO = Namespace("http://semanticscience.org/resource/")
        self.DC = Namespace("http://purl.org/dc/terms/")
        self.BIO = Namespace("http://data.bioontology.org/metadata/")
        self.OWL = Namespace("http://www.w3.org/2002/07/owl#")
        self.RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
        self.RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        self.bind_namespaces()
        self.parse_graph()

    def bind_namespaces(self):
        """Register the sio, dcterms, bio, owl, rdfs and rdf prefixes on the graph."""
        self.graph.bind("sio", self.SIO)
        self.graph.bind("dcterms", self.DC)
        self.graph.bind("bio", self.BIO)
        self.graph.bind("owl", self.OWL)
        self.graph.bind("rdfs", self.RDFS)
        self.graph.bind("rdf", self.RDF)

    def parse_graph(self):
        """Parse the Turtle file given at construction into ``self.graph``."""
        self.graph.parse(self.turtle_file_path, format="turtle")

    def get_label_and_uri(self, uri):
        """Return ``(label, identifier)`` for an RDF term.

        ``identifier`` is always ``str(uri)``. The label is chosen as follows:

        * a term that is neither a ``URIRef`` nor a ``BNode`` (in practice a
          literal) is returned verbatim, so text containing ``/`` or ``#`` is
          not cut as if it were a URI;
        * otherwise an ``rdfs:label`` found in ``self.graph`` is used;
        * otherwise the fragment after the last ``/`` and ``#`` is used,
          falling back to the whole identifier when that fragment is empty.

        Args:
            uri: The term to describe.

        Returns:
            tuple of two strings: ``(label, identifier)``.
        """
        identifier = str(uri)
        if not isinstance(uri, (URIRef, BNode)):
            return identifier, identifier

        # Direct index lookup; avoids compiling a SPARQL query for every term,
        # which matters because save_to_csv calls this for each triple.
        label = self.graph.value(uri, self.RDFS.label)
        if label is not None:
            return str(label), identifier

        local_name = identifier.split('/')[-1].split('#')[-1]
        return (local_name or identifier), identifier

    def extract_blank_node(self, bnode, _ancestors=frozenset()):
        """Collect the statements reachable from a blank node as six-column rows.

        Every row follows the CSV header order
        ``(Subject, Subject URI, Predicate, Predicate URI, Object, Object URI)``.
        The subject columns always hold the blank-node placeholder. An object
        that is itself a blank node is written as the placeholder in both
        object columns and its own statements follow immediately (depth-first).

        Args:
            bnode: Node whose outgoing statements are read from ``self.graph``.
            _ancestors: Internal. Blank nodes already being expanded on the
                current path; callers should leave it at its default.

        Returns:
            list of 6-tuples of strings, in traversal order.
        """
        blank = self.BLANK_NODE
        rows = []
        # Track the current path so a blank node that (indirectly) points back
        # to itself is recorded but not expanded again (infinite recursion).
        path = _ancestors | {bnode}
        for p, o in self.graph.predicate_objects(bnode):
            pred_label, pred_uri = self.get_label_and_uri(p)
            if isinstance(o, BNode):
                rows.append((blank, blank, pred_label, pred_uri, blank, blank))
                if o not in path:
                    rows.extend(self.extract_blank_node(o, path))
            else:
                obj_label, obj_uri = self.get_label_and_uri(o)
                rows.append((blank, blank, pred_label, pred_uri, obj_label, obj_uri))
        return rows

    def normalize_string(self, s):
        """Lower-case ``s`` and drop every character that is not ``a-z`` or ``0-9``."""
        return re.sub(r'[^a-z0-9]', '', s.lower())

    def save_to_csv(self, class_names, output_csv_path):
        """Write the triples of the requested classes to a CSV file.

        A triple is exported when the normalized label of its subject equals
        the normalized form of one of ``class_names`` (see
        ``normalize_string``). Blank-node objects are written as a placeholder
        row followed by the rows from ``extract_blank_node``.

        Args:
            class_names: Iterable of class names/labels to keep.
            output_csv_path: Path of the CSV file to create or overwrite.
        """
        normalized_class_names = {self.normalize_string(name) for name in class_names}
        csv_data = [["Subject", "Subject URI", "Predicate", "Predicate URI", "Object", "Object URI"]]

        for subj, pred, obj in self.graph:
            subj_label, subj_uri = self.get_label_and_uri(subj)
            if self.normalize_string(subj_label) not in normalized_class_names:
                # Skip before resolving the predicate label; it is not needed.
                continue

            pred_label, pred_uri = self.get_label_and_uri(pred)
            if isinstance(obj, BNode):
                csv_data.append([subj_label, subj_uri, pred_label, pred_uri, self.BLANK_NODE, self.BLANK_NODE])
                csv_data.extend(self.extract_blank_node(obj))
            else:
                obj_label, obj_uri = self.get_label_and_uri(obj)
                csv_data.append([subj_label, subj_uri, pred_label, pred_uri, obj_label, obj_uri])

        with open(output_csv_path, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            writer.writerows(csv_data)

        print(f"Data saved to {output_csv_path}")

# Example run: export the triples of every subject whose label normalizes to "drug"
output_csv_path = "filtered_info.csv"
class_names = ["Drug"]
turtle_file_path = "../Ontologies/materialsmine_converted.ttl"

rdf_handler = RDFGraphHandler(turtle_file_path)
rdf_handler.save_to_csv(class_names=class_names, output_csv_path=output_csv_path)


Data saved to filtered_info.csv
