In [1]:
import os
import glob
import rdflib
import concurrent.futures

def read_and_parse_file(file_path):
    """Parse a single JSON-LD file into its own RDFLib graph.

    Args:
        file_path: Path of the JSON-LD document; it is read as UTF-8 text.

    Returns:
        rdflib.Graph: A new graph containing the triples parsed from the file.
    """
    # Pull the whole document into memory first; the file handle is not
    # needed once the text has been read.
    with open(file_path, 'r', encoding='utf-8') as handle:
        jsonld_text = handle.read()

    # Each file gets a fresh graph so the caller decides how to combine them.
    parsed = rdflib.Graph()
    parsed.parse(data=jsonld_text, format='json-ld')
    return parsed

def load_jsonld_files_to_graph(folder_path, pattern="*.json"):
    """Load every matching JSON-LD file in a folder into one merged RDFLib graph.

    Files are parsed concurrently in a thread pool by ``read_and_parse_file``
    and every per-file graph is merged into the result as soon as it is
    available.

    Args:
        folder_path: Directory to search. The search is not recursive.
        pattern: Glob pattern, relative to ``folder_path``, selecting the
            files to load. Defaults to ``"*.json"`` (the original behaviour);
            pass e.g. ``"*.jsonld"`` for files that use that extension.

    Returns:
        rdflib.Graph: One graph holding the triples of all matched files.
        The graph is empty when no file matches the pattern.

    Raises:
        Any exception raised by ``read_and_parse_file`` for a file (for
        example an unreadable file or invalid JSON-LD) propagates to the
        caller.
    """
    # glob returns paths in a filesystem-dependent order; sorting makes the
    # load order (and which failing file is reported first) reproducible.
    jsonld_files = sorted(glob.glob(os.path.join(folder_path, pattern)))

    # Accumulator for the triples of all files
    graph = rdflib.Graph()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in input order. Merging inside the
        # ``with`` block overlaps merging with parsing and avoids keeping
        # every per-file graph alive until the pool has shut down.
        for file_graph in executor.map(read_and_parse_file, jsonld_files):
            graph += file_graph

    return graph

# Folder holding the JSON-LD files. This is a relative path, so it is
# resolved against the notebook's current working directory.
folder_path = 'BatteryTypeJson'

# Parse the files in that folder and merge their triples into `graph`,
# the graph that the query cells below run against.
graph = load_jsonld_files_to_graph(folder_path)


In [5]:

# Report the size of the merged graph as a quick check that loading worked
print(f"Graph has {len(graph)} triples.")

# Optional: write the merged graph to disk in Turtle format.
# Uncomment the three lines below to enable it.
#output_file = 'output_graph.ttl'
#graph.serialize(destination=output_file, format='turtle')
#print(f"Serialized graph to {output_file}")


Graph has 34886 triples.


In [6]:
# SPARQL query: for every schema:name, count the schema:subjectOf values of
# the things carrying that name, and keep the 10 names with the highest count.
# Note: results are grouped by ?name (not by ?thing), so distinct things that
# share the same name are pooled into a single row.
# NOTE(review): the query does not check the type of ?subjectOf; the
# "publications" wording in the printout below assumes they are publications.
query = """
PREFIX schema: <https://schema.org/>

SELECT ?name (COUNT(?subjectOf) AS ?subjectOfCount)
WHERE {
  ?thing schema:subjectOf ?subjectOf .
  ?thing schema:name ?name .
}
GROUP BY ?name
ORDER BY DESC(?subjectOfCount)
LIMIT 10
"""

# Run the query against the merged graph loaded above
qres = graph.query(query)

# One line per result row; the selected variables (?name, ?subjectOfCount)
# are read as attributes of the row.
for row in qres:
    print(f"{row.name} is the subject of {row.subjectOfCount} publications")

NCR18650BF is the subject of 54 publications
INR18650 MJ1 is the subject of 20 publications
INR21700 M50T is the subject of 11 publications
SLPB11543140H5 is the subject of 9 publications
AMP20M1HD-A is the subject of 6 publications
INR21700 M50 is the subject of 6 publications
SLPB75106100 is the subject of 6 publications
MP176065 is the subject of 4 publications
MP176065xtd is the subject of 4 publications
SLPB100216216H is the subject of 4 publications


In [24]:
import requests

# Fetch the battery-domain JSON-LD context, which maps short term names to IRIs.
context_url = "https://w3id.org/emmo/domain/battery/context"
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() turns an HTTP error response into an immediate, clear
# failure instead of a confusing JSON/KeyError further down.
response = requests.get(context_url, timeout=30)
response.raise_for_status()
context_data = response.json()

# Look up the IRIs of the terms used in the query. As indexed here, property
# terms are objects whose IRI sits under "@id", while class terms map
# directly to their IRI string.
hasPositiveElectrode = rdflib.URIRef(context_data["@context"]["hasPositiveElectrode"]["@id"])
PositiveElectrode = rdflib.URIRef(context_data["@context"]["PositiveElectrode"])
hasActiveMaterial = rdflib.URIRef(context_data["@context"]["hasActiveMaterial"]["@id"])
NMC = rdflib.URIRef(context_data["@context"]["LithiumNickelManganeseCobaltOxide"])
# LFP is resolved alongside NMC but is not used by the query below.
LFP = rdflib.URIRef(context_data["@context"]["LithiumIronPhosphate"])

# Count the things that have a positive electrode whose active material is NMC.
# COUNT(DISTINCT ?thing) is used so that a thing matched through several
# electrode / active-material nodes is counted once rather than once per match.
query = f"""
SELECT (COUNT(DISTINCT ?thing) AS ?countInstance)
WHERE {{
  ?thing <{hasPositiveElectrode}> ?positiveElectrode .
  ?positiveElectrode a <{PositiveElectrode}> ;
                     <{hasActiveMaterial}> ?activeMaterial .
  ?activeMaterial a <{NMC}> .
}}
"""

# Run the query against the merged graph
qres = graph.query(query)

# The aggregate query yields a single row holding the count
for row in qres:
    print(f"Count of cells with NMC active materials: {row.countInstance}")

Count of cells with NMC active materials: 78
