diff --git a/atomrdf/graph.py b/atomrdf/graph.py index 66b9b2f..3a40285 100644 --- a/atomrdf/graph.py +++ b/atomrdf/graph.py @@ -39,7 +39,7 @@ # from pyscal3.core import System from pyscal3.atoms import Atoms -from atomrdf.visualize import visualize_graph +from atomrdf.visualize import visualize_graph, visualize_provenance from atomrdf.network.network import OntologyNetwork from atomrdf.network.ontology import read_ontology from atomrdf.structure import System @@ -49,7 +49,7 @@ from atomrdf.workflow.workflow import Workflow from atomrdf.sample import Sample -from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO, PROV +from atomrdf.namespace import Namespace, CMSO, PLDO, PODO, ASMO, PROV, MATH, UNSAFECMSO, UNSAFEASMO # read element data file file_location = os.path.dirname(__file__).split("/") @@ -103,7 +103,12 @@ def _replace_keys(refdict, indict): def _dummy_log(str): pass - +def _name(term): + try: + return str(term.toPython()) + except: + return str(term) + def _prepare_log(file): logger = logging.getLogger(__name__) handler = logging.FileHandler(file) @@ -1173,10 +1178,13 @@ def get_sample(self, sample, no_atoms=False): return self.sgraph, na return self.sgraph - def get_sample_label(self, sample): - label = self.graph.value(sample, RDFS.label) + def get_label(self, item): + label = self.graph.value(item, RDFS.label) if label is not None: return label.toPython() + + def get_sample_label(self, sample): + label = self.get_label(sample) return label def change_label(self, sample, label): @@ -1229,6 +1237,7 @@ def get_system_from_sample(self, sample): sys.atoms = at sys.sample = sample sys.graph = self + sys._name = sample.toPython().split('sample:')[-1] return sys def to_file(self, sample, filename=None, format="poscar", add_sample_id=True, @@ -1278,7 +1287,7 @@ def to_file(self, sample, filename=None, format="poscar", add_sample_id=True, filename = os.path.join(os.getcwd(), "out") sys = self.get_system_from_sample(sample) - sys.to_file(filename, format=format, add_sample_id=add_sample_id, input_data=input_data, + sys.to_file(filename=filename, format=format, add_sample_id=add_sample_id, input_data=input_data, pseudopotentials=pseudopotentials, kspacing=kspacing, kpts=kpts, koffset=koffset, crystal_coordinates=crystal_coordinates) @@ -1293,3 +1302,153 @@ def add_workflow(self, job, workflow_environment=None, workflow_module=None, job workflow_module=workflow_module, job_dicts=job_dicts, add_intermediate_jobs=add_intermediate_jobs) + + def find_property(self, label): + prop_list = list(self.graph.triples((None, RDFS.label, label))) + if len(prop_list) == 0: + raise RuntimeError(f'Property {label} not found in the graph') + prop = prop_list[0][0] + return prop + + def get_string_label(self, item): + label = self.get_label(item) + + if label is None: + try: + label = str(item.toPython()) + except: + label = str(item) + + if "activity" in label: + method = self.value(item, ASMO.hasComputationalMethod) + if method is not None: + method_name = self.value(method, RDF.type) + if method_name is not None: + label = method_name.toPython().split("/")[-1] + return label + + def _add_to_dict(self, prop, indict): + name = _name(prop) + if name not in indict.keys(): + indict[name] = {} + indict[name]['found'] = False + indict[name]['label'] = self.get_string_label(prop) + + def _get_ancestor(self, prop, prov): + #note that only one operation and parent are present! + if isinstance(prop, str): + prop = URIRef(prop) + propname = _name(prop) + + operation = [x[1] for x in self.triples((prop, ASMO.wasCalculatedBy, None))] + if len(operation) > 0: + parent = [x[2] for x in self.triples((prop, ASMO.wasCalculatedBy, None))] + operation = operation[0] + parent = parent[0] + prov[propname]['operation'] = 'output_parameter' + prov[propname]['inputs'] = {} + prov[propname]['inputs']['0'] = _name(parent) + self._add_to_dict(parent, prov) + prov[_name(parent)]['inputs'] = {} + associated_samples = [x[0] for x in self.triples((None, PROV.wasGeneratedBy, parent))] + for count, sample in enumerate(associated_samples): + prov[_name(parent)]['inputs'][str(count)] = _name(sample) + self._add_to_dict(sample, prov) + prov[_name(parent)]['found'] = True + prov[_name(parent)]['operation'] = 'sample_for_activity' + + else: + operation = [x[1] for x in self.triples((None, None, prop))] + parent = [list(self.triples((None, op, prop)))[0][0] for op in operation] + if len(operation) == 0: + prov[propname]['found'] = True + return prov + operation = operation[0] + parent = parent[0] + + if operation.toPython() == "http://purls.helmholtz-metadaten.de/asmo/hasInputParameter": + #print(f'we ran 1 for {operation.toPython()}') + prov[propname]['operation'] = 'input_parameter' + prov[propname]['inputs'] = {} + prov[propname]['inputs']['0'] = _name(parent) + self._add_to_dict(parent, prov) + prov[_name(parent)]['inputs'] = {} + associated_samples = [x[0] for x in self.triples((None, PROV.wasGeneratedBy, parent))] + for count, sample in enumerate(associated_samples): + prov[_name(parent)]['inputs'][str(count)] = _name(sample) + self._add_to_dict(sample, prov) + prov[_name(parent)]['found'] = True + prov[_name(parent)]['operation'] = 'sample_for_activity' + + elif operation.toPython() == "http://purls.helmholtz-metadaten.de/cmso/hasCalculatedProperty": + #print(f'we ran 2 for {operation.toPython()}') + prov[propname]['operation'] = 'output_parameter' + prov[propname]['inputs'] = {} + prov[propname]['inputs']['0'] = _name(parent) + self._add_to_dict(parent, prov) + prov[_name(parent)]['found'] = True + prov[_name(parent)]['operation'] = 'sample_output' + + elif operation == MATH.hasSum: + addends = list(x[2] for x in self.triples((parent, MATH.hasAddend, None))) + prov[propname]['operation'] = 'addition' + prov[propname]['inputs'] = {} + for count, term in enumerate(addends): + prov[propname]['inputs'][f'{count}'] = _name(term) + self._add_to_dict(term, prov) + + elif operation == MATH.hasDifference: + minuend = self.value(parent, MATH.hasMinuend) + subtrahend = self.value(parent, MATH.hasSubtrahend) + prov[propname]['operation'] = 'subtraction' + prov[propname]['inputs'] = {} + prov[propname]['inputs']['0'] = _name(minuend) + prov[propname]['inputs']['1'] = _name(subtrahend) + self._add_to_dict(minuend, prov) + self._add_to_dict(subtrahend, prov) + + elif operation == MATH.hasProduct: + factors = list(x[2] for x in self.triples((parent, MATH.hasFactor, None))) + prov[propname]['operation'] = 'multiplication' + prov[propname]['inputs'] = {} + for count, term in enumerate(factors): + prov[propname]['inputs'][f'{count}'] = _name(term) + self._add_to_dict(term, prov) + + elif operation == MATH.hasQuotient: + divisor = self.value(parent, MATH.hasDivisor) + dividend = self.value(parent, MATH.hasDividend) + prov[propname]['operation'] = 'division' + prov[propname]['inputs'] = {} + prov[propname]['inputs']['0'] = _name(divisor) + prov[propname]['inputs']['1'] = _name(dividend) + self._add_to_dict(divisor, prov) + self._add_to_dict(dividend, prov) + + prov[propname]['found'] = True + return prov + + def generate_provenance(self, prop=None, label=None, visualize=False): + if (prop is None) and (label is None): + raise ValueError('Either prop or label must be provided') + + if prop is None: + prop = self.find_property(label) + + name = _name(prop) + prov = {} + self._add_to_dict(prop, prov) + + done = False + while not done: + done = True + keys = list(prov.keys()) + for prop in keys: + if not prov[prop]['found']: + prov = self._get_ancestor(prop, prov) + done = False + + if visualize: + return visualize_provenance(prov) + + return prov \ No newline at end of file diff --git a/atomrdf/namespace.py b/atomrdf/namespace.py index 0260ab4..9cefee3 100644 --- a/atomrdf/namespace.py +++ b/atomrdf/namespace.py @@ -72,3 +72,7 @@ def __init__(self, infile, delimiter="/"): ASMO = Namespace(os.path.join(file_location, "data/asmo.owl")) PROV = RDFLibNamespace("http://www.w3.org/ns/prov#") MDO = RDFLibNamespace("https://w3id.org/mdo/calculation/") + +MATH = RDFLibNamespace("http://purls.helmholtz-metadaten.de/asmo/") +UNSAFECMSO = RDFLibNamespace(os.path.join(file_location, "data/cmso.owl")) +UNSAFEASMO = RDFLibNamespace(os.path.join(file_location, "data/asmo.owl")) diff --git a/atomrdf/sample.py b/atomrdf/sample.py index 170a170..8a499f6 100644 --- a/atomrdf/sample.py +++ b/atomrdf/sample.py @@ -5,8 +5,11 @@ """ from pyscal3.atoms import AttrSetter from atomrdf.namespace import CMSO, PLDO, PODO, ASMO, PROV -from rdflib import RDFS +from rdflib import RDFS, Namespace, RDF, URIRef, Literal import numpy as np +import uuid + +MATH = Namespace("http://purls.helmholtz-metadaten.de/asmo/") class Sample: def __init__(self, name, sample_id, graph): @@ -44,16 +47,42 @@ def __str__(self): def __repr__(self): return f"{self._name}" + @property + def structure(self): + return self._graph.get_system_from_sample(self._sample_id) + @property def _volume(self): simcell = self._graph.value(self._sample_id, CMSO.hasSimulationCell) volume = self._graph.value(simcell, CMSO.hasVolume) - return Property(volume.toPython(), graph=self._graph) + + #initially query if there is property with label volume + inps = [k[2] for k in self._graph.triples((self._sample_id, CMSO.hasCalculatedProperty, None))] + labels = [self._graph.value(inp, RDFS.label) for inp in inps] + if not "Volume" in labels: + parent = self._graph.create_node(URIRef(f'property:{uuid.uuid4()}'), CMSO.CalculatedProperty) + self._graph.add((parent, ASMO.hasValue, Literal(volume.toPython()))) + self._graph.add((parent, RDFS.label, Literal("Volume"))) + self._graph.add((self._sample_id, CMSO.hasCalculatedProperty, parent)) + self._graph.add((parent, ASMO.hasUnit, URIRef(f"http://qudt.org/vocab/unit/ANGSTROM3"))) + else: + parent = inps[labels.index("Volume")] + return Property(volume.toPython(), graph=self._graph, parent=parent, unit='ANGSTROM3') @property def _no_of_atoms(self): no_atoms = self._graph.value(self._sample_id, CMSO.hasNumberOfAtoms) - return Property(no_atoms.toPython(), graph=self._graph) + inps = [k[2] for k in self._graph.triples((self._sample_id, CMSO.hasCalculatedProperty, None))] + labels = [self._graph.value(inp, RDFS.label) for inp in inps] + if not "NumberOfAtoms" in labels: + parent = self._graph.create_node(URIRef(f'property:{uuid.uuid4()}'), CMSO.CalculatedProperty) + self._graph.add((parent, ASMO.hasValue, Literal(no_atoms.toPython()))) + self._graph.add((parent, RDFS.label, Literal("NumberOfAtoms"))) + self._graph.add((self._sample_id, CMSO.hasCalculatedProperty, parent)) + else: + parent = inps[labels.index("NumberOfAtoms")] + return Property(no_atoms.toPython(), graph=self._graph, parent=parent) + @property def _input_properties(self): @@ -65,11 +94,12 @@ def _input_properties(self): values = [self._graph.value(inp, ASMO.hasValue) for inp in inps] units = [self._graph.value(inp, ASMO.hasUnit) for inp in inps] units = [unit if unit is None else unit.toPython().split('/')[-1] for unit in units] - - props = [Property(value.toPython(), unit=unit, graph=self._graph) for value, unit in zip(values, units)] + props = [] + for count, value in enumerate(values): + props.append(Property(value.toPython(), unit=units[count], graph=self._graph, parent=inps[count])) return labels, props return [], [] - + @property def _output_properties(self): inps = [k[2] for k in self._graph.triples((self._sample_id, CMSO.hasCalculatedProperty, None))] @@ -78,15 +108,18 @@ def _output_properties(self): values = [self._graph.value(inp, ASMO.hasValue) for inp in inps] units = [self._graph.value(inp, ASMO.hasUnit) for inp in inps] units = [unit if unit is None else unit.toPython().split('/')[-1] for unit in units] - - props = [Property(value.toPython(), unit=unit, graph=self._graph) for value, unit in zip(values, units)] - return labels, props + props = [] + for count, value in enumerate(values): + props.append(Property(value.toPython(), unit=units[count], graph=self._graph, parent=inps[count])) + return labels, props class Property: - def __init__(self, value, unit=None, graph=None): + def __init__(self, value, unit=None, graph=None, parent=None): self._value = value self._unit = unit self._graph = graph + self._parent = parent + self._label = None def __repr__(self): if self._unit is not None: @@ -97,24 +130,106 @@ def __repr__(self): def value(self): return self._value + @property + def label(self): + if self._graph is not None: + label = self._graph.value(self._parent, RDFS.label) + if label is not None: + return label.toPython() + return self._label + + @label.setter + def label(self, value): + self._label = value + if self._graph is not None: + if self._parent is not None: + self._graph.remove((self._parent, RDFS.label, None)) + self._graph.add((self._parent, RDFS.label, Literal(value))) + + def _create_label(self, v1, v2, operation): + if isinstance(v1, Property): + v1 = v1.label + else: + v1 = f'v{str(v1)}' + if isinstance(v2, Property): + v2 = v2.label + else: + v2 = f'v{str(v2)}' + return f'({v1}{operation}{v2})' + def _declass(self, item): if isinstance(item, Property): return item.value else: return item - + + def _wrap(self, value): + if isinstance(value, Property): + return value._parent + else: + return Literal(value) + + #create node with units + def _create_node(self, res): + parent = self._graph.create_node(URIRef(f'property:{uuid.uuid4()}'), CMSO.CalculatedProperty) + self._graph.add((parent, ASMO.hasValue, Literal(res))) + if self._unit is not None: + self._graph.add((parent, ASMO.hasUnit, URIRef(f"http://qudt.org/vocab/unit/{self._unit}"))) + return parent + #overloaded operations def __add__(self, value): - return Property(self._value + self._declass(value), unit=self._unit, graph=self._graph) + res = self._value + self._declass(value) + parent = self._create_node(res) + res_prop = Property(res, unit=self._unit, graph=self._graph, parent=parent) + res_prop.label = self._create_label(self, value, '+') + if self._graph is not None: + operation = URIRef(f'operation:{uuid.uuid4()}') + self._graph.add((operation, RDF.type, MATH.Addition)) + self._graph.add((operation, MATH.hasAddend, self._wrap(value))) + self._graph.add((operation, MATH.hasAddend, self._wrap(self))) + self._graph.add((operation, MATH.hasSum, self._wrap(res_prop))) + return res_prop def __sub__(self, value): - return Property(self._value - self._declass(value), unit=self._unit, graph=self._graph) + res = self._value - self._declass(value) + parent = self._create_node(res) + res_prop = Property(res, unit=self._unit, graph=self._graph, parent=parent) + res_prop.label = self._create_label(self, value, '-') + if self._graph is not None: + operation = URIRef(f'operation:{uuid.uuid4()}') + self._graph.add((operation, RDF.type, MATH.Subtraction)) + self._graph.add((operation, MATH.hasMinuend, self._wrap(self))) + self._graph.add((operation, MATH.hasSubtrahend, self._wrap(value))) + self._graph.add((operation, MATH.hasDifference, self._wrap(res_prop))) + return res_prop def __mul__(self, value): - return Property(self._value * self._declass(value), unit=self._unit, graph=self._graph) + res = self._value * self._declass(value) + parent = self._create_node(res) + res_prop = Property(res, unit=self._unit, graph=self._graph, parent=parent) + res_prop.label = self._create_label(self, value, '*') + if self._graph is not None: + operation = URIRef(f'operation:{uuid.uuid4()}') + self._graph.add((operation, RDF.type, MATH.Multiplication)) + self._graph.add((operation, MATH.hasFactor, self._wrap(self))) + self._graph.add((operation, MATH.hasFactor, self._wrap(value))) + self._graph.add((operation, MATH.hasProduct, self._wrap(res_prop))) + return res_prop def __truediv__(self, value): - return Property(self._value / self._declass(value), unit=self._unit, graph=self._graph) + res = self._value / self._declass(value) + parent = self._create_node(res) + res_prop = Property(res, unit=self._unit, graph=self._graph, parent=parent) + res_prop.label = self._create_label(self, value, '/') + if self._graph is not None: + operation = URIRef(f'operation:{uuid.uuid4()}') + self._graph.add((operation, RDF.type, MATH.Division)) + self._graph.add((operation, MATH.hasDivisor, self._wrap(self))) + self._graph.add((operation, MATH.hasDividend, self._wrap(value))) + self._graph.add((operation, MATH.hasQuotient, self._wrap(res_prop))) + return res_prop + def __eq__(self, value): return self._value == self._declass(value) @@ -135,10 +250,10 @@ def __ge__(self, value): return self._value >= self._declass(value) def __neg__(self): - return Property(-self._value, unit=self._unit, graph=self._graph) + return Property(-self._value, unit=self._unit, graph=self._graph, parent=self._parent) def __abs__(self): - return Property(abs(self._value), unit=self._unit, graph=self._graph) + return Property(abs(self._value), unit=self._unit, graph=self._graph, parent=self._parent) def __round__(self, n): - return Property(round(self._value, n), unit=self._unit, graph=self._graph) \ No newline at end of file + return Property(round(self._value, n), unit=self._unit, graph=self._graph, parent=self._parent) \ No newline at end of file diff --git a/atomrdf/structure.py b/atomrdf/structure.py index 2b5bd3c..a01da69 100644 --- a/atomrdf/structure.py +++ b/atomrdf/structure.py @@ -554,13 +554,15 @@ def __init__( # for post-processing of structures self.graph = graph self.names = names + self._material = None self._atom_ids = None if source is not None: self.__dict__.update(source.__dict__) self.write = AttrSetter() mapdict = {} - mapdict['ase'] = update_wrapper(partial(convert_snap, self), convert_snap) + mapdict['ase'] = update_wrapper(partial(self.to_file, format='ase'), self.to_file) + mapdict['pyiron'] = update_wrapper(partial(self.to_file, format='pyiron'), self.to_file) mapdict['file'] = self.to_file mapdict['dict'] = update_wrapper(partial(serialize.serialize, self, return_type='dict'), serialize.serialize) mapdict['json'] = update_wrapper(partial(serialize.serialize, self, return_type='json'), serialize.serialize) @@ -598,6 +600,16 @@ def __init__( self.schema._add_attribute(mapdict) + @property + def material(self): + if self._material is None: + self._material = self.graph.value(self.sample, CMSO.hasMaterial) + return self._material + + @material.setter + def material(self, value): + self._material = value + def delete(self, ids=None, indices=None, condition=None, selection=False): """ Delete atoms from the structure. @@ -1103,7 +1115,7 @@ def write_quatum_espresso_id(self, outfile): for line in lines: fout.write(line) - def to_file(self, outfile, format='lammps-dump', customkeys=None, customvals=None, + def to_file(self, filename=None, format='lammps-dump', customkeys=None, customvals=None, compressed=False, timestep=0, species=None, add_sample_id=True, input_data=None, pseudopotentials=None, kspacing=None, kpts=None, @@ -1158,28 +1170,49 @@ def to_file(self, outfile, format='lammps-dump', customkeys=None, customvals=Non ------- None """ + if filename is None: + outfile = f"structure.out" + else: + outfile = filename + if format == "ase": - return self.write.ase() + asesys = convert_snap(self) + if self.sample is not None: + asesys.info["sample_id"] = self.sample + return asesys + + elif format == "pyiron": + from pyiron_atomistics.atomistics.structure.atoms import ase_to_pyiron + asesys = convert_snap(self) + pyironsys = ase_to_pyiron(asesys) + if self.sample is not None: + pyironsys.info["sample_id"] = self.sample + return pyironsys + elif format == "poscar": - asesys = self.write.ase() + asesys = convert_snap(self) write(outfile, asesys, format="vasp") if add_sample_id and (self.sample is not None): self.write_poscar_id(outfile) + elif format == "lammps-dump": inputmethods.to_file(self, outfile, format='lammps-dump', customkeys=customkeys, customvals=customvals, compressed=compressed, timestep=timestep, species=species) + elif format == "lammps-data": - asesys = self.write.ase() + asesys = convert_snap(self) write(outfile, asesys, format='lammps-data', atom_style='atomic') + elif format == "quantum-espresso": - asesys = self.write.ase() + asesys = convert_snap(self) write(outfile, asesys, format='espresso-in', input_data=input_data, pseudopotentials=pseudopotentials, kspacing=kspacing, kpts=kpts, koffset=koffset, crystal_coordinates=crystal_coordinates) if add_sample_id and (self.sample is not None): self.write_quatum_espresso_id(outfile) + else: - asesys = self.write.ase() + asesys = convert_snap(self) write(outfile, asesys, format=format) def to_graph(self): diff --git a/atomrdf/visualize.py b/atomrdf/visualize.py index c2ce6ba..ca806fc 100644 --- a/atomrdf/visualize.py +++ b/atomrdf/visualize.py @@ -48,6 +48,16 @@ def get_string_from_URI(x): if len(rawsplit) > 1: return "_".join(rawsplit), "BNode" + if "operation:" in raw: + rawsplit = raw.split(":") + if len(rawsplit) > 1: + return "_".join(rawsplit), "BNode" + + if "property:" in raw: + rawsplit = raw.split(":") + if len(rawsplit) > 1: + return "_".join(rawsplit), "BNode" + # just a normal url split now rawsplit = raw.split("/") if len(rawsplit) > 1: @@ -232,3 +242,83 @@ def visualize_graph( ) return dot + +def _id(item): + return str(item).replace(':', '_') + +def visualize_provenance( + prov, + rankdir="TB", + size=None, + layout="dot", +): + dot = graphviz.Digraph() + dot.attr( + rankdir=rankdir, + style="filled", + size=size, + layout=layout, + overlap="false", + ) + #add all nodes + for key in prov.keys(): + nid = _id(key) + #if "activity" in key: + dot.node(nid, label=prov[key]['label'], + shape='box', + color="#C9DAF8", + style="filled", + fontname='Helvetica', + fontsize='8') + #else: + # dot.node(nid, label=prov[key]['label'], + # shape='parallelogram', + # color="#C9DAF8", + # style="filled", + # fontname='Helvetica', + # fontsize='8') + #add all edges + for key, val in prov.items(): + if 'inputs' in val.keys(): + if val['operation'] == 'input_parameter': + for subkey, subval in val['inputs'].items(): + dot.edge(_id(subval), _id(key), label='input_param', + color="#263238", + fontname='Helvetica', + fontsize='8') + if val['operation'] == 'output_parameter': + for subkey, subval in val['inputs'].items(): + dot.edge(_id(subval), _id(key), label='output_param', + color="#263238", + fontname='Helvetica', + fontsize='8') + elif val['operation'] == 'sample_for_activity': + for subkey, subval in val['inputs'].items(): + dot.edge(_id(subval), _id(key), label='input_sample', + color="#263238", + fontname='Helvetica', + fontsize='8') + elif val['operation'] == 'sample_output': + for subkey, subval in val['inputs'].items(): + dot.edge(_id(subval), _id(key), label='output_sample', + color="#263238", + fontname='Helvetica', + fontsize='8') + else: + operation_id = str(uuid.uuid4()) + operation = dot.node(operation_id, label=val['operation'], + color="#E6B8AF", + shape='box', + style='filled', + fontname='Helvetica', + fontsize='8') + for subkey, subval in val['inputs'].items(): + dot.edge(_id(subval), operation_id, label='input', + color="#263238", + fontname='Helvetica', + fontsize='8') + dot.edge(operation_id, _id(key), label='output', + color="#263238", + fontname='Helvetica', + fontsize='8') + return dot \ No newline at end of file