# Develop and test hash based node storage

The following implementation requires a PostgreSQL database in order to store and query JSON data types. Running this notebook therefore requires a running PostgreSQL server. I use the following tool, which, however, only works on macOS: [https://postgresapp.com](https://postgresapp.com)

In `pyiron_workflow`, add the following line to `function_node_factory` in `function.py`:

    "__annotate__": node_function.__annotate__ if hasattr(node_function, '__annotate__') else None,

### Set path, import etc.

In [1]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path(Path.cwd()).parent))

In [2]:
import warnings
warnings.filterwarnings("ignore")

%config IPCompleter.evaluation='unsafe'

In [3]:
from node_library.development import hash_based_storage as hs

### Perform a few tests for the hash_based_storage module

In [4]:
db = hs.create_nodes_table(echo=False)

In [5]:
hs.list_column_names(db, 'node')

['node_id',
 'name',
 'hash_value',
 'lib_path',
 'creation_date',
 'inputs',
 'outputs',
 'output_ready',
 'file_path']

**Note:** Uncomment the first two lines of the following cell if you have modified the table structure. After the table has been dropped, comment these lines out again and rerun the notebook (so that `create_nodes_table` recreates the table).

In [6]:
# hs.drop_table(db, 'node')
# hs.list_column_names(db, 'node')
db = hs.create_nodes_table(echo=False)

In [7]:
hs.list_table(db)

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,inputs,outputs,output_ready,file_path
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'x': '[17]'},{'sin': '[-0.96139749]'},True,
1,2,,3d4f127b05459a1d1dda98bc3e6f10176e09f27f3ab9b1...,node_library/math/Sin,2024-07-27 17:42:09.551274,{'x': '[12]'},{'sin': '[-0.53657292]'},True,
2,3,,12201da17c241990e5b7725a62de4189bea0cf57ad462c...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.073002,"{'element': ''Pt'', 'engine': 'None'}",{'chemical_potential': '-0.00012460841212025286'},True,
3,4,,0b26dce95241b25be07d968f3a10863d08d1edace00830...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.334980,"{'element': ''Pd'', 'engine': 'None'}",{'chemical_potential': '0.00034226253730329503'},True,
4,5,,d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef3...,node_library/atomistic/engine/ase/M3GNet,2024-07-27 17:42:10.541864,{},,False,
5,6,,4aa32e7e995920ffee1b936fdc0b3e570ffa586fc14080...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.546796,"{'element': ''Pt'', 'engine': 'hash_d87d4a45cd...",{'chemical_potential': '-6.065089225769043'},True,
6,7,,7efde22c8ae69d7da09b577b69ee74e0b74fe3f891cea9...,None/compute_elastic_constants,2024-07-27 17:42:32.307444,"{'element': ''Ni'', 'parameters': 'InputElasti...",{'BV': '215.6460213089325'},True,
7,8,,33682b8b2bd1d646f08081acca6ed97bca54d3ab8730a0...,None/compute_elastic_constants,2024-07-27 17:42:41.159084,"{'element': ''Al'', 'parameters': 'InputElasti...",{'BV': '55.03996979198493'},True,
8,9,,f0af82047d99aa91807ce7ab1b36a92d986a996793cef4...,None/compute_elastic_constants,2024-07-27 17:42:49.398273,"{'element': ''Fe'', 'parameters': 'InputElasti...",{'BV': '93.38382852298619'},True,
9,10,,a971878fcfbd90b7f7770a9b3b7e276e3809f1ff0e1d1c...,node_library/atomistic/structure/build/cubic_b...,2024-07-27 17:42:49.636440,"{'element': ''Ni'', 'cell_size': '3', 'vacancy...",,False,


In [8]:
hs.remove_nodes_from_db(db, indices=[12, 13]);

In [9]:
df = hs.list_table(db)
df

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,inputs,outputs,output_ready,file_path
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'x': '[17]'},{'sin': '[-0.96139749]'},True,
1,2,,3d4f127b05459a1d1dda98bc3e6f10176e09f27f3ab9b1...,node_library/math/Sin,2024-07-27 17:42:09.551274,{'x': '[12]'},{'sin': '[-0.53657292]'},True,
2,3,,12201da17c241990e5b7725a62de4189bea0cf57ad462c...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.073002,"{'element': ''Pt'', 'engine': 'None'}",{'chemical_potential': '-0.00012460841212025286'},True,
3,4,,0b26dce95241b25be07d968f3a10863d08d1edace00830...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.334980,"{'element': ''Pd'', 'engine': 'None'}",{'chemical_potential': '0.00034226253730329503'},True,
4,5,,d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef3...,node_library/atomistic/engine/ase/M3GNet,2024-07-27 17:42:10.541864,{},,False,
5,6,,4aa32e7e995920ffee1b936fdc0b3e570ffa586fc14080...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.546796,"{'element': ''Pt'', 'engine': 'hash_d87d4a45cd...",{'chemical_potential': '-6.065089225769043'},True,
6,7,,7efde22c8ae69d7da09b577b69ee74e0b74fe3f891cea9...,None/compute_elastic_constants,2024-07-27 17:42:32.307444,"{'element': ''Ni'', 'parameters': 'InputElasti...",{'BV': '215.6460213089325'},True,
7,8,,33682b8b2bd1d646f08081acca6ed97bca54d3ab8730a0...,None/compute_elastic_constants,2024-07-27 17:42:41.159084,"{'element': ''Al'', 'parameters': 'InputElasti...",{'BV': '55.03996979198493'},True,
8,9,,f0af82047d99aa91807ce7ab1b36a92d986a996793cef4...,None/compute_elastic_constants,2024-07-27 17:42:49.398273,"{'element': ''Fe'', 'parameters': 'InputElasti...",{'BV': '93.38382852298619'},True,
9,10,,a971878fcfbd90b7f7770a9b3b7e276e3809f1ff0e1d1c...,node_library/atomistic/structure/build/cubic_b...,2024-07-27 17:42:49.636440,"{'element': ''Ni'', 'cell_size': '3', 'vacancy...",,False,


In [10]:
hs.transform_data_column(df)

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,outputs,output_ready,file_path,x,element,engine,parameters,cell_size,vacancy_index,structure,calculator
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'sin': '[-0.96139749]'},True,,[17],,,,,,,
1,2,,3d4f127b05459a1d1dda98bc3e6f10176e09f27f3ab9b1...,node_library/math/Sin,2024-07-27 17:42:09.551274,{'sin': '[-0.53657292]'},True,,[12],,,,,,,
2,3,,12201da17c241990e5b7725a62de4189bea0cf57ad462c...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.073002,{'chemical_potential': '-0.00012460841212025286'},True,,,'Pt',,,,,,
3,4,,0b26dce95241b25be07d968f3a10863d08d1edace00830...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.334980,{'chemical_potential': '0.00034226253730329503'},True,,,'Pd',,,,,,
4,5,,d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef3...,node_library/atomistic/engine/ase/M3GNet,2024-07-27 17:42:10.541864,,False,,,,,,,,,
5,6,,4aa32e7e995920ffee1b936fdc0b3e570ffa586fc14080...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.546796,{'chemical_potential': '-6.065089225769043'},True,,,'Pt',hash_d87d4a45cd97ce5e8787c85826781fbc3adf72afc...,,,,,
6,7,,7efde22c8ae69d7da09b577b69ee74e0b74fe3f891cea9...,None/compute_elastic_constants,2024-07-27 17:42:32.307444,{'BV': '215.6460213089325'},True,,,'Ni',,"InputElasticTensor(num_of_point=5, eps_range=0...",,,,
7,8,,33682b8b2bd1d646f08081acca6ed97bca54d3ab8730a0...,None/compute_elastic_constants,2024-07-27 17:42:41.159084,{'BV': '55.03996979198493'},True,,,'Al',,"InputElasticTensor(num_of_point=5, eps_range=0...",,,,
8,9,,f0af82047d99aa91807ce7ab1b36a92d986a996793cef4...,None/compute_elastic_constants,2024-07-27 17:42:49.398273,{'BV': '93.38382852298619'},True,,,'Fe',,"InputElasticTensor(num_of_point=5, eps_range=0...",,,,
9,10,,a971878fcfbd90b7f7770a9b3b7e276e3809f1ff0e1d1c...,node_library/atomistic/structure/build/cubic_b...,2024-07-27 17:42:49.636440,,False,,,'Ni',,,3.0,0.0,,


In [11]:
hs.db_query_dict(db, x='[17]')

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,inputs,outputs,output_ready,file_path
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'x': '[17]'},{'sin': '[-0.96139749]'},True,


### Connect to pyiron_workflow

In [12]:
from pyiron_workflow import Workflow

In [13]:
# Register the necessary node packages                                                                        
Workflow.register("node_library.math", "math")
Workflow.register("node_library.atomistic", "atomistic") 



In [14]:
sin = Workflow.create.math.Sin(x=[17])
sin.outputs.sin.value

NOT_DATA

In [15]:
df = hs.list_table(db)
df

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,inputs,outputs,output_ready,file_path
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'x': '[17]'},{'sin': '[-0.96139749]'},True,
1,2,,3d4f127b05459a1d1dda98bc3e6f10176e09f27f3ab9b1...,node_library/math/Sin,2024-07-27 17:42:09.551274,{'x': '[12]'},{'sin': '[-0.53657292]'},True,
2,3,,12201da17c241990e5b7725a62de4189bea0cf57ad462c...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.073002,"{'element': ''Pt'', 'engine': 'None'}",{'chemical_potential': '-0.00012460841212025286'},True,
3,4,,0b26dce95241b25be07d968f3a10863d08d1edace00830...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.334980,"{'element': ''Pd'', 'engine': 'None'}",{'chemical_potential': '0.00034226253730329503'},True,
4,5,,d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef3...,node_library/atomistic/engine/ase/M3GNet,2024-07-27 17:42:10.541864,{},,False,
5,6,,4aa32e7e995920ffee1b936fdc0b3e570ffa586fc14080...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.546796,"{'element': ''Pt'', 'engine': 'hash_d87d4a45cd...",{'chemical_potential': '-6.065089225769043'},True,
6,7,,7efde22c8ae69d7da09b577b69ee74e0b74fe3f891cea9...,None/compute_elastic_constants,2024-07-27 17:42:32.307444,"{'element': ''Ni'', 'parameters': 'InputElasti...",{'BV': '215.6460213089325'},True,
7,8,,33682b8b2bd1d646f08081acca6ed97bca54d3ab8730a0...,None/compute_elastic_constants,2024-07-27 17:42:41.159084,"{'element': ''Al'', 'parameters': 'InputElasti...",{'BV': '55.03996979198493'},True,
8,9,,f0af82047d99aa91807ce7ab1b36a92d986a996793cef4...,None/compute_elastic_constants,2024-07-27 17:42:49.398273,"{'element': ''Fe'', 'parameters': 'InputElasti...",{'BV': '93.38382852298619'},True,
9,10,,a971878fcfbd90b7f7770a9b3b7e276e3809f1ff0e1d1c...,node_library/atomistic/structure/build/cubic_b...,2024-07-27 17:42:49.636440,"{'element': ''Ni'', 'cell_size': '3', 'vacancy...",,False,


In [16]:
# hs.get_json_size(hs.extract_node_output(sin))# , 
hs.extract_node_output(sin)

{'sin': 'NOT_DATA'}

In [17]:
hs.extract_node_output(sin, as_string=True)

{'sin': 'NOT_DATA'}

In [18]:
hs.save_node(sin, db) 

1

In [19]:
hs.list_table(db)

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,inputs,outputs,output_ready,file_path
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'x': '[17]'},{'sin': '[-0.96139749]'},True,
1,2,,3d4f127b05459a1d1dda98bc3e6f10176e09f27f3ab9b1...,node_library/math/Sin,2024-07-27 17:42:09.551274,{'x': '[12]'},{'sin': '[-0.53657292]'},True,
2,3,,12201da17c241990e5b7725a62de4189bea0cf57ad462c...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.073002,"{'element': ''Pt'', 'engine': 'None'}",{'chemical_potential': '-0.00012460841212025286'},True,
3,4,,0b26dce95241b25be07d968f3a10863d08d1edace00830...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.334980,"{'element': ''Pd'', 'engine': 'None'}",{'chemical_potential': '0.00034226253730329503'},True,
4,5,,d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef3...,node_library/atomistic/engine/ase/M3GNet,2024-07-27 17:42:10.541864,{},,False,
5,6,,4aa32e7e995920ffee1b936fdc0b3e570ffa586fc14080...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.546796,"{'element': ''Pt'', 'engine': 'hash_d87d4a45cd...",{'chemical_potential': '-6.065089225769043'},True,
6,7,,7efde22c8ae69d7da09b577b69ee74e0b74fe3f891cea9...,None/compute_elastic_constants,2024-07-27 17:42:32.307444,"{'element': ''Ni'', 'parameters': 'InputElasti...",{'BV': '215.6460213089325'},True,
7,8,,33682b8b2bd1d646f08081acca6ed97bca54d3ab8730a0...,None/compute_elastic_constants,2024-07-27 17:42:41.159084,"{'element': ''Al'', 'parameters': 'InputElasti...",{'BV': '55.03996979198493'},True,
8,9,,f0af82047d99aa91807ce7ab1b36a92d986a996793cef4...,None/compute_elastic_constants,2024-07-27 17:42:49.398273,"{'element': ''Fe'', 'parameters': 'InputElasti...",{'BV': '93.38382852298619'},True,
9,10,,a971878fcfbd90b7f7770a9b3b7e276e3809f1ff0e1d1c...,node_library/atomistic/structure/build/cubic_b...,2024-07-27 17:42:49.636440,"{'element': ''Ni'', 'cell_size': '3', 'vacancy...",,False,


In [20]:
sin = Workflow.create.math.Sin(x=[12])
sin12 = hs.run_node(sin, db, verbose=True)

node with id=2 is loaded rather than recomputed


In [21]:
sin

<node_library.math.Sin at 0x1485a6060>

In [22]:
sin.outputs.sin

<pyiron_workflow.mixin.injection.OutputDataWithInjection at 0x149735be0>

In [23]:
%%time
for node_id in range(3):
    sin2 = hs.get_node_from_db_id(node_id, db)
    if sin2 is not None:
        print (f'Node id: {node_id}, inputs: {sin2.inputs.x.value}, outputs: {sin2.outputs.sin.value}')
    else:
        print (f'Node id: {node_id} does not exist')

Node id: 0 does not exist
Node id: 1, inputs: [17], outputs: [-0.96139749]
Node id: 2, inputs: [12], outputs: [-0.53657292]
CPU times: user 6.61 ms, sys: 4.81 ms, total: 11.4 ms
Wall time: 111 ms


### Real world example

In [24]:
from pyiron_workflow import Workflow                                                                        
                                                                                                         
# Register the necessary node packages                                                                      
Workflow.register("node_library.atomistic", "atomistic") 

In [25]:
get_pot = Workflow.create.atomistic.property.thermodynamics.get_chemical_potential(element='Pt')
get_pot.run()

{'chemical_potential': -0.00012460841212025286}

In [26]:
get_pot  # why are inputs/outputs for macro not visible?

<node_library.atomistic.property.thermodynamics.get_chemical_potential at 0x149757020>

In [27]:
hs.save_node(get_pot, db)

3

In [28]:
hs.list_table(db)

Unnamed: 0,node_id,name,hash_value,lib_path,creation_date,inputs,outputs,output_ready,file_path
0,1,,c16d79b83fad085c61d52223687f65ac5f258005921c83...,node_library/math/Sin,2024-07-27 17:42:09.390892,{'x': '[17]'},{'sin': '[-0.96139749]'},True,
1,2,,3d4f127b05459a1d1dda98bc3e6f10176e09f27f3ab9b1...,node_library/math/Sin,2024-07-27 17:42:09.551274,{'x': '[12]'},{'sin': '[-0.53657292]'},True,
2,3,,12201da17c241990e5b7725a62de4189bea0cf57ad462c...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.073002,"{'element': ''Pt'', 'engine': 'None'}",{'chemical_potential': '-0.00012460841212025286'},True,
3,4,,0b26dce95241b25be07d968f3a10863d08d1edace00830...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.334980,"{'element': ''Pd'', 'engine': 'None'}",{'chemical_potential': '0.00034226253730329503'},True,
4,5,,d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef3...,node_library/atomistic/engine/ase/M3GNet,2024-07-27 17:42:10.541864,{},,False,
5,6,,4aa32e7e995920ffee1b936fdc0b3e570ffa586fc14080...,node_library/atomistic/property/thermodynamics...,2024-07-27 17:42:10.546796,"{'element': ''Pt'', 'engine': 'hash_d87d4a45cd...",{'chemical_potential': '-6.065089225769043'},True,
6,7,,7efde22c8ae69d7da09b577b69ee74e0b74fe3f891cea9...,None/compute_elastic_constants,2024-07-27 17:42:32.307444,"{'element': ''Ni'', 'parameters': 'InputElasti...",{'BV': '215.6460213089325'},True,
7,8,,33682b8b2bd1d646f08081acca6ed97bca54d3ab8730a0...,None/compute_elastic_constants,2024-07-27 17:42:41.159084,"{'element': ''Al'', 'parameters': 'InputElasti...",{'BV': '55.03996979198493'},True,
8,9,,f0af82047d99aa91807ce7ab1b36a92d986a996793cef4...,None/compute_elastic_constants,2024-07-27 17:42:49.398273,"{'element': ''Fe'', 'parameters': 'InputElasti...",{'BV': '93.38382852298619'},True,
9,10,,a971878fcfbd90b7f7770a9b3b7e276e3809f1ff0e1d1c...,node_library/atomistic/structure/build/cubic_b...,2024-07-27 17:42:49.636440,"{'element': ''Ni'', 'cell_size': '3', 'vacancy...",,False,


In [29]:
# hs.remove_nodes_from_db(db, [26])

In [30]:
get_pot2 = Workflow.create.atomistic.property.thermodynamics.get_chemical_potential(element='Pd')

In [31]:
node = hs.run_node(get_pot2, db, verbose=True)
node.inputs.element.value

node with id=4 is loaded rather than recomputed


'Pd'

## Workflow with node as input (convert input node rather than its output to hash)

### Structure-repeat example

In [32]:
Al = Workflow.create.atomistic.structure.build.bulk('Al')
repeat = Workflow.create.atomistic.structure.transform.repeat(structure=Al, repeat_scalar=3)
out = repeat.pull()

In [33]:
inp_node = hs.get_all_connected_input_nodes(repeat)['structure']
hs.get_node_hash(inp_node, db)

'0bfedd3665f3b1bc1e81fdd02cd09865a4aa386eafb80048a5ae1bc9c5cce816'

### Chemical potential - engine example

In [35]:
engine = Workflow.create.atomistic.engine.ase.M3GNet()

In [36]:
elastic_M3GNet = Workflow.create.atomistic.property.thermodynamics.get_chemical_potential(element='Pt', engine=engine)

In [37]:
hs.save_node(elastic_M3GNet, db) 

6

In [38]:
new_node = hs.get_node_from_db_id(6, db)
hs.extract_node_output(new_node)

{'chemical_potential': '-6.065089225769043'}

In [39]:
hs.extract_node_output(hs.run_node(new_node, db))

{'chemical_potential': '-6.065089225769043'}

#### Some TODO issues

In [40]:
# TODO: replace hash by node value (it works but is confusing)

hs.extract_node_input(new_node, db)

{'element': "'Pt'",
 'engine': 'hash_d87d4a45cd97ce5e8787c85826781fbc3adf72afc7cef33e7efd1f2b68c7b5e9'}

In [41]:
node = hs.get_node_from_db_id(3, db)
node.inputs.engine

<pyiron_workflow.channels.InputData at 0x17567ddc0>

In [42]:
node = hs.get_node_from_db_id(6, db)
# Named `engine_hash` (not `hash`) so the builtin hash() is not shadowed for
# the remainder of the kernel session.
engine_hash = hs.extract_node_input(node, db)['engine']
engine_node = hs.eval_db_value(engine_hash, db).pull()


In [43]:
elastic_M3GNet.inputs.engine = hs.extract_node_input(hs.get_node_from_db_id(6, db), db)['engine']

In [44]:
# it looks like the following assignment/connection does not work 

elastic_M3GNet.inputs.engine = engine_node

In [45]:
new_node.inputs.engine

<pyiron_workflow.channels.InputData at 0x17763f680>

### Elastic constants

#### With locally defined macro 

In [46]:
atomistic = Workflow.create.atomistic
from node_library.atomistic.property.elastic import InputElasticTensor

@Workflow.wrap.as_macro_node('BV')
def compute_elastic_constants(wf, element='Fe', parameters=InputElasticTensor(eps_range=0.02)): # eps_range=0.01):
    """Macro that wires up an elastic-constants calculation and exposes ``BV``.

    Child nodes attached to ``wf``: an ``M3GNet`` engine, a cubic bulk cell
    (``cell_size=3``, ``vacancy_index=0``) for ``element``, an
    ``atomistic.calculator.ase.static`` node and an ``elastic_constants`` node.

    Args:
        wf: Macro object the child nodes are attached to.
        element: Element forwarded to ``cubic_bulk_cell``.
        parameters: Forwarded to ``elastic_constants``; defaults to
            ``InputElasticTensor(eps_range=0.02)``.

    Returns:
        The ``BV`` entry of the ``elastic`` output channel of the
        elastic-constants node (presumably a bulk modulus -- TODO confirm).

    NOTE(review): the default ``parameters`` object is created once, when the
    function is defined, and is therefore the same instance for every call.
    NOTE(review): ``wf.calc`` is attached but does not feed the returned value.
    """
    
    # Engine and supercell are shared by the static calculation and the elastic node.
    wf.engine = atomistic.engine.ase.M3GNet()
    wf.supercell = atomistic.structure.build.cubic_bulk_cell(element=element, cell_size=3, vacancy_index=0)
    wf.calc = atomistic.calculator.ase.static(structure=wf.supercell, engine=wf.engine)
    wf.elastic = atomistic.property.elastic.elastic_constants(structure=wf.supercell, engine=wf.engine, parameters=parameters)
    return wf.elastic.outputs.elastic.BV
    
out = compute_elastic_constants(element='Ni').pull() #iter(eps_range=np.linspace(1e-4, 1, 11))
out



{'BV': 207.853238427253}

In [47]:
out

{'BV': 207.853238427253}

In [48]:
for el in ['Ni', 'Al', 'Fe']:
    elastic_node = compute_elastic_constants(element=el, parameters=InputElasticTensor(eps_range=0.001))
    hs.run_node(elastic_node, db)

NodeTable(id=7): Save your node class in a node_library and/or register it!
NodeTable(id=8): Save your node class in a node_library and/or register it!
NodeTable(id=9): Save your node class in a node_library and/or register it!


Note: 
- Works well
- But: for workflows/functions defined in notebook the node cannot be recreated (this is a fundamental issue, save code in db?)

#### With a node defined in the library

In [49]:
engine = Workflow.create.atomistic.engine.ase.M3GNet()
structure = Workflow.create.atomistic.structure.build.bulk('Pb', cubic=True)
elastic = Workflow.create.atomistic.property.elastic.elastic_constants(structure=structure, engine=engine) #, parameters=parameters)
# out = elastic.pull()

The following line fails because the node cannot resolve the dataclass import and therefore cannot be loaded. A simple yet elegant way
to solve this would be to turn the dataclass into a node that behaves exactly like a function node. Then the entire machinery of hash-based
node imports/loading would work. The present formulation of dataclass nodes fails. A simple approach to achieve this is shown below.

In [50]:
# hs.run_node(elastic, db)

#### Conversion of a dataclass into node (proof of concept only)

In [51]:
# dataclass nodes cannot be called via Workflow.create -> TODO
# Workflow.create.atomistic.property.elastic.InputElasticTensor
from pyiron_workflow import Workflow
from node_library.atomistic.property.elastic import InputElasticTensor 

supercell = Workflow.create.atomistic.structure.build.cubic_bulk_cell(element='Ni', cell_size=3, vacancy_index=0)
inp_parameters = InputElasticTensor()

In [52]:
# The following node function works (same result should be achieved by as_dataclass_node, i.e., dataclass should be converted into node that 
# can be loaded, used to construct workflows etc.)!
# @Workflow.wrap.as_function_node('dataclass')
# def InputElasticTensorNode(num_of_point: int = 5, eps_range: float = 0.005):
#     return InputElasticTensor(num_of_point=num_of_point, eps_range=eps_range)

In [53]:
elastic_node = Workflow.create.atomistic.property.elastic.generate_structures(structure=supercell, 
                                                                              parameters=InputElasticTensor(num_of_point=11)) 

# elastic_node.save()
out = elastic_node.pull()
hs.run_node(elastic_node, db)
elastic_node.outputs.ready

True

In [54]:
supercell = Workflow.create.atomistic.structure.build.cubic_bulk_cell(element='Ni', cell_size=3, vacancy_index=0)
m3gnet = Workflow.create.atomistic.engine.ase.M3GNet()
elastic_constants = atomistic.property.elastic.elastic_constants(
    structure=supercell,
    engine=m3gnet,
    parameters=InputElasticTensor(),
)

out = elastic_constants.pull()
# hs.run_node(elastic_constants, db)



In [57]:
import pickle

pickle.dumps(m3gnet)

PicklingError: Can't pickle <function _lambdifygenerated at 0x30257b880>: attribute lookup _lambdifygenerated on torch failed

In [58]:
pickle.dumps(engine)

b"\x80\x04\x95\xf5\x03\x00\x00\x00\x00\x00\x00\x8c\x17pyiron_snippets.factory\x94\x8c\x1b_instantiate_from_decorated\x94\x93\x94\x8c!node_library.atomistic.engine.ase\x94\x8c\x06M3GNet\x94)}\x94\x86\x94\x87\x94R\x94}\x94(\x8c\x06_label\x94h\x04\x8c\x07_parent\x94N\x8c\x12_working_directory\x94N\x8c\x10_storage_backend\x94N\x8c\x08_signals\x94\x8c\x12pyiron_workflow.io\x94\x8c\x07Signals\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94h\x0f\x8c\x0cInputSignals\x94\x93\x94)\x81\x94}\x94\x8c\x0cchannel_dict\x94\x8c\x17pyiron_snippets.dotdict\x94\x8c\x07DotDict\x94\x93\x94)\x81\x94(\x8c\x03run\x94\x8c\x18pyiron_workflow.channels\x94\x8c\x0bInputSignal\x94\x93\x94)\x81\x94}\x94(h\nh\x1e\x8c\x05owner\x94N\x8c\x0bconnections\x94]\x94\x8c\t_callback\x94h\x1eub\x8c\x12accumulate_and_run\x94h\x1f\x8c\x17AccumulatingInputSignal\x94\x93\x94)\x81\x94}\x94(h\nh(h$Nh%]\x94h'h\x1e\x8c\x10received_signals\x94\x8f\x94ubu}\x94bsb\x8c\x06output\x94h\x0f\x8c\rOutputSignals\x94\x93\x94)\x81\x94}\x94h\x19h\x1c)\x

In [55]:
m3gnet.save()

save path:  /Users/joerg/python_projects/git_libs/pyiron_nodes/notebooks/M3GNet/project.h5


RecursionError: maximum recursion depth exceeded

In [None]:
m3gnet.inputs.__reduce__()

In [None]:
m3gnet.__getstate__()

For each (node) function or data type I need a unique ID together with its class or function definition. The corresponding ID should be stored in the input/output channel (with or without the name). Given an ID, it should be possible to load the node from the database and run its functions. All necessary inputs must be defined in the database entry. The path to the Python file should be sufficient to load/create the node.

In [None]:
from dataclasses import dataclass, field


@dataclass()
class NodeAsInput:
    """Plain-data description of a node: its library path and its input values.

    Attributes:
        lib_path: Dotted library path handed to ``hs.create_node`` to re-create
            the node.
        input_dict: Mapping of input channel label to the stored input value.
    """

    lib_path: str = ''
    # default_factory gives every instance its own dict. An un-annotated
    # ``input_dict = {}`` is a plain class attribute, i.e. one dict shared by
    # all instances, so inputs of different nodes would be mixed together.
    input_dict: dict = field(default_factory=dict)

def node_to_dataclass(node):
    """Capture a node's library path and current input values in a ``NodeAsInput``.

    Args:
        node: Node exposing ``package_identifier``, ``label`` and ``inputs``.

    Returns:
        A ``NodeAsInput`` whose ``lib_path`` is the package identifier with its
        first dotted component dropped, joined with the node label, and whose
        ``input_dict`` maps every input channel label to its current value.
    """
    data = NodeAsInput()
    # Drop the leading component of the package identifier
    # (presumably the top-level package name -- TODO confirm).
    lib_path = '.'.join(node.package_identifier.split('.')[1:])
    data.lib_path = '.'.join([lib_path, node.label])

    # Bind a fresh dict rather than filling ``data.input_dict`` in place: if the
    # default dict is shared between ``NodeAsInput`` objects, in-place writes
    # would leak the inputs of one node into every other description.
    data.input_dict = {
        label: node.inputs[label].value
        for label in node.inputs.to_dict()['channels']
    }
    return data

def dataclass_to_node(data):
    """Re-create a node from a ``NodeAsInput`` description.

    Args:
        data: Object with ``lib_path`` (passed to ``hs.create_node``) and
            ``input_dict`` (input channel label -> value).

    Returns:
        The newly created node with every stored input value assigned.
    """
    new_node = hs.create_node(data.lib_path)
    # Walk the channels of the node just created instead of those of the
    # notebook-global ``inp_node``: the function must only depend on its
    # argument, otherwise it breaks (or assigns the wrong channels) as soon as
    # ``inp_node`` is rebound to a different node.
    for label in new_node.inputs.to_dict()['channels']:
        if label in data.input_dict:
            new_node.inputs[label] = data.input_dict[label]
    return new_node
    

inp_node = hs.get_all_connected_input_nodes(elastic_constants)['structure']
data = node_to_dataclass(inp_node)

dataclass_to_node(data)

In [None]:
elastic_constants.inputs.engine.value.__getstate__()

In [None]:
inp_node = hs.get_all_connected_input_nodes(elastic_constants)['engine']
hs.clone_node_with_inputs(inp_node).outputs.engine.value

In [None]:
inp_node = hs.get_all_connected_input_nodes(elastic_constants)['structure'] # ['engine']
# inp_node.outputs.engine.value = None
inp_node.inputs.to_dict()

lib_path = '.'.join(inp_node.package_identifier.split('.')[1:])
node_lib_path = '.'.join([lib_path, inp_node.label]) 
new_node = hs.create_node(node_lib_path)
for k, v in inp_node.inputs.to_dict()['channels'].items():
    print (k, node_lib_path)
    new_node.inputs[k] = inp_node.inputs[k].value


new_node
# pickle.dumps(new_node)
# elastic_constants.inputs.engine.value # ['calculator']

In [None]:
# Display the re-created node (a bare ``new_node.`` is a SyntaxError and stops "Run All").
new_node

In [None]:
import pickle
pic = pickle.dumps(elastic_constants.inputs.engine)

In [None]:
# does not work (inputs.structure as well as outputs.structure have no data)

import pickle

node_pic = pickle.dumps(elastic_node)
new_node = pickle.loads(node_pic)
new_node

In [None]:
# out = hs.run_node(elastic_node, db)

In [None]:
# gs.save()

#### Convert input/output dataclasses into nested dict (temporary solution)

In [None]:
obj = elastic.outputs['elastic'].value['C_eigval']
out_str = hs.extract_node_output(elastic, as_string=True)['elastic']

In [None]:
import json
json.dumps(out_str, sort_keys=True)

In [None]:
# does not work (RecursionError: maximum recursion depth exceeded while calling a Python object)
# need alternative for serialization
# -> json (need to remove all (unknown) classes, e.g. DataClasses but also numpy arrays etc.)
# pragmatic (intermediate) solution: hs.str_to_dict to convert such datastructures into nested dictionaries
# advantage: can be stored as json in db and can be easily converted into active python datastructure!

# elastic.save()

In [None]:
from pyiron_workflow import Workflow, dataclass_node
from node_library.atomistic.calculator.data import InputCalcMD 
InputCalcMD()

In [None]:
Workflow.wrap.as_dataclass_node?

In [None]:
Workflow.wrap.as_function_node?

In [None]:
dataclass_node?

In [None]:
ICMD = Workflow.wrap.as_dataclass_node(InputCalcMD)
# ICMD = dataclass_node(InputCalcMD) # does not work -> why not equivalent to code line above???

icmd = ICMD()
icmd.inputs.temperature = 400
icmd.inputs.temperature.value

In [None]:
# doc string gets lost in as_dataclass_node wrapper (works now)
InputCalcMD().__doc__[0] , icmd.outputs.dataclass.value.__doc__[0]

In [None]:
icmd.pull()

In [None]:
icmd.outputs.dataclass.value

In [None]:
hs.extract_node_output(elastic, as_string=True)

In [None]:
import numpy as np
np.array(eval(out_dict['strain_energy']))

In [None]:
import json
json.dumps(out_dict, sort_keys=True)


In [None]:
xx

In [None]:
hs.get_node_from_db_id(27, db, data_only=True)

In [None]:
hs.run_node(elastic_node, db)

### TODO

Note: The following save command fails (problem in pyiron_workflow!)

In [None]:
# get_pot.save()

Where does the bare `array` come from that makes it necessary to convert array -> np.array in load_node_from_json?

In [None]:
sin2.outputs.sin

**TODO:** 
- For small output use db entry rather than writing file
- What is the origin of array, which requires to convert array -> np.array in load_node_from_json?
- Replace output of node by node hash for input into next node
- add runtime to db
- write output_dic to db rather than to file (if small) 

In [None]:
xx

In [None]:
# hs.save_node(get_pot, db)

#### Node not registered (not working - save fails)

In [None]:
from pyiron_workflow import Workflow                                                                        
                                                                                                         
# Register the necessary node packages                                                                      
Workflow.register("node_library.atomistic", "atomistic")             

In [None]:
atomistic = Workflow.create.atomistic
from node_library.atomistic.property.elastic import InputElasticTensor

@Workflow.wrap.as_macro_node('BV')
def compute_elastic_constants(wf, element='Fe', parameters=InputElasticTensor(eps_range=0.02)): # eps_range=0.01):
    """Macro that wires up an elastic-constants calculation and exposes ``BV``.

    NOTE(review): this re-defines the macro of the same name from the
    "With locally defined macro" section; the later definition replaces the
    earlier one in the kernel namespace.

    Child nodes attached to ``wf``: an ``M3GNet`` engine, a cubic bulk cell
    (``cell_size=3``, ``vacancy_index=0``) for ``element``, an
    ``atomistic.calculator.ase.static`` node and an ``elastic_constants`` node.

    Args:
        wf: Macro object the child nodes are attached to.
        element: Element forwarded to ``cubic_bulk_cell``.
        parameters: Forwarded to ``elastic_constants``; defaults to
            ``InputElasticTensor(eps_range=0.02)``.

    Returns:
        The ``BV`` entry of the ``elastic`` output channel of the
        elastic-constants node (presumably a bulk modulus -- TODO confirm).

    NOTE(review): the default ``parameters`` object is created once, when the
    function is defined, and is therefore the same instance for every call.
    """
    
    # Engine and supercell are shared by the static calculation and the elastic node.
    wf.engine = atomistic.engine.ase.M3GNet() #.run()
    wf.supercell = atomistic.structure.build.cubic_bulk_cell(element=element, cell_size=3, vacancy_index=0)
    wf.calc = atomistic.calculator.ase.static(structure=wf.supercell, engine=wf.engine)
    wf.elastic = atomistic.property.elastic.elastic_constants(structure=wf.supercell, engine=wf.engine, parameters=parameters)
    return wf.elastic.outputs.elastic.BV
    
out = compute_elastic_constants(element='Ni').run() #iter(eps_range=np.linspace(1e-4, 1, 11))
out

In [None]:
# Bind the macro node to `node`: the inspection and save cells that follow in
# this section all operate on `node`; binding it to another name would leave
# them acting on a stale node from an earlier section.
node = compute_elastic_constants(element='Pt')
node.run()

In [None]:
di = [(label, channel) for label, channel in node.outputs.items()]
di[0][1].value

In [None]:
node.storage_directory

In [None]:
node.inputs.to_dict()['channels']

In [None]:
hs.save_node(node, db)

#### Q: What is a good approach to store and load a Python dataclass instance? When loading, the required dataclass should be imported. Extend the code to nested dataclasses.

In [None]:
from dataclasses import dataclass, asdict, fields
import json
import importlib
from typing import Optional, Union

@dataclass
class Person:
    """Toy record used to exercise (de)serialization of nested dataclasses.

    Attributes:
        name: Name of the person.
        age: Age of the person.
        neighbor: Another ``Person`` nested inside this one, or ``None``.
    """

    name: str
    age: int
    neighbor: Optional["Person"] = None  # forward reference to the class itself

In [None]:
def serialize_dataclass(instance):
    """Serialize a (possibly nested) dataclass instance to a JSON string.

    Every dataclass instance is encoded as
    ``{"class_name": <name>, "module": <module>, "data": {<field>: <value>}}``.
    The module is recorded so that the class can be imported again when the
    payload is loaded. Dataclass instances found inside lists, tuples and dict
    values are encoded the same way; all other values are passed to
    ``json.dumps`` unchanged.

    Args:
        instance: The dataclass instance to serialize.

    Returns:
        The JSON document as a string.

    Raises:
        TypeError: If ``instance`` is not a dataclass instance or a contained
            value is not JSON serializable.
    """
    from dataclasses import fields, is_dataclass

    def is_dataclass_instance(obj):
        # is_dataclass() is also true for dataclass *types*; only instances
        # carry field values worth serializing.
        return is_dataclass(obj) and not isinstance(obj, type)

    def serialize_value(value):
        if is_dataclass_instance(value):
            return serialize_instance(value)
        if isinstance(value, (list, tuple)):
            # JSON has no tuple type; json.dumps would emit a list as well.
            return [serialize_value(item) for item in value]
        if isinstance(value, dict):
            return {key: serialize_value(item) for key, item in value.items()}
        return value

    def serialize_instance(obj):
        cls = obj.__class__
        return {
            'class_name': cls.__name__,
            'module': cls.__module__,
            'data': {
                field.name: serialize_value(getattr(obj, field.name))
                for field in fields(obj)
            },
        }

    return json.dumps(serialize_instance(instance))

def save_to_file(instance, filename):
    """Write the JSON serialization of a dataclass instance to ``filename``.

    The instance is serialized before the file is opened, so a serialization
    error neither creates nor truncates ``filename``.

    Args:
        instance: Dataclass instance passed to ``serialize_dataclass``.
        filename: Path of the file to (over)write.
    """
    json_str = serialize_dataclass(instance)
    with open(filename, 'w') as f:
        f.write(json_str)

# Example usage
person = Person(name='John Doe', age=30, neighbor=Person(name='Jane Doe', age=25))
save_to_file(person, 'person.json')

In [None]:
def deserialize_dataclass(data):
    """Rebuild a (possibly nested) dataclass instance from its dict encoding.

    ``data`` has the form ``{"class_name": ..., "data": {...}}`` with an
    optional ``"module"`` entry. When ``"module"`` is present the class is
    imported from that module, so it does not have to be defined or imported
    beforehand. Without it, the class is looked up in this module's globals.
    Encoded dataclasses nested in lists or dict values are rebuilt as well.

    Args:
        data: Dict encoding of a dataclass instance.

    Returns:
        The reconstructed dataclass instance.

    Raises:
        KeyError: If no module is recorded and the class is not in globals.
        TypeError: If the resolved object is not a dataclass type.
    """
    from dataclasses import is_dataclass

    def resolve_class(entry):
        class_name = entry['class_name']
        module_name = entry.get('module')
        if module_name is None:
            # Payload without module information: the class must already be
            # available in this namespace.
            module_name = globals()[class_name].__module__
        # NOTE(security): importing a module named in the payload executes that
        # module's import-time code -- only load payloads from trusted sources.
        resolved = getattr(importlib.import_module(module_name), class_name)
        if not (isinstance(resolved, type) and is_dataclass(resolved)):
            raise TypeError(f'{module_name}.{class_name} is not a dataclass')
        return resolved

    def deserialize_value(value):
        if isinstance(value, dict):
            if 'class_name' in value:
                return deserialize_instance(value)
            return {key: deserialize_value(item) for key, item in value.items()}
        if isinstance(value, list):
            return [deserialize_value(item) for item in value]
        return value

    def deserialize_instance(entry):
        dataclass_type = resolve_class(entry)
        kwargs = {
            key: deserialize_value(value)
            for key, value in entry['data'].items()
        }
        return dataclass_type(**kwargs)

    return deserialize_instance(data)

def load_from_file(filename):
    """Read a JSON file and rebuild the dataclass instance it encodes.

    Args:
        filename: Path of a JSON file in the format understood by
            ``deserialize_dataclass``.

    Returns:
        The object returned by ``deserialize_dataclass`` for the parsed file.
    """
    with open(filename, 'r') as handle:
        payload = json.loads(handle.read())
    return deserialize_dataclass(payload)

# Example usage
loaded_person = load_from_file('person.json')
print(loaded_person)  # Output: Person(name='John Doe', age=30, neighbor=Person(name='Jane Doe', age=25))

In [None]:
elastic_node.pull()
save_to_file(elastic_node.outputs.structures.value, 'person.json')