In [1]:
import pandas as pd 

import json
import h5py
import ase 
from ase import io
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.core import Molecule, Structure

# Symbol -> atomic number for the first 17 elements (H through Cl).
# The symbols are listed in order of atomic number, so enumerating from 1
# assigns each symbol its Z.
elem_to_num = {
    symbol: atomic_number
    for atomic_number, symbol in enumerate(
        (
            "H", "He",
            "Li", "Be", "B", "C", "N", "O", "F", "Ne",
            "Na", "Mg", "Al", "Si", "P", "S", "Cl",
        ),
        start=1,
    )
}

# Inverse lookup: atomic number -> symbol.
num_to_elem = dict(zip(elem_to_num.values(), elem_to_num.keys()))

In [2]:
# Reference frame loaded from the LIBE test JSON; the cells below inspect its
# "molecule", "energy" and "gradient" columns.
# NOTE(review): every path in this cell is absolute and machine-specific, so the
# notebook only runs where these exact locations exist.
df = pd.read_json("/home/santiagovargas/dev/berkeley_pes/data/test_libe.json")
# xyz trajectory that is later passed to convert_to_rows().
file_xyz = "/home/santiagovargas/dev/SpookyNet/data/10/rad_qm9_traj_subset_train10.xyz"
# Statistics JSON and HDF5 file paths; no active (uncommented) code in the
# visible notebook reads either of them.
statistics = "/home/santiagovargas/dev/SpookyNet/data/10/h5/statistics.json"
h5_test = '/home/santiagovargas/dev/SpookyNet/data/10/h5/train/train_0.h5'

In [3]:
# Display the "molecule" entry of the first record to see its schema.
df.iloc[0].molecule

{'@module': 'pymatgen.core.structure',
 '@class': 'Molecule',
 'charge': 0,
 'spin_multiplicity': 1,
 'sites': [{'name': 'O',
   'species': [{'element': 'O', 'occu': 1}],
   'xyz': [-2.6925826572, 0.7768596788000001, -0.6232921263],
   'properties': {}},
  {'name': 'C',
   'species': [{'element': 'C', 'occu': 1}],
   'xyz': [-1.8891435037000002, -0.0008620098, -1.1063488904],
   'properties': {}},
  {'name': 'C',
   'species': [{'element': 'C', 'occu': 1}],
   'xyz': [-1.0414994726, -0.9508093493, -0.3183232657],
   'properties': {}},
  {'name': 'C',
   'species': [{'element': 'C', 'occu': 1}],
   'xyz': [0.42070744720000003, -0.6954733305, -0.5925367549],
   'properties': {}},
  {'name': 'O',
   'species': [{'element': 'O', 'occu': 1}],
   'xyz': [-3.7580214127, 1.7756652839, 2.6235669689],
   'properties': {}},
  {'name': 'H',
   'species': [{'element': 'H', 'occu': 1}],
   'xyz': [-3.966847318, 2.0676673205, 3.5044355521],
   'properties': {}},
  {'name': 'H',
   'species': [{'eleme

In [4]:
# Items needed from each record: spin multiplicity, charge, gradient, and energy.

In [5]:
# Select the three columns of interest and index into the first record's
# molecule; the value is not the cell's last expression, so it is discarded.
df[["molecule", "energy", "gradient"]].iloc[0]["molecule"]

# Work with the first record from here on.
row = df.iloc[0]

# Molecule-level scalars live inside the nested "molecule" dict.
spin, charge = (
    row["molecule"][key] for key in ("spin_multiplicity", "charge")
)

# NOTE(review): `force` is read straight from the "gradient" column with no
# sign change — confirm which convention downstream code expects.
force, energy = row["gradient"], row["energy"]

In [6]:

# Pair every site's atomic number (looked up from its symbol) with its xyz
# entry, then split the pairs into two parallel lists.
pos_elem_list = [
    (elem_to_num[site["name"]], site["xyz"])
    for site in row["molecule"]["sites"]
]
elem = [atomic_number for atomic_number, _ in pos_elem_list]
pos = [xyz for _, xyz in pos_elem_list]

In [7]:

def convert_to_rows(file_xyz):
    """Read every frame of a trajectory file into a DataFrame, one row per frame.

    Each frame is converted to a record whose "molecule" entry is a dict with
    the keys "sites", "charge" and "spin_multiplicity"; each site carries
    "xyz", "name" and an empty "properties" dict. The remaining per-frame and
    per-atom quantities are stored in their own columns.

    Element symbols are taken from the frame itself via
    ``get_chemical_symbols()``, so the conversion is not limited to the
    elements listed in a hand-written lookup table.

    Parameters
    ----------
    file_xyz : str
        Path of the trajectory file; all frames are read (``index=':'``).

    Returns
    -------
    pandas.DataFrame
        Columns, in order: "molecule", "energy", "gradient",
        "relative_energy", "calc_resp_dipole_moments",
        "mulliken_partial_charges", "mulliken_partial_spins",
        "resp_partial_charges". Array-valued cells hold the objects returned
        by ASE unchanged (no conversion to lists).

    Raises
    ------
    KeyError
        If a frame's ``info`` lacks 'spin', 'charge', 'relative_energy',
        'total_energy' or 'calc_resp_dipole_moments', or its ``arrays`` lacks
        'mulliken_partial_charges', 'mulliken_partial_spins' or
        'resp_partial_charges'.
    """
    frames = io.read(file_xyz, index=':')
    list_of_rows = []
    for frame in frames:
        info = frame.info        # per-frame metadata
        per_atom = frame.arrays  # per-atom arrays (public attribute, not __dict__)

        symbols = frame.get_chemical_symbols()
        positions = frame.get_positions()

        molecule = {
            "sites": [
                {
                    # 'species' is intentionally not written, matching the
                    # original converter (it was commented out there).
                    'xyz': xyz,
                    'name': symbol,
                    'properties': {},
                }
                for symbol, xyz in zip(symbols, positions)
            ],
            "charge": info['charge'],
            # NOTE(review): info['spin'] is stored as-is under
            # "spin_multiplicity" — confirm the source file stores the
            # multiplicity rather than another spin quantity.
            "spin_multiplicity": info['spin'],
        }

        list_of_rows.append(
            {
                "molecule": molecule,
                "energy": info['total_energy'],
                # NOTE(review): this column is named "gradient" but holds the
                # output of get_forces() with no sign change — confirm the
                # convention consumers of this frame expect.
                "gradient": frame.get_forces(),
                "relative_energy": info['relative_energy'],
                "calc_resp_dipole_moments": info['calc_resp_dipole_moments'],
                "mulliken_partial_charges": per_atom['mulliken_partial_charges'],
                "mulliken_partial_spins": per_atom['mulliken_partial_spins'],
                "resp_partial_charges": per_atom['resp_partial_charges'],
            }
        )

    # Build the frame once from the collected records.
    return pd.DataFrame(list_of_rows)

# Convert the trajectory at `file_xyz` into a DataFrame.
# NOTE: this rebinds `df`, replacing the frame loaded earlier with pd.read_json;
# cells run after this one see the converted data instead.
df = convert_to_rows(file_xyz)


In [10]:
# Display the first row's calc_resp_dipole_moments value as a spot check.
df.iloc[0].calc_resp_dipole_moments

array([1.41290987, 1.82822827, 0.12072373])

In [108]:
# Write the converted frame to JSON; the path is relative, so the file lands in
# the kernel's current working directory.
df.to_json("./train_test_radqm9.json")

In [19]:
#h5_data = h5py.File(h5_test, 'r')

In [30]:
#h5_data['config_batch_0']['config_0'].keys()

<KeysViewHDF5 ['atomic_numbers', 'calc_resp_dipole_moments', 'cell', 'charges', 'config_type', 'dipole', 'energy_weight', 'forces', 'forces_weight', 'mulliken_partial_charges', 'mulliken_partial_spins', 'pbc', 'positions', 'relative_energy', 'resp_partial_charges', 'spin', 'stress', 'stress_weight', 'total_charge', 'total_energy', 'virials', 'virials_weight', 'weight']>

In [4]:
#df[["molecule", "energy", "gradient"]].iloc[0]["molecule"]

{'@module': 'pymatgen.core.structure',
 '@class': 'Molecule',
 'charge': 0,
 'spin_multiplicity': 1,
 'sites': [{'name': 'O',
   'species': [{'element': 'O', 'occu': 1}],
   'xyz': [-2.6925826572, 0.7768596788000001, -0.6232921263],
   'properties': {}},
  {'name': 'C',
   'species': [{'element': 'C', 'occu': 1}],
   'xyz': [-1.8891435037000002, -0.0008620098, -1.1063488904],
   'properties': {}},
  {'name': 'C',
   'species': [{'element': 'C', 'occu': 1}],
   'xyz': [-1.0414994726, -0.9508093493, -0.3183232657],
   'properties': {}},
  {'name': 'C',
   'species': [{'element': 'C', 'occu': 1}],
   'xyz': [0.42070744720000003, -0.6954733305, -0.5925367549],
   'properties': {}},
  {'name': 'O',
   'species': [{'element': 'O', 'occu': 1}],
   'xyz': [-3.7580214127, 1.7756652839, 2.6235669689],
   'properties': {}},
  {'name': 'H',
   'species': [{'element': 'H', 'occu': 1}],
   'xyz': [-3.966847318, 2.0676673205, 3.5044355521],
   'properties': {}},
  {'name': 'H',
   'species': [{'eleme

In [6]:
from ase import Atoms
from spookynet import SpookyNetCalculator
# CH2 example geometry: the carbon sits at the origin and the two hydrogens are
# placed symmetrically about x = 0 in the z = 0 plane.
carbene = Atoms('CH2', positions=[
	( 0.000,  0.000,  0.000), 
	(-0.865, -0.584,  0.000), 
	( 0.865, -0.584,  0.000), 
])

ModuleNotFoundError: No module named 'torch'

In [7]:
# Same CH2 geometry as in the previous cell, defined again here.
carbene = Atoms('CH2', positions=[
	( 0.000,  0.000,  0.000), 
	(-0.865, -0.584,  0.000), 
	( 0.865, -0.584,  0.000), 
])

# Only the last expression of a cell is displayed, so `carbene.numbers` is
# evaluated and discarded; the cell output is `carbene.positions`.
carbene.numbers
carbene.positions

array([[ 0.   ,  0.   ,  0.   ],
       [-0.865, -0.584,  0.   ],
       [ 0.865, -0.584,  0.   ]])

In [8]:
# Display the atomic numbers of the CH2 example.
carbene.numbers

array([6, 1, 1])

In [9]:
# Display the atomic positions of the CH2 example.
carbene.positions

array([[ 0.   ,  0.   ,  0.   ],
       [-0.865, -0.584,  0.   ],
       [ 0.865, -0.584,  0.   ]])