In [1]:
import lmdb
import pickle

from pymatgen.core import Structure, Lattice
import numpy as np

In [3]:
# Dataset generation

class Dataset():
  """
  Custom class for reading NOMAD dataset from MatSciML Zenodo
  
  """

  def __init__(self, lmdb_path, max_readers=1, transform=None, pre_transform=None):
    """
    Constructor for dataset
    param: lmdb_path -> path to lmdb_file
    param: max_readers -> maximum number of concurrent read processes accessing lmdb file
    """
    self.env = lmdb.open(lmdb_path,
        subdir=False,
        readonly=True,
        lock=False,
        readahead=False,
        meminit=False,
        max_readers=max_readers)
    self.txn = self.env.begin()
    
  def len(self):
    
    return self.txn.stat()['entries']
    

  def get(self, index):
    """
    Return a  datapoint
    """
    # Select graph sample
    id = f"{index}".encode("ascii")
    datapoint = pickle.loads(self.txn.get(id))
    
    return datapoint



In [4]:
lmdb_path = "/home/nawaf/n0w0f/material_db/nomad/all/data.lmdb"
dataset = Dataset(lmdb_path, 1)

In [5]:
dataset.len()

138820

In [6]:
mat_dict = dataset.get(1)

In [29]:
mat_dict['structure']

KeyError: 'structure'

In [168]:
# crystal structure properties

material_name = mat_dict['material']['material_name']
chemical_formula = mat_dict['material']['chemical_formula_descriptive']
space_group_symbol = mat_dict['material']['symmetry']['space_group_symbol']
crystal_system = mat_dict['material']['symmetry']['crystal_system']


mass_density = mat_dict['properties']['structures']['structure_original']['mass_density']

# Electronic structure properties

spin_polarized = mat_dict['properties']['electronic']['dos_electronic']['spin_polarized']
energy_fermi = mat_dict['properties']['electronic']['dos_electronic']['energy_fermi']
energy_highest_occupied = mat_dict['properties']['electronic']['dos_electronic']['band_gap'][0]['energy_highest_occupied']
energy_lowest_unoccupied = mat_dict['properties']['electronic']['dos_electronic']['band_gap'][0]['energy_lowest_unoccupied']

# Energy
total_energy = mat_dict['energies']['total']['value']
fermi = mat_dict['energies']['fermi']

#method 
method = mat_dict['method']

In [15]:
scale_factor : int = 1e10
lattice_vectors = mat_dict['properties']['structures']['structure_original']['lattice_vectors']

[[v * scale_factor for v in row] for row in lattice_vectors ]

[[5.495655, 0.0, 0.0], [0.0, 5.495655, 0.0], [0.0, 0.0, 5.495655]]

In [20]:
cartesian_pos = mat_dict['properties']['geometry_optimization']['structure_optimized']['cartesian_site_positions']
for row in cartesian_pos:
    print(row)

dimension_types
lattice_vectors
cartesian_site_positions
species_at_sites
cell_volume
atomic_density
mass_density
species
lattice_parameters


In [30]:
scale_factor : int = 1e10
atom_species = mat_dict["properties"]["structures"]["structure_original"]["species_at_sites"]
lattice_vectors = mat_dict['properties']['structures']['structure_original']['lattice_vectors']
cartesian_pos = mat_dict['properties']['geometry_optimization']['structure_optimized']['cartesian_site_positions']
scale_factor : int = 1e10
lattice  = [[v * scale_factor for v in row] for row in lattice_vectors ]
atom_positions = [[v * scale_factor for v in row] for row in cartesian_pos]


In [34]:
lattice_params = mat_dict["properties"]["structures"]["structure_original"][
            "lattice_parameters"
        ]
lattice_abc = (
            lattice_params["a"] * 1e10,
            lattice_params["b"] * 1e10,
            lattice_params["c"] * 1e10,
        )
lattice_angles = (
            lattice_params["alpha"],
            lattice_params["beta"],
            lattice_params["gamma"],
        )
lattice_params = (lattice_abc + lattice_angles)

In [54]:
from math import pi

def create_lattice(lattice_params : dict):

    lattice_abc = (
                    lattice_params["a"] * scale_factor,
                    lattice_params["b"] * scale_factor,
                    lattice_params["c"] * scale_factor,
                )
    lattice_angles = (
                    lattice_params["alpha"],
                    lattice_params["beta"],
                    lattice_params["gamma"],
                )
    a, b, c  = lattice_abc
    alpha, beta, gamma = lattice_angles       
    lattice = Lattice.from_parameters(
                a, b, c, alpha * 180/pi, beta * 180/pi, gamma * 180/pi
            )
    return lattice

In [55]:
def create_cif(mat_dict:dict):

    atom_species = mat_dict["properties"]["structures"]["structure_original"]["species_at_sites"]
    lattice_vectors = mat_dict['properties']['structures']['structure_original']['lattice_vectors']
    cartesian_pos = mat_dict['properties']['geometry_optimization']['structure_optimized']['cartesian_site_positions']


    scale_factor : int = 1e10
    atom_positions = [
            [v * scale_factor for v in row] for row in cartesian_pos
        ]
    

    lattice_params = mat_dict["properties"]["structures"]["structure_original"][
            "lattice_parameters"
        ]
    lattice = create_lattice(lattice_params)
    
    
    
    # Handle atom species which could be either symbols or atomic numbers
    atom_symbols = []
    for species in atom_species:
        if isinstance(species, int) and species != 0:
            element = Element.from_Z(species)
            atom_symbols.append(element.symbol)
        else:
            atom_symbols.append(species)


    # Create a pymatgen Structure
    pymatgen_structure = Structure(lattice, species=atom_symbols, coords=atom_positions )

    # Generate CIF content using pymatgen
    cif_content = pymatgen_structure.to(fmt="cif")
    return cif_content
            

In [56]:
cif = create_cif(mat_dict)

In [44]:
cif

"# generated using pymatgen\ndata_CuCl\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   0.00000000\n_cell_length_b   0.00000000\n_cell_length_c   0.00000000\n_cell_angle_alpha   90.00000000\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   CuCl\n_chemical_formula_sum   'Cu4 Cl4'\n_cell_volume   0.00000000\n_cell_formula_units_Z   4\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Cl  Cl0  1  1.37391400  1.37391400  1.37391400  1\n  Cl  Cl1  1  4.12174100  4.12174100  1.37391400  1\n  Cl  Cl2  1  4.12174100  1.37391400  4.12174100  1\n  Cl  Cl3  1  1.37391400  4.12174100  4.12174100  1\n  Cu  Cu4  1  0.00000000  0.00000000  0.00000000  1\n  Cu  Cu5  1  0.00000000  2.74782800  2.74782800  1\n  Cu