In [52]:
import os
import pickle

import lmdb

In [53]:
# Dataset access: read-only LMDB reader for the NOMAD records

class Dataset():
  """
  Custom class for reading NOMAD dataset from MatSciML Zenodo

  Records are stored in a single LMDB file, keyed by the ASCII-encoded
  decimal index, and are un-pickled on access.
  """

  def __init__(self, lmdb_path, max_readers=1, transform=None, pre_transform=None):
    """
    Open the LMDB file read-only and start one read transaction.

    param: lmdb_path -> path to lmdb_file (a file, not a directory: subdir=False)
    param: max_readers -> maximum number of concurrent read processes accessing lmdb file
    param: transform -> accepted for interface compatibility; currently unused
    param: pre_transform -> accepted for interface compatibility; currently unused
    """
    self.env = lmdb.open(lmdb_path,
        subdir=False,
        readonly=True,
        lock=False,
        readahead=False,
        meminit=False,
        max_readers=max_readers)
    self.txn = self.env.begin()

  def len(self):
    """
    Return the number of entries reported by the LMDB environment.
    """
    # Environment.stat() is the documented, handle-free way to read the main
    # database statistics; Transaction.stat() is documented as taking a
    # database handle argument.
    return self.env.stat()['entries']

  def __len__(self):
    """
    Allow the builtin len(dataset); same value as dataset.len().
    """
    return self.len()

  def get(self, index):
    """
    Return a  datapoint

    param: index -> value whose str() form, ASCII-encoded, is the LMDB key
    raises: KeyError -> no record is stored under that key
    """
    key = f"{index}".encode("ascii")
    raw = self.txn.get(key)
    if raw is None:
      # txn.get() returns None for a missing key; fail with a clear message
      # instead of letting pickle.loads(None) raise an opaque TypeError.
      raise KeyError(f"no datapoint stored under key {index!r}")

    # SECURITY: pickle.loads can execute arbitrary code; only open LMDB files
    # that come from a trusted source.
    return pickle.loads(raw)

  def close(self):
    """
    Release the read transaction and close the LMDB environment.
    """
    self.txn.abort()
    self.env.close()



In [54]:
# Location of the NOMAD LMDB file. Set the NOMAD_LMDB_PATH environment variable
# to run this notebook on another machine; the fallback is the original local path.
lmdb_path = os.environ.get("NOMAD_LMDB_PATH", "/home/nawaf/n0w0f/material_db/nomad/all/data.lmdb")
dataset = Dataset(lmdb_path, max_readers=1)

In [62]:
# Number of entries reported for the opened LMDB file.
dataset.len()

138820

In [55]:
# Un-pickle the record stored under key "1"; it serves as the sample record
# that the following cells inspect.
mat_dict = dataset.get(1)

In [168]:
# --- Crystal structure -------------------------------------------------------
material_section = mat_dict['material']

material_name = material_section['material_name']
chemical_formula = material_section['chemical_formula_descriptive']

symmetry_section = material_section['symmetry']
space_group_symbol = symmetry_section['space_group_symbol']
crystal_system = symmetry_section['crystal_system']

mass_density = mat_dict['properties']['structures']['structure_original']['mass_density']

# --- Electronic structure (read from the electronic DOS section) -------------
dos_section = mat_dict['properties']['electronic']['dos_electronic']

spin_polarized = dos_section['spin_polarized']
energy_fermi = dos_section['energy_fermi']

# Only the first band-gap entry is used.
first_band_gap = dos_section['band_gap'][0]
energy_highest_occupied = first_band_gap['energy_highest_occupied']
energy_lowest_unoccupied = first_band_gap['energy_lowest_unoccupied']

# --- Energies ----------------------------------------------------------------
energies_section = mat_dict['energies']
total_energy = energies_section['total']['value']
fermi = energies_section['fermi']

# --- Method ------------------------------------------------------------------
method = mat_dict['method']

In [137]:
# Raw inputs for rebuilding the crystal geometry from the sample record.
atom_species = mat_dict['material']['elements']

properties_section = mat_dict['properties']
lattice_vectors = properties_section['structures']['structure_original']['lattice_vectors']
# NOTE(review): later cells iterate this value as rows of coordinates; confirm
# that 'structure_optimized' really is a list of positions.
cartesian_pos = properties_section['geometry_optimization']['structure_optimized']


In [138]:
# Multiplier applied to every lattice and position component. The literal 1e10
# is a float, so it is annotated as such.
# NOTE(review): presumably converts metres to angstrom; confirm the stored units.
scale_factor: float = 1e10

lattice = [
    [component * scale_factor for component in vector]
    for vector in lattice_vectors
]
atom_positions = [
    [component * scale_factor for component in position]
    for position in cartesian_pos
]

In [None]:
def create_material_dict(matsci_dict:dict, scale_factor: float = 1e10):
    """
    Build a CIF description of one dataset record.

    Relies on `Element`, `Lattice` and `Structure` being available in the
    notebook namespace; they are not imported in the cells shown here
    (presumably `from pymatgen.core import Element, Lattice, Structure`).

    Args:
        matsci_dict: Record with the keys read below:
            ['material']['elements'],
            ['properties']['structures']['structure_original']['lattice_vectors'],
            ['properties']['geometry_optimization']['structure_optimized'].
        scale_factor: Multiplier applied to every lattice and position
            component. Default 1e10, presumably metres to angstrom (TODO confirm).

    Returns:
        dict with "material_id" (`matsci_dict.get("entry_id")`, None when the
        record has no such key) and "cif" (the CIF text produced by pymatgen).
    """
    # Read from the argument, not from a notebook-global record.
    atom_species = matsci_dict['material']['elements']
    lattice_vectors = matsci_dict['properties']['structures']['structure_original']['lattice_vectors']
    # NOTE(review): iterated below as rows of coordinates; confirm that
    # 'structure_optimized' is a list of positions.
    cartesian_pos = matsci_dict['properties']['geometry_optimization']['structure_optimized']

    # Scale lattice and positions with the same factor so both share one unit.
    scaled_lattice = [
        [component * scale_factor for component in vector]
        for vector in lattice_vectors
    ]
    atom_positions = [
        [component * scale_factor for component in position]
        for position in cartesian_pos
    ]

    # Species may be given as symbols or as atomic numbers; non-zero integers
    # are translated to element symbols, everything else is passed through.
    # NOTE(review): Structure needs one species per position; confirm that
    # 'elements' has one entry per site.
    atom_symbols = [
        Element.from_Z(species).symbol
        if isinstance(species, int) and species != 0
        else species
        for species in atom_species
    ]

    # Lattice accepts a nested 3x3 sequence directly.
    lattice = Lattice(scaled_lattice)

    # The positions are Cartesian, so say so explicitly; Structure otherwise
    # interprets coords as fractional.
    pymatgen_structure = Structure(
        lattice,
        species=atom_symbols,
        coords=atom_positions,
        coords_are_cartesian=True,
    )

    # Generate CIF content using pymatgen
    cif_content = pymatgen_structure.to(fmt="cif")

    return {"material_id": matsci_dict.get("entry_id"), "cif": cif_content}
            

In [None]:
def convert_to_cif(response_json, scale_factor=1e10):
    """
    Convert one archive response into a CIF string.

    Relies on `Element`, `Lattice` and `Structure` being available in the
    notebook namespace; they are not imported in the cells shown here
    (presumably `from pymatgen.core import Element, Lattice, Structure`).

    Args:
        response_json: Mapping with 'entry_id' and
            ['archive']['run'][0]['system'][0]['atoms'] holding
            'lattice_vectors', 'species' and 'positions'.
        scale_factor: Multiplier applied to every lattice and position
            component. It is a parameter so the function no longer depends on
            a notebook-global of the same name. Default 1e10, presumably
            metres to angstrom (TODO confirm).

    Returns:
        dict with "material_id" (the entry id) and "cif" (the CIF text).
    """
    entry_id = response_json['entry_id']
    # Only the first run and the first system are used.
    atoms = response_json['archive']['run'][0]['system'][0]['atoms']

    lattice_vectors = [
        [component * scale_factor for component in vector]
        for vector in atoms['lattice_vectors']
    ]
    atom_species = atoms['species']
    atom_positions = [
        [component * scale_factor for component in position]
        for position in atoms['positions']
    ]

    # Species may be given as symbols or as atomic numbers; non-zero integers
    # are translated to element symbols, everything else is passed through.
    atom_symbols = [
        Element.from_Z(species).symbol
        if isinstance(species, int) and species != 0
        else species
        for species in atom_species
    ]

    # Lattice accepts a nested 3x3 sequence directly.
    lattice = Lattice(lattice_vectors)

    # Positions are scaled by the same length factor as the lattice, i.e. they
    # are Cartesian; Structure otherwise interprets coords as fractional.
    pymatgen_structure = Structure(
        lattice,
        species=atom_symbols,
        coords=atom_positions,
        coords_are_cartesian=True,
    )

    # Generate CIF content using pymatgen
    cif_content = pymatgen_structure.to(fmt="cif")

    return {"material_id": entry_id, "cif": cif_content}