In [1]:
import pandas as pd
from pymatgen.io.cif import CifParser
from pyxtal import pyxtal
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
import numpy as np
from pyxtal.io import write_cif

from pymatgen.core.structure import Structure
from tqdm import tqdm
import pickle
from p_tqdm import p_map
import argparse

In [None]:
csv_path = "/home/holywater2/crystal_gen/mattergen/datasets/alex_mp_20"
mode = "val"

In [None]:
df = pd.read_csv(csv_path +  f"/{mode}.csv", index_col=0)

In [5]:
def process_cif_to_conventional(cif_str):
    structure = CifParser.from_str(cif_str).get_structures()[0]
    sga = SpacegroupAnalyzer(structure)
    pyx = pyxtal()
    pyx.from_seed(structure, tol=0.01)
    space_group = pyx.group.number
    species = []
    anchors = []
    matrices = []
    coords = []
    for site in pyx.atom_sites:
        specie = site.specie
        anchor = len(matrices)
        coord = site.position
        for syms in site.wp:
            species.append(specie)
            matrices.append(syms.affine_matrix)
            coords.append(syms.operate(coord))
            anchors.append(anchor)
    anchors = np.array(anchors)
    matrices = np.array(matrices)
    coords = np.array(coords) % 1.
    sym_info = {
        'anchors':anchors,
        'wyckoff_ops':matrices,
        'spacegroup':space_group
    }
    cif = write_cif(pyx)
    num_sites = len(species)
    formula = pyx.formula
    return cif, sym_info, num_sites, formula

In [6]:
def process_data(data):
    cif_str = data['cif']
    cif, sym_info, num_sites, formula = process_cif_to_conventional(cif_str)
    data['cif'] = cif
    data['sym_info'] = sym_info
    data['num_sites'] = num_sites
    data['formula'] = formula
    return data

In [7]:
new_data = p_map(process_data, df[:100].to_dict(orient='records'), num_cpus=16)

  0%|          | 0/100 [00:00<?, ?it/s]

The only difference is that primitive defaults to False in the new parse_structures method.So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
  structure = CifParser.from_str(cif_str).get_structures()[0]
The only difference is that primitive defaults to False in the new parse_structures method.So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
  structure = CifParser.from_str(cif_str).get_structures()[0]
The only difference is that primitive defaults to False in the new parse_structures method.So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
  structure = CifParser.from_str(cif_str).get_structures()[0]
The only difference is that primitive defaults to False in the new parse_structures method.So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
  structure = CifParser.from_str(cif_str).get_structures()[0]
The only difference is t

In [8]:
new_df = pd.DataFrame(new_data)

In [15]:
mode="val"

"\ndata_\n\n_symmetry_space_group_name_H-M 'R-3'\n_symmetry_Int_Tables_number                  148\n_symmetry_cell_setting                  trigonal\n_cell_length_a           13.189991\n_cell_length_b           13.189991\n_cell_length_c            5.749951\n_cell_angle_alpha        90.000000\n_cell_angle_beta         90.000000\n_cell_angle_gamma       120.000000\n_cell_volume            866.330839\n\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n1 'x, y, z'\n2 '-y, x-y, z'\n3 '-x+y, -x, z'\n4 '-x, -y, -z'\n5 'y, -x+y, -z'\n6 'x-y, x, -z'\n7 'x+2/3, y+1/3, z+1/3'\n8 '-y+2/3, x-y+1/3, z+1/3'\n9 '-x+y+2/3, -x+1/3, z+1/3'\n10 '-x+2/3, -y+1/3, -z+1/3'\n11 'y+2/3, -x+y+1/3, -z+1/3'\n12 'x-y+2/3, x+1/3, -z+1/3'\n13 'x+1/3, y+2/3, z+2/3'\n14 '-y+1/3, x-y+2/3, z+2/3'\n15 '-x+y+1/3, -x+2/3, z+2/3'\n16 '-x+1/3, -y+2/3, -z+2/3'\n17 'y+1/3, -x+y+2/3, -z+2/3'\n18 'x-y+1/3, x+2/3, -z+2/3'\n\nloop_\n _atom_site_label\n _atom_site_type_symbol\n _atom_site_symmetry_multiplicity\n _at

In [25]:
CifParser.from_str(new_data[0]["cif"][805:]).get_structures()[0]

The only difference is that primitive defaults to False in the new parse_structures method.So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
  CifParser.from_str(new_data[0]["cif"][805:]).get_structures()[0]


Structure Summary
Lattice
    abc : 5.749951 7.852738547387598 7.8527385473876015
 angles : 114.24515353895484 104.12712433366798 104.12712433366801
 volume : 288.77694598991116
      A : -0.0 -0.0 5.749951
      B : 6.594995499999998 -3.807622427229371 -1.9166503333333333
      C : 1.7763568394002505e-15 7.6152448544587426 -1.9166503333333325
    pbc : True True True
PeriodicSite: Ho (5.74, -1.4, -0.5674) [0.2752, 0.8704, 0.2513]
PeriodicSite: Ho (4.938, 1.864, -2.484) [0.02392, 0.7487, 0.6191]
PeriodicSite: Ho (2.512, -0.4633, 1.349) [0.4048, 0.3809, 0.1296]
PeriodicSite: Ho (0.8548, 5.208, 2.484) [0.7248, 0.1296, 0.7487]
PeriodicSite: Ho (1.657, 1.944, 4.401) [0.9761, 0.2513, 0.3809]
PeriodicSite: Ho (4.083, 4.271, 0.5674) [0.5952, 0.6191, 0.8704]
PeriodicSite: Pd (0.0, 0.0, 0.0) [0.0, 0.0, 0.0]
PeriodicSite: Pd (0.0, 0.0, 2.875) [0.5, 0.0, 0.0]
PeriodicSite: Pd (2.11, 2.477, 1.614) [0.5492, 0.3199, 0.4852]
PeriodicSite: Pd (3.395, 4.397, -2.219) [0.06396, 0.5148, 0.8348]
PeriodicSi

In [None]:
# import os
# from pathlib import Path
# os.makedirs(Path(csv_path).parent / 'conventional', exist_ok=True)
# new_df.to_csv(csv_path + f"/conventional/{mode}.csv")

OSError: Cannot save file into a non-existent directory: '/home/holywater2/crystal_gen/mattergen/datasets/alex_mp_20/conventional'