In [1]:
# %%
import pandas as pd
from pymatgen.io.cif import CifParser
from pyxtal import pyxtal
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
import numpy as np
from pyxtal.io import write_cif

from pymatgen.core.structure import Structure
from tqdm import tqdm
import pickle
from p_tqdm import p_map
import argparse
import os
from pathlib import Path
import warnings

warnings.filterwarnings("ignore")


# %%
def process_cif_to_conventional(cif_str):
    """Re-symmetrize a CIF string with pyxtal and collect per-atom Wyckoff data.

    The CIF is parsed with pymatgen, its first structure is passed to
    ``pyxtal.from_seed`` (``tol=0.01``), and every Wyckoff site is expanded
    into one entry per symmetry operation of that site.

    Args:
        cif_str: Contents of a CIF file, as a string.

    Returns:
        A 4-tuple ``(cif, sym_info, num_sites, formula)`` where

        * ``cif`` is the text returned by ``write_cif(pyx)`` with its first
          805 characters dropped;
        * ``sym_info`` is a dict with the keys
          ``"anchors"`` (for each expanded atom, the index in the flattened
          operation list of the first operation of the site it came from),
          ``"wyckoff_ops"`` (each operation's ``affine_matrix`` as nested
          lists) and ``"spacegroup"`` (``pyx.group.number``);
        * ``num_sites`` is the total number of expanded atoms;
        * ``formula`` is ``pyx.formula``.
    """
    structure = CifParser.from_str(cif_str).get_structures()[0]
    pyx = pyxtal()
    pyx.from_seed(structure, tol=0.01)

    anchors = []
    matrices = []
    for site in pyx.atom_sites:
        # Every operation of this site points back at the site's first
        # operation, whose position in the flat list is the current length.
        anchor = len(matrices)
        for op in site.wp:
            matrices.append(op.affine_matrix)
            anchors.append(anchor)

    sym_info = {
        "anchors": anchors,
        # Round-trip through numpy so the result is plain nested lists.
        "wyckoff_ops": np.array(matrices).tolist(),
        "spacegroup": pyx.group.number,
    }

    # NOTE(review): the fixed 805-character offset presumably skips a banner
    # that write_cif prepends to its output -- confirm against the installed
    # pyxtal version, since a different banner length would corrupt the CIF.
    cif = write_cif(pyx)[805:]

    # One anchor is recorded per expanded atom, so its length is the atom count.
    num_sites = len(anchors)
    return cif, sym_info, num_sites, pyx.formula


# %%
def process_data(data):
    """Replace a record's CIF with its converted form and attach symmetry data.

    Args:
        data: Mapping with a ``"cif"`` entry holding a CIF string. It is
            modified in place.

    Returns:
        The same ``data`` object, with ``"cif"`` overwritten and the keys
        ``"sym_info"``, ``"num_sites"`` and ``"formula"`` set from the result
        of ``process_cif_to_conventional``.
    """
    new_cif, symmetry, site_count, composition = process_cif_to_conventional(
        data["cif"]
    )
    updates = (
        ("cif", new_cif),
        ("sym_info", symmetry),
        ("num_sites", site_count),
        ("formula", composition),
    )
    for key, value in updates:
        data[key] = value
    return data


# Driver: convert the first 100 records of one dataset split in parallel.
#
# The command-line parser is kept for reference but disabled; the same
# settings are supplied through a hand-built Namespace below.
# parser = argparse.ArgumentParser()
# parser.add_argument(
#     "--csv_path", type=str, default="/home/holywater2/crystal_gen/mattergen/datasets"
# )
# parser.add_argument("--data_name", type=str, default="mp_20")
# parser.add_argument("--mode", type=str, default="val")
# parser.add_argument("--num_cpus", type=int, default=16)
# args = parser.parse_args()
args = argparse.Namespace(
    **{
        "csv_path": "/home/holywater2/crystal_gen/mattergen/datasets",
        "data_name": "mp_20",
        "mode": "val",
        "num_cpus": 16,
    }
)
print("Starting...")

# Directory of the chosen dataset; the split file is <csv_path>/<mode>.csv.
csv_path = Path(args.csv_path).joinpath(args.data_name)
print(f"Processing {csv_path}/{args.mode}.csv")
df = pd.read_csv(csv_path.joinpath(f"{args.mode}.csv"), index_col=0)

# Only the first 100 rows are processed; each row is passed to process_data
# as a plain dict, with the work spread over args.num_cpus workers.
new_data = p_map(
    process_data,
    df[:100].to_dict(orient="records"),
    num_cpus=args.num_cpus,
)
new_df = pd.DataFrame(new_data)

# Writing the converted split to disk is currently disabled.
# os.makedirs(f"conventional/{args.data_name}", exist_ok=True)
# print(f"Saving to conventional/{args.data_name}/{args.mode}.csv")
# new_df.to_csv(f"conventional/{args.data_name}/{args.mode}.csv")
# print("Done!")


Starting...
Processing /home/holywater2/crystal_gen/mattergen/datasets/mp_20/val.csv


  0%|          | 0/100 [00:00<?, ?it/s]

In [10]:
def process_cif(cif_str):
    """Parse a CIF string and return it as a symmetrized ``pyxtal`` object.

    The first structure found in the CIF is passed to ``pyxtal.from_seed``
    with ``tol=0.01``.

    Args:
        cif_str: Contents of a CIF file, as a string.

    Returns:
        The ``pyxtal`` instance built from the parsed structure.
    """
    structure = CifParser.from_str(cif_str).get_structures()[0]
    pyx = pyxtal()
    pyx.from_seed(structure, tol=0.01)
    return pyx

In [34]:
# Symmetrize the CIF of the second row of `df` (position 1) for inspection.
struc = process_cif(df.iloc[1]["cif"])

In [35]:
# Show the pyxtal summary: composition, space group, lattice and Wyckoff sites.
struc


------Crystal from Seed------
Dimension: 3
Composition: Ho4W4Cl4O16
Group: C 1 2/m 1 (12)
 10.3540,   7.3243,   6.9713,  90.0000, 107.8402,  90.0000, monoclinic
Wyckoff sites:
	Ho @ [ 0.2225  0.0000  0.1178], WP [4i] Site [m]
	 W @ [ 0.3631  0.0000  0.7335], WP [4i] Site [m]
	Cl @ [ 0.0111  0.0000  0.7706], WP [4i] Site [m]
	 O @ [ 0.2959  0.0000  0.4637], WP [4i] Site [m]
	 O @ [ 0.4537  0.0000  0.1936], WP [4i] Site [m]
	 O @ [ 0.7008  0.8071  0.1553], WP [8j] Site [1]

In [36]:
# Fourth symmetry operation (index 3) of the second site's Wyckoff position.
struc.atom_sites[1].wp[3]

SymmOp(self.affine_matrix=array([[-1. ,  0. ,  0. ,  0.5],
       [ 0. ,  0. ,  0. ,  0.5],
       [ 0. ,  0. , -1. ,  0. ],
       [ 0. ,  0. ,  0. ,  1. ]]))

In [41]:
# Print the first site's Wyckoff position as its list of x, y, z expressions.
print(struc.atom_sites[0].wp)

Wyckoff position 4i in space group 12 with site symmetry m
x, 0, z
-x, 0, -z
x+1/2, 1/2, z
-x+1/2, 1/2, -z


In [49]:
# Print every Wyckoff site of the structure (species, position, WP label).
print(struc.atom_sites)

[Ho @ [ 0.2225  0.0000  0.1178], WP [4i] Site [m],  W @ [ 0.3631  0.0000  0.7335], WP [4i] Site [m], Cl @ [ 0.0111  0.0000  0.7706], WP [4i] Site [m],  O @ [ 0.2959  0.0000  0.4637], WP [4i] Site [m],  O @ [ 0.4537  0.0000  0.1936], WP [4i] Site [m],  O @ [ 0.7008  0.8071  0.1553], WP [8j] Site [1]]


In [37]:
# Stack the Wyckoff operations stored for record 1 of `new_df` into one array
# (one affine matrix per expanded atom).
wops = np.array(new_df["sym_info"][1]["wyckoff_ops"])

In [57]:
# Upper-left 3x3 part of every affine matrix: drops the translation column
# and the bottom row.
wops[:,:3,:3]

array([[[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],


In [56]:
# Pseudo-inverse of each 3x3 part. A plain inverse would not work here:
# matrices with an all-zero row (as displayed for these operations) are singular.
np.linalg.pinv(wops[:,:3,:3])

array([[[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],

       [[-1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0., -1.]],

       [[ 1.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  1.]],


In [47]:
# Full 4x4 affine matrix of the fourth stored operation (index 3).
wops[3]

array([[-1. ,  0. ,  0. ,  0.5],
       [ 0. ,  0. ,  0. ,  0.5],
       [ 0. ,  0. , -1. ,  0. ],
       [ 0. ,  0. ,  0. ,  1. ]])