In [None]:
from xtal2txt.core import TextRep
from pymatgen.core.structure import Structure

In [4]:
# Load the example crystals from CIF files; pymatgen picks the parser from the
# ".cif" file extension, so no explicit format argument is needed.
# NOTE: per pymatgen's documented signature, the second positional parameter of
# `Structure.from_file` is `primitive`, not a format string. Passing "cif" there
# would be a truthy value that silently asks for primitive-cell reduction, so it
# is intentionally omitted for these P1 (full-cell) files.
example_structure_1 = "./data/N2_p1.cif"
structure_1 = Structure.from_file(example_structure_1)

example_structure_2 = "./data/SrTiO3_p1.cif"
structure_2 = Structure.from_file(example_structure_2)


### Original Structure

In [20]:
# Baseline: wrap the untransformed structure so later cells can be compared to it.
text_rep = TextRep.from_input(structure_2)
text_representations_requested = [
    "atoms",
    "crystal_llm_rep",
]  # add more text representations here

# Show the pymatgen structure first, then the requested text representations.
print("Original Pymatgen Structure", text_rep.structure, sep="\n")
print(
    "Original Text Representations",
    text_rep.get_requested_text_reps(text_representations_requested),
    sep="\n",
)


Original Pymatgen Structure
Full Formula (Sr1 Ti1 O3)
Reduced Formula: SrTiO3
abc   :   3.912701   3.912701   3.912701
angles:  90.000000  90.000000  90.000000
pbc   :       True       True       True
Sites (5)
  #  SP      a    b    c
---  ----  ---  ---  ---
  0  Sr2+  0    0    0
  1  Ti4+  0.5  0.5  0.5
  2  O2-   0.5  0    0.5
  3  O2-   0.5  0.5  0
  4  O2-   0    0.5  0.5
Original Text Representations
{'atoms': 'Sr Ti O O O', 'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nSr2+\n0.00 0.00 0.00\nTi4+\n0.50 0.50 0.50\nO2-\n0.50 0.00 0.50\nO2-\n0.50 0.50 0.00\nO2-\n0.00 0.50 0.50'}


## Permute the order of atoms in representations

In the example below we permute the order of the atoms.
The resulting order depends on the random seed, so we recommend fixing the seed for reproducibility.


Notice that the order of the atoms changes in each of the representations below.


In [21]:
# A single transformation: shuffle the site order, with a fixed seed so the
# permutation is the same on every run.
transformations = [
    ("permute_structure", {"seed": 42}),
]

text_rep = TextRep.from_input(structure_2, transformations)
text_representations_requested = [
    "atoms",
    "crystal_llm_rep",
]  # add more text representations here

# Show the permuted structure first, then the requested text representations.
print("Permuted Pymatgen Structure", text_rep.structure, sep="\n")
print(
    "Permuted Text Representations",
    text_rep.get_requested_text_reps(text_representations_requested),
    sep="\n",
)


Permuted Pymatgen Structure
Full Formula (Sr1 Ti1 O3)
Reduced Formula: SrTiO3
abc   :   3.912701   3.912701   3.912701
angles:  90.000000  90.000000  90.000000
pbc   :       True       True       True
Sites (5)
  #  SP      a    b    c
---  ----  ---  ---  ---
  0  O2-   0.5  0.5  0
  1  Ti4+  0.5  0.5  0.5
  2  O2-   0.5  0    0.5
  3  O2-   0    0.5  0.5
  4  Sr2+  0    0    0
Permuted Text Representations
{'atoms': 'O Ti O O Sr', 'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.50 0.50 0.00\nTi4+\n0.50 0.50 0.50\nO2-\n0.50 0.00 0.50\nO2-\n0.00 0.50 0.50\nSr2+\n0.00 0.00 0.00'}


## Translate atoms

In [23]:
# A single transformation: shift the structure by the given vector.
transformations = [
    (
        "translate_structure",
        {"seed": 42, "vector": [0.1, 0.1, 0.1]},
    ),
]
text_rep = TextRep.from_input(structure_2, transformations)
text_representations_requested = [
    "atoms",
    "crystal_llm_rep",
]  # add more text representations here

# Show the translated structure first, then the requested text representations.
print("Translated Pymatgen Structure", text_rep.structure, sep="\n")
print(
    "Translated Text Representations",
    text_rep.get_requested_text_reps(text_representations_requested),
    sep="\n",
)

Translated Pymatgen Structure
Full Formula (Sr1 Ti1 O3)
Reduced Formula: SrTiO3
abc   :   3.912701   3.912701   3.912701
angles:  90.000000  90.000000  90.000000
pbc   :       True       True       True
Sites (5)
  #  SP      a    b    c
---  ----  ---  ---  ---
  0  Sr2+  0.1  0.1  0.1
  1  Ti4+  0.6  0.6  0.6
  2  O2-   0.6  0.1  0.6
  3  O2-   0.6  0.6  0.1
  4  O2-   0.1  0.6  0.6
Translated Text Representations
{'atoms': 'Sr Ti O O O', 'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nSr2+\n0.10 0.10 0.10\nTi4+\n0.60 0.60 0.60\nO2-\n0.60 0.10 0.60\nO2-\n0.60 0.60 0.10\nO2-\n0.10 0.60 0.60'}


> Notice that translation won't affect Composition, Atom Sequences, Atom Sequences++, SLICES, Local Env Representations

### Apply all transformations 💪🏼

In [24]:
# Several transformations passed together; every step uses the same fixed seed.
# NOTE(review): presumably the steps are applied in the listed order — confirm
# against the TextRep implementation.
transformations = [
    (
        "translate_single_atom",
        {"seed": 42},
    ),
    (
        "perturb_structure",
        {"seed": 42, "max_distance": 0.1},
    ),
    (
        "translate_structure",
        {"seed": 42, "vector": [0.1, 0.1, 0.1], "frac_coords": True},
    ),
]

text_rep = TextRep.from_input(structure_2, transformations)
text_representations_requested = [
    "atoms",
    "crystal_llm_rep",
]  # add more text representations here

# Show the transformed structure first, then the requested text representations.
print("Transformed Pymatgen Structure", text_rep.structure, sep="\n")
print(
    "Transformed Text Representations",
    text_rep.get_requested_text_reps(text_representations_requested),
    sep="\n",
)

Transformed Pymatgen Structure
Full Formula (Sr1 Ti1 O3)
Reduced Formula: SrTiO3
abc   :   3.912701   3.912701   3.912701
angles:  90.000000  90.000000  90.000000
pbc   :       True       True       True
Sites (5)
  #  SP           a         b         c
---  ----  --------  --------  --------
  0  Sr2+  0.459805  0.447271  0.462786
  1  Ti4+  0.715969  0.697545  0.697545
  2  O2-   0.7142    0.206901  0.695779
  3  O2-   0.710406  0.691112  0.191068
  4  O2-   0.201528  0.687915  0.689105
Transformed Text Representations
{'atoms': 'Sr Ti O O O', 'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nSr2+\n0.46 0.45 0.46\nTi4+\n0.72 0.70 0.70\nO2-\n0.71 0.21 0.70\nO2-\n0.71 0.69 0.19\nO2-\n0.20 0.69 0.69'}


## Augmenting dataset with transformations

In [32]:
import numpy as np  # not used in this cell; kept because a later cell relies on `np`


translation_vectors = [
    [0.1, 0.1, 0.1],
    [0.2, 0.2, 0.2],
    [0.3, 0.3, 0.3],
]

# The requested representations are the same for every sample, so define them once.
text_representations_requested = ["crystal_llm_rep"]  # add more text representations here

# Build one augmented sample per translation vector; the permutation uses the
# same fixed seed each time, so only the translation differs between samples.
for vector in translation_vectors:
    transformations = [
        ("permute_structure", {"seed": 42}),
        ("translate_structure", {"seed": 42, "vector": vector}),
    ]
    text_rep = TextRep.from_input(structure_2, transformations)

    # The label names both transformations in the pipeline (it previously said
    # only "Translated", which did not match what the cell does).
    print("Permuted and Translated Text Representations")
    print(text_rep.get_requested_text_reps(text_representations_requested))


Permuted and Translated Text Representations
{'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.81 0.79 0.29\nTi4+\n0.82 0.80 0.80\nO2-\n0.81 0.31 0.80\nO2-\n0.30 0.79 0.79\nSr2+\n0.56 0.55 0.56'}
Permuted and Translated Text Representations
{'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.91 0.89 0.39\nTi4+\n0.92 0.90 0.90\nO2-\n0.91 0.41 0.90\nO2-\n0.40 0.89 0.89\nSr2+\n0.66 0.65 0.66'}
Permuted and Translated Text Representations
{'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.01 0.99 0.49\nTi4+\n0.02 1.00 1.00\nO2-\n0.01 0.51 1.00\nO2-\n0.50 0.99 0.99\nSr2+\n0.76 0.75 0.76'}


> Translating with random vectors within a meaningful range can generate valid material text representations.

In [8]:
import numpy as np  # imported here so the cell does not depend on an earlier cell's state

from xtal2txt.core import TextRep

# Draw three random 3-component translation vectors, each component in [0.1, 0.5).
# A seeded generator is used so the sampled vectors (and therefore the printed
# representations) are identical on every Restart & Run All; the transformations
# below already pin their own seeds, so this removes the last source of randomness.
rng = np.random.default_rng(42)
translation_vectors = rng.uniform(low=0.1, high=0.5, size=(3, 3))

# The requested representations are the same for every sample, so define them once.
text_representations_requested = ["crystal_llm_rep"]

for vector in translation_vectors:
    transformations = [
        ("permute_structure", {"seed": 42}),
        ("perturb_structure", {"seed": 42, "max_distance": 0.1}),
        # Convert the NumPy row to a plain list of Python floats before passing it on.
        ("translate_structure", {"seed": 42, "vector": vector.tolist()}),
    ]
    text_rep = TextRep.from_input(structure_2, transformations)
    print("Translated Text Representations:")
    print(text_rep.get_requested_text_reps(text_representations_requested))


Translated Text Representations:
{'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.76 0.98 0.41\nTi4+\n0.77 0.98 0.89\nO2-\n0.76 0.49 0.89\nO2-\n0.26 0.97 0.88\nSr2+\n0.25 0.47 0.38'}
Translated Text Representations:
{'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.85 0.66 0.18\nTi4+\n0.86 0.66 0.66\nO2-\n0.85 0.17 0.66\nO2-\n0.35 0.65 0.65\nSr2+\n0.34 0.15 0.15'}
Translated Text Representations:
{'crystal_llm_rep': '3.9 3.9 3.9\n90 90 90\nO2-\n0.63 0.94 0.35\nTi4+\n0.64 0.94 0.84\nO2-\n0.64 0.45 0.84\nO2-\n0.13 0.94 0.83\nSr2+\n0.12 0.43 0.33'}
