In [1]:
from aflow import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Build the AFLOW query: filter on K.species == "O" and K.nspecies == 3, then
# list the properties to retrieve for every matching entry via select().
# The two commented-out filters would additionally restrict the query to
# spacegroup_orig 221 and Pearson symbol "cP5".
# NOTE(review): K.natoms is listed twice in select() -- presumably redundant;
# confirm against the aflow API before removing one of them.
result = search(batch_size=100
        ).filter(K.species == "O"
#        ).filter(K.spacegroup_orig == 221
#        ).filter(K.Pearson_symbol_orig == "cP5"
        ).filter(K.nspecies == 3
        ).select(
            K.compound, K.composition, K.species, K.natoms,
            K.spacegroup_orig, K.Pearson_symbol_orig,
            K.geometry, K.natoms, K.positions_fractional, K.positions_cartesian,
            K.enthalpy_atom, K.enthalpy_formation_atom
        )

In [None]:
# Alternative query against the "icsd" catalog: spacegroup_orig 221,
# Pearson symbol "cP5" and exactly three species, selecting the same
# properties as the oxide query.
# NOTE: this rebinds `result`; running this cell replaces whatever query the
# previous cell built.
# NOTE(review): K.natoms is listed twice in select() -- presumably redundant.
result = search(catalog="icsd", batch_size=100
        ).filter(K.spacegroup_orig == 221
        ).filter(K.Pearson_symbol_orig == "cP5"
        ).filter(K.nspecies == 3
        ).select(
            K.compound, K.composition, K.species, K.natoms,
            K.spacegroup_orig, K.Pearson_symbol_orig,
            K.geometry, K.natoms, K.positions_fractional, K.positions_cartesian,
            K.enthalpy_atom, K.enthalpy_formation_atom
        )

In [3]:
max_N = result.max_N
max_N

12029

In [5]:
# Materialise every entry of `result` as one row of `df`.
#
# Rows are collected in a plain list and the DataFrame is built once at the
# end: `DataFrame.append` copies the whole frame on every call (quadratic in
# the number of rows) and was removed in pandas 2.0.
rows = []
for counter, entry in enumerate(result, start=1):
    # "\r" keeps the progress counter on a single, continuously updated line.
    print(f"Fetched {counter}/{max_N}", end="\r")
    rows.append({
        "auid": entry.auid,
        "aurl": entry.aurl,

        "compound": entry.compound,
        "composition": entry.composition,
        "species": entry.species,
        "natoms": entry.natoms,

        "spacegroup": entry.spacegroup_orig,
        "pearson_symbol": entry.Pearson_symbol_orig,

        "geometry": entry.geometry,
        "positions_fractional": entry.positions_fractional,
        "positions_cartesian": entry.positions_cartesian,

        "enthalpy_atom": entry.enthalpy_atom,
        "enthalpy_formation_atom": entry.enthalpy_formation_atom
    })

# Passing `columns` fixes the column order and keeps the frame well-formed
# (13 named columns, zero rows) even when the query returned nothing.
df = pd.DataFrame(rows, columns=[
    "auid", "aurl",
    "compound", "composition", "species", "natoms",
    "spacegroup", "pearson_symbol",
    "geometry", "positions_fractional", "positions_cartesian",
    "enthalpy_atom", "enthalpy_formation_atom"
])

# Terminate the "\r" progress line so "Done." is not printed on top of it.
print()
print("Done.")

Done.ed 11929/12029


In [6]:
df.shape

(11929, 13)

In [None]:
# Save the current `df`. The file name presumably refers to the ICSD /
# spacegroup 221 / cP5 query variant -- run this only after that query,
# because every to_pickle cell in this notebook writes the same `df` variable.
df.to_pickle("icsd_221_cp5_3_species.pkl")

In [None]:
# Save the current `df`. The file name presumably refers to the oxide query
# with the spacegroup 221 / cP5 filters enabled -- run this only after that
# query variant, because every to_pickle cell writes the same `df` variable.
df.to_pickle("all_221_cp5_3_species_oxides.pkl")

In [7]:
# Save the current `df` (the table built from the fetched query results).
# The file name presumably refers to the three-species oxide query without
# the spacegroup / Pearson symbol filters.
df.to_pickle("all_3_species_oxides.pkl")

In [None]:
# Reload a previously saved table instead of re-running the query.
# NOTE: this rebinds `df`, discarding whatever table is currently in memory.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
df = pd.read_pickle("all_221_cp5_3_species_oxides.pkl")

In [None]:
# Reload the previously saved ICSD table instead of re-running the query.
# NOTE: this rebinds `df`, discarding whatever table is currently in memory.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
df = pd.read_pickle("icsd_221_cp5_3_species.pkl")

In [None]:
# Keep a single row per compound (drop_duplicates keeps the first occurrence
# by default).
# NOTE: this rebinds `df`; the table with repeated compounds is no longer
# reachable afterwards, which affects every later per-compound statistic.
df = df.drop_duplicates("compound")
df

In [None]:
np.unique(df.compound).shape

In [None]:
# Keep only compounds that contain oxygen.  A plain substring test for "O"
# would also match osmium ("Os"), so require that the "O" is not followed by
# a lowercase letter, i.e. that it is a complete element symbol.
oxides = df[df.compound.str.contains(r"O(?![a-z])", regex=True)]

In [None]:
np.unique(oxides.compound).shape

In [None]:
elements = np.unique(np.hstack(np.array(df.species)))
print(elements)
print(elements.shape)

In [None]:
elements = np.unique(np.hstack(np.array(oxides.species)))
print(elements)
print(elements.shape)

In [None]:
df.enthalpy_formation_atom.isna().sum() / df.shape[0] * 100

In [None]:
oxides.enthalpy_formation_atom.isna().sum() / oxides.shape[0] * 100

In [None]:
df.enthalpy_atom.isna().sum() / df.shape[0] * 100

In [None]:
oxides.enthalpy_atom.isna().sum() / oxides.shape[0] * 100

In [None]:
pd.Series(np.hstack(np.array(df.species))).value_counts().plot.bar(figsize=(16,4))
plt.yscale("log")

In [None]:
pd.Series(np.hstack(np.array(oxides.species))).value_counts().plot.bar(figsize=(16,4))
plt.yscale("log")

In [None]:
# Per-compound sample variance of enthalpy_atom.
# NOTE(review): an earlier cell runs df.drop_duplicates("compound"); if that
# cell has been executed, every group holds a single row and .var() is NaN
# for all compounds -- run this on the table that still has repeated compounds.
variances = df.groupby("compound").enthalpy_atom.var()

In [None]:
# The variance is NaN for compounds with a single entry; treat those as
# zero spread. This modifies `variances` in place.
variances[variances.isna()] = 0 # only 1 entry

In [None]:
variances.mean()

In [None]:
plt.hist(variances, bins=25)

In [None]:
from voxel import *

In [None]:
# https://en.wikipedia.org/wiki/Fractional_coordinates#In_crystallography
def calc_basis(geometry):
    """Build the fractional-to-Cartesian conversion matrix of a unit cell.

    Parameters
    ----------
    geometry : sequence
        Read by index as ``(a, b, c, alpha, beta, gamma)``: three cell edge
        lengths followed by three angles in degrees.

    Returns
    -------
    numpy.ndarray
        Upper-triangular 3x3 matrix whose columns are the cell vectors,
        rounded to 14 decimals so that values such as ``cos(90 deg)``
        collapse to exactly zero.
    """
    len_a, len_b, len_c = geometry[0], geometry[1], geometry[2]
    # Degrees -> radians.
    alpha, beta, gamma = (geometry[k] / 180 * np.pi for k in (3, 4, 5))

    cos_alpha = np.cos(alpha)
    cos_beta = np.cos(beta)
    cos_gamma = np.cos(gamma)
    sin_gamma = np.sin(gamma)

    # Volume term (Omega) of the referenced conversion formula.
    volume = len_a*len_b*len_c*np.sqrt(
        1 - cos_alpha*cos_alpha - cos_beta*cos_beta - cos_gamma*cos_gamma
        + 2*cos_alpha*cos_beta*cos_gamma
    )

    row_x = [len_a, len_b * cos_gamma, len_c * cos_beta]
    row_y = [0., len_b * sin_gamma, len_c * (cos_alpha - cos_beta*cos_gamma) / sin_gamma]
    row_z = [0., 0., volume / (len_a * len_b * sin_gamma)]

    return np.round(np.array([row_x, row_y, row_z]), decimals=14)


def calc_cartesian_positions(A, positions_fractional):
    """Convert fractional coordinates to Cartesian coordinates.

    Every position ``p`` is mapped to ``A.dot(p)``.  All positions are
    transformed in a single matrix product instead of one Python call per
    row, and an empty position list yields an empty result instead of
    raising (``np.apply_along_axis`` rejects zero-length iteration axes).

    Parameters
    ----------
    A : array-like, shape (3, 3)
        Conversion matrix whose columns are the cell vectors.
    positions_fractional : array-like, shape (n, 3)
        Fractional coordinates, one position per row.

    Returns
    -------
    numpy.ndarray
        Cartesian coordinates, one position per row; shape ``(0, 3)`` for
        empty input.
    """
    A = np.asarray(A)
    fractional = np.asarray(positions_fractional, dtype=float)
    if fractional.size == 0:
        return np.empty((0, A.shape[0]), dtype=float)
    # Row-wise A.dot(p) for all rows at once: (n, 3) @ (3, 3)^T.
    return fractional @ A.T

In [None]:
# C,D,H,W
def make_descriptor(mol, sigma, L, N, elements=None, verbose=False):
    """Build a per-element voxel descriptor of shape ``(len(elements), N, N, N)``.

    For every species of ``mol`` the atoms of that species are passed through
    ``reciprocal_lattice_gaussian`` and ``adapt_to_voxel_grid`` and the result
    is stored in the channel whose index equals the position of the species
    in ``elements``.  Channels of elements not present in ``mol`` stay zero.

    Parameters
    ----------
    mol : object
        Entry providing ``species``, ``composition``, ``geometry`` and
        ``positions_fractional``.  The positions must be grouped by species,
        in the order given by ``species``/``composition``.
    sigma, L, N :
        Forwarded to the voxel helpers; ``N`` is also the edge length of the
        voxel grid.  (Exact semantics are defined by the helpers -- not
        visible here.)
    elements : sequence of str, optional
        Channel order.  Defaults to ``mol.species``.
    verbose : bool, optional
        Print the per-element intermediate values that used to be printed
        unconditionally.  Off by default so batch runs do not flood the output.

    Raises
    ------
    IndexError
        If a species of ``mol`` is missing from ``elements``.
    """
    if elements is None:
        elements = mol.species # for testing
    elements = np.array(elements)

    descriptor = np.zeros((len(elements), N, N, N))

    A = calc_basis(mol.geometry)
    coords = calc_cartesian_positions(A, mol.positions_fractional)

    mx, my, mz = get_mesh_coords(A, L, N)

    # Atom coordinates are ordered according to composition and species:
    # offsets[i]:offsets[i+1] is the slice holding the atoms of species i.
    offsets = np.insert(np.cumsum(mol.composition), 0, 0)

    if verbose:
        print(elements)

    for i, element in enumerate(mol.species):
        element_coords = coords[offsets[i]:offsets[i+1]]
        if verbose:
            print(element)
            print(offsets[i], offsets[i+1])
            print(element_coords, element_coords.shape)

        # The first return value is not needed here.
        _, G, SG = reciprocal_lattice_gaussian(A, element_coords, sigma, mx, my, mz)
        element_descriptor = adapt_to_voxel_grid(G, SG, L, N) # (N,N,N)

        matches = np.where(elements == element)[0]
        if matches.size == 0:
            raise IndexError(
                f"element {element!r} of the entry is not contained in `elements`"
            )
        descriptor[matches[0], :, :, :] = element_descriptor

    return descriptor
        

In [None]:
# Take the first oxide row by position.  `.loc[0]` looks up the index *label*
# 0 and raises KeyError as soon as that row has been filtered out, whereas
# `.iloc[0]` always returns the first remaining row.
mol = oxides.iloc[0]
mol

In [None]:
elements = np.unique(np.hstack(np.array(oxides.species)))
elements

In [None]:
d = make_descriptor(mol, sigma=0.1, L=12.8, N=32, elements=elements)
d.shape

In [None]:
np.prod(d.shape)

In [None]:
# Percentage of non-zero voxels in the descriptor.  `d.nonzero()` returns the
# index arrays of the non-zero entries, so summing it adds up index values
# instead of counting entries; count them explicitly.
np.count_nonzero(d) / d.size * 100

In [None]:
import ase
%matplotlib widget
import nglview

def elements_coords(entry, augment=True):
    """Return per-atom element symbols and Cartesian coordinates of an entry.

    The symbol list is expanded from ``entry.species`` / ``entry.composition``
    (one symbol per atom).  With ``augment`` enabled, every position that has
    a fractional coordinate exactly equal to 0 gets an additional copy with
    that coordinate set to 1 (same element), unless an identical position is
    already present.

    Returns
    -------
    (list, numpy.ndarray)
        Element symbols and the matching Cartesian coordinates computed from
        the (possibly augmented) fractional positions.
    """
    symbols = []
    for species, count in zip(entry.species, entry.composition):
        symbols.extend([species] * count)

    frac_positions = list(entry.positions_fractional)

    if augment:
        # Both lists grow while they are being iterated, so freshly appended
        # copies are visited as well and can spawn further copies along
        # their remaining zero coordinates.
        for symbol, frac in zip(symbols, frac_positions):
            for axis, value in enumerate(frac):
                if value != 0:
                    continue

                shifted = frac.copy()
                shifted[axis] = 1.

                if not any(all(known == shifted) for known in frac_positions):
                    frac_positions.append(shifted)
                    symbols.append(symbol)

    basis = calc_basis(entry.geometry)
    cartesian_coords = calc_cartesian_positions(basis, np.array(frac_positions))

    return symbols, cartesian_coords

def show_molecule(mol, augment=False):
    """Display an entry as a periodic ASE structure in an nglview widget.

    Symbols and Cartesian positions come from ``elements_coords``; ``augment``
    is forwarded to it unchanged.  The widget background is set to black.
    """
    symbols, positions = elements_coords(mol, augment=augment)
    cell = calc_basis(mol.geometry)
    atoms = ase.Atoms(symbols=symbols, positions=positions, cell=cell, pbc=True)
    widget = nglview.show_ase(atoms)
    widget.background="black"
    display(widget)

In [None]:
def get_element_list(mol):
    """Expand ``mol.species`` / ``mol.composition`` into one symbol per atom.

    Each species is repeated as many times as its composition count, in the
    order in which the species are listed.
    """
    return [
        symbol
        for symbol, count in zip(mol.species, mol.composition)
        for _ in range(count)
    ]

In [None]:
def show_molecule(mol, calc_coords=True, augment=False):
    """Display an entry as a non-periodic ASE structure in an nglview widget.

    This definition replaces the earlier ``show_molecule(mol, augment=False)``
    in the notebook, so it accepts ``augment`` as well; otherwise calls that
    still pass ``augment=...`` fail with a ``TypeError``.

    Parameters
    ----------
    mol : object
        Entry providing ``geometry``, ``species``, ``composition``,
        ``positions_fractional`` and ``positions_cartesian``.
    calc_coords : bool, optional
        If true, compute the Cartesian positions from the fractional ones;
        otherwise use the stored ``mol.positions_cartesian``.  Ignored when
        ``augment`` is true.
    augment : bool, optional
        If true, take symbols and positions from
        ``elements_coords(mol, augment=True)``, which adds copies of atoms
        whose fractional coordinates are exactly 0.
    """
    A = calc_basis(mol.geometry)

    if augment:
        # Augmented copies only exist in fractional space, so the positions
        # are always recomputed from the fractional coordinates here.
        elements, coords = elements_coords(mol, augment=True)
    else:
        elements = get_element_list(mol)
        if calc_coords:
            coords = calc_cartesian_positions(A, mol.positions_fractional)
        else:
            coords = mol.positions_cartesian

    ase_mol = ase.Atoms(symbols=elements, positions=coords, pbc=False, cell=A)
    view = nglview.show_ase(ase_mol)
    view.background="black"
    display(view)

In [None]:
show_molecule(mol, calc_coords=False)

In [None]:
# Sanity check of the viewer: eight unnamed atoms on the corners of a unit
# cube, placed in a periodic cubic cell with edge length 2.
corner_positions = np.array([
    [0., 0., 0.],
    [1., 0., 0.],
    [0., 1., 0.],
    [0., 0., 1.],
    [0., 1., 1.],
    [1., 0., 1.],
    [1., 1., 0.],
    [1., 1., 1.]
])
ase_mol = ase.Atoms(positions=corner_positions, cell=np.eye(3)*2, pbc=True)

view = nglview.show_ase(ase_mol)
view.background="black"
display(view)

In [None]:
# Load a previously saved table from "OSrTi.pkl".
# NOTE(review): no visible cell of this notebook writes this file, so it
# must already exist next to the notebook.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
o_sr_ti = pd.read_pickle("OSrTi.pkl")

In [None]:
r = o_sr_ti.loc[2]
r.compound

In [None]:
show_molecule(r, augment=False)

In [None]:
# Show the stored Cartesian positions of `r` as bare points: no element
# symbols, no cell and no periodic boundary conditions.
ase_mol = ase.Atoms(pbc=False, positions=r.positions_cartesian)

view = nglview.show_ase(ase_mol)
view.background="black"
display(view)

In [None]:
# 3D scatter plot of entry `r`: atoms coloured by species, the unit cell
# drawn as a black wireframe and the three cell vectors drawn from the origin.
import itertools

A = calc_basis(r.geometry)

fig = plt.figure()
# Create the 3D axes through the figure.  `Axes3D` is not imported in the
# visible cells, and recent Matplotlib releases no longer attach an
# `Axes3D(fig)` instance to the figure automatically.
ax = fig.add_subplot(projection="3d")

colors = ["red", "green", "blue", "orange", "purple", "black"]

# Augmented atoms: positions on a cell face are repeated on the opposite face.
elements, coords = elements_coords(r, augment=True)

# One colour per species, in the order in which the species are listed.
color_dict = {element: colors[i] for i, element in enumerate(r.species)}
print(color_dict)

cs = [color_dict[element] for element in elements]

corners = np.array([
        [0., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 1.],
        [1., 0., 1.],
        [1., 1., 0.],
        [1., 1., 1.]
    ])
# Cell edges connect corners that differ in exactly one fractional
# coordinate.  Unordered pairs draw each of the 12 edges once (ordered pairs
# would draw every edge twice).
for corner1, corner2 in itertools.combinations(corners, 2):
    if np.sum(np.abs(corner1 - corner2)) == 1:
        cc = list(zip(A.dot(corner1), A.dot(corner2)))
        ax.plot(cc[0], cc[1], cc[2], color="black")

ax.scatter(coords[:,0], coords[:,1], coords[:,2], alpha=1, c=cs, s=25)
# Cell vectors are the columns of A.
for i in range(3):
    ax.plot([0, A[0,i]], [0,A[1,i]], [0,A[2,i]])

# Proxy artists for the legend: one coloured dot per species.
legend_handels = []
for i, _ in enumerate(r.composition):
    legend_element = plt.Line2D([0], [0], marker='o', color="w",
                                markerfacecolor= colors[i], label=r.species[i], markersize=10)
    legend_handels.append(legend_element)

plt.suptitle(r.compound)
ax.legend(handles=legend_handels)
plt.show()

In [None]:
r