In [2]:
""" This cell creates an unflattened Sine Coulomb matrix (SCM). The sine matrix captures features of interacting atoms in a periodic system with a very low computational cost.
   Refer to https://hackingmaterials.lbl.gov/matminer/matminer.featurizers.structure.html#matminer.featurizers.structure.matrix.SineCoulombMatrix for details"""

""" Generates a list of the SCMs of all the structures in the training set. Dimensions are 8x8 or 16x16 depending on the number of atoms in each structure."""

from pymatgen.core import Structure
from matminer.featurizers.structure.matrix import SineCoulombMatrix

# Featurizer configured to return the unflattened matrix.
scm = SineCoulombMatrix(flatten=False)
scm_raw_all_structures = []  # One featurization result per structure, ordered by POSCAR index

# POSCAR_0..POSCAR_4 are read from ./size_1 and POSCAR_5..POSCAR_53 from ./size_2.
for folder, indices in (('./size_1', range(0, 5)), ('./size_2', range(5, 54))):
    for idx in indices:
        struct = Structure.from_file(folder + '/POSCAR_' + str(idx))
        # The matrix must be computed for each structure that is loaded;
        # appending without featurizing would repeat the previous structure's result.
        scm_raw = scm.featurize(struct)
        scm_raw_all_structures.append(scm_raw)



In [3]:
"""This cell creates a flattened SCM with eigen values of the raw matrix for all structures in training set"""

from pymatgen.core import Structure
from matminer.featurizers.structure.matrix import SineCoulombMatrix
import numpy as np


# Load every training structure exactly once, ordered by POSCAR index:
# POSCAR_0..POSCAR_4 are read from ./size_1 and POSCAR_5..POSCAR_53 from ./size_2.
struct_list = []
for folder, indices in (('./size_1', range(0, 5)), ('./size_2', range(5, 54))):
    for idx in indices:
        struct_list.append(Structure.from_file(folder + '/POSCAR_' + str(idx)))

# Number of sites in each structure (float array, same order as struct_list).
# Built from the loaded structures so its length always matches struct_list.
n_sites_array = np.array([len(structure.sites) for structure in struct_list], dtype=float)

# Fitting a one-dimensional array of eigenvalues (somewhat in relation with
# Principal Component Analysis) of the Sine Coulomb Matrix.
# Dimensions are 1x16, as the size 2 structures contain 16 atoms. The corresponding
# entries in the size 1 structures, which contain 8 atoms, are zeros.
scm = SineCoulombMatrix()
scm.fit(struct_list)

# Featurize the structures already held in memory instead of parsing every
# POSCAR file from disk a second time.
scm_flattened_all_structures = []  # Contains the necessary data
for structure in struct_list:
    flattened_scm = scm.featurize(structure)
    scm_flattened_all_structures.append(flattened_scm)



  zeros[: len(eigs)] = eigs


In [13]:
"""Bag of Bonds Vector calculation"""

"""Bag of bonds is an alternative way of representing the SCM, being invariant to permutations of atom indices. 
It is inspired by the so-called “bag of words” descriptor used in natural language processing, where a bag encodes the frequency of a particular word appearing in text. 
BoB follows a similar approach by making bags of different types of bonds (Sr-S,Pb-S) and order of the bond (single, double, triple). Each bag is basically a vector. 
These bags are then populated by the corresponding entries in the SCM. A fingerprint is formed by vectorizing this information, by simply concatenating all bags of bonds in a pre-specified and consistent order."""

from pymatgen.core import Structure
from matminer.featurizers.structure.bonding import BagofBonds
# Relies on struct_list, the list of structures built in the flattened-SCM cell.
bob_data = []  # Contains the necessary data
bob = BagofBonds()
# Fit the featurizer on the whole set of structures before bagging any of them.
bob.fit(struct_list)
# One bob.bag(...) result per structure, in the same order as struct_list.
bob_data = [bob.bag(structure) for structure in struct_list]


  ar = np.asanyarray(ar)


54
