In [1]:
import os
from molSimplify.Informatics.MOF.MOF_descriptors import get_MOF_descriptors, get_primitive
import pandas as pd

In [10]:
featurization_list = []
#### Adapt this directory for your use.
featurization_directory = '/Users/<your username>/Desktop/example_MOF/'

#### Place your clean cif files of interest inside of this directory, under cif/
#### Thus, the path where the cif file should be is: /Users/<your username>/Desktop/example_MOF/cif/

### The code looks for the input structures in a sub-directory called cif/ of the example directory.
### Primitive cells are written to primitive/ and xyz files to xyz/, next to cif/.
cif_directory = os.path.join(featurization_directory, 'cif')
primitive_directory = os.path.join(featurization_directory, 'primitive')
xyz_directory = os.path.join(featurization_directory, 'xyz')

#### Create the output directories once, up front, rather than re-checking on every loop iteration.
for output_directory in (primitive_directory, xyz_directory):
    os.makedirs(output_directory, exist_ok=True)

#### os.listdir returns every entry, including hidden/system files (e.g. .DS_Store on macOS),
#### so keep only the cif files. Sorting makes the row order of the output csv reproducible.
cif_files = sorted(name for name in os.listdir(cif_directory) if name.lower().endswith('.cif'))

for cif_file in cif_files:
    #### This first part gets the primitive cells ####
    primitive_cif = os.path.join(primitive_directory, cif_file)
    get_primitive(os.path.join(cif_directory, cif_file), primitive_cif)
    #### Swap only the extension: str.replace('cif', 'xyz') would also rewrite any "cif"
    #### appearing elsewhere in the file name.
    xyz_file = os.path.splitext(cif_file)[0] + '.xyz'
    #### With the primitive cells, we can generate descriptors and write them
    full_names, full_descriptors = get_MOF_descriptors(
        primitive_cif,
        3,
        path=os.path.join(featurization_directory, ''),  # keeps the trailing separator the original passed
        xyzpath=os.path.join(xyz_directory, xyz_file),
    )
    #### One row per structure: descriptor name -> value, plus the source file name as the last column.
    featurization = dict(zip(full_names, full_descriptors))
    featurization['filename'] = cif_file
    featurization_list.append(featurization)
df = pd.DataFrame(featurization_list)
### Write the RACs to the directory. Full featurization frame contains everything.
df.to_csv(os.path.join(featurization_directory, 'full_featurization_frame.csv'), index=False)


#### The full featurization frame contains all features. 
# The following table can help decode features:
# mc --> metal centered products 
# D_mc --> metal centered differences
# lc --> linker connecting atom centered products
# D_lc --> linker connecting atom centered differences
# f- --> full MOF unit cell (not used in https://www.nature.com/articles/s41467-020-17755-8)
# f-lig --> full linker RACs
# func --> functional group centered products
# D_func --> functional group centered differences

# All Zeo++ features should be computed separately.

alpha , beta , gama = 119.96277134, 90.0 ,90.0
[16.58, 0, 0]
[2.447456628095985e-15, 39.97, 0]
[2.4483756299962953e-15, -19.96999999721939, 34.64101615221368]
[0. 0. 0. 0.]
[  1.7689  36.6016  26.4936 105.07  ]
[  3.5378  73.2032  54.9822 203.357 ]
[  5.3067 109.8048  83.4708 301.644 ]
[  7.0756 146.4064 111.9594 399.931 ]
[  8.8445 183.008  138.453  505.001 ]
[ 10.6134 219.6096 166.9416 603.288 ]
[ 12.3823 256.2112 193.4352 708.358 ]
[ 14.1512 292.8128 221.9238 806.645 ]
[ 15.9201 329.4144 250.4124 904.932 ]
[  17.689  366.016  278.901 1003.219]
[  19.4579  402.6176  305.3946 1108.289 ]
[  21.2268  439.2192  333.8832 1206.576 ]
[  22.9957  475.8208  360.3768 1311.646 ]
[  24.7646  512.4224  388.8654 1409.933 ]
[  26.5335  549.024   417.354  1508.22  ]
[  28.3024  585.6256  445.8426 1606.507 ]
[  30.0713  622.2272  472.3362 1711.577 ]
[0. 0. 0. 0.]
[1600. 2560. 7440. 7360.]
[ 3200.  5120. 14520. 14240.]
[ 4800.  7680. 21600. 21120.]
[ 6400. 10240. 28680. 28000.]
[ 8000. 12800. 36120. 3

(176, 176)


In [None]:
### Aside: If you get messages that your MOF has solvent, or contains overlapping atoms, you will need to "clean" the structure for use.
### This process is shown below.

from molSimplify.Informatics.MOF.PBC_functions import solvent_removal, overlap_removal

cif_name = "your_filename_here"
working_path = 'your_path_here'

# If you know your MOF has unusually long bonds (i.e. you get the solvent error in the .log file even though your MOF has no solvent),
# you can set wiggle_room to something like 1.15.
# You don't want to set it much higher than that, since the code may start to assign bonds where none exist.
# This single value is passed to every step below, so changing it here changes the whole pipeline.
wiggle_room = 1

# Each cleaning step reads the file written by the previous one.
primitive_cif = f'{working_path}/{cif_name}_primitive.cif'
no_overlap_cif = f'{working_path}/{cif_name}_no_overlap.cif'
no_solvent_cif = f'{working_path}/{cif_name}_no_solvent.cif'

get_primitive(f'{working_path}/{cif_name}.cif', primitive_cif)
overlap_removal(primitive_cif, no_overlap_cif, wiggle_room=wiggle_room)
solvent_removal(no_overlap_cif, no_solvent_cif, wiggle_room=wiggle_room)

# Now you can featurize your "clean" MOF, i.e. the final output of the cleaning steps above
# (not the original, uncleaned cif).
racs_path = f'{working_path}/RACs'
os.makedirs(racs_path, exist_ok=True)

full_names, full_descriptors = get_MOF_descriptors(
    no_solvent_cif,
    3,
    path=racs_path,
    xyzpath=f'{racs_path}/{cif_name}.xyz',
    wiggle_room=wiggle_room,
);