In [1]:
# HTSOHM workflow 0.9.1

This notebook takes a user through the process of generating a seed population of hypothetical porous materials and successively mutating libraries of said materials until the method has converged—that is, until the structure-property space has been sampled sufficiently uniformly.

In [9]:
# Root of the HTSOHM checkout. It is passed as the first argument to screen(),
# prep_gen0(), prep4mut(), dummy_screen() and dummy_test() in the cells below.
# NOTE(review): the leading '~' is not expanded here; presumably the helpers
# (or the shell they invoke) expand it -- confirm before changing this path.
HTSOHM_dir = '~/HTSOHM-dev'

import sys

# Make the modules in ./bin importable. The path is relative, so the notebook
# must be started from the HTSOHM directory.
sys.path.insert(0, './bin')

# Star imports: on a name clash the later import wins, so keep this order.
from generate import *
from find_missing import *
from binning import *
from mutate import *

# Execute the IPython helper scripts in this namespace. Presumably these define
# screen(), prep_gen0()/prep4mut(), dummy_screen() and dummy_test(), which the
# later cells call -- verify against the scripts themselves.
%run ./bin/screen.ipy
%run ./bin/cat_data.ipy
%run ./bin/dummy_screen.ipy
%run ./bin/dummy_test.ipy

# Run parameters used throughout the notebook.
number_of_atom_types = 4   # passed to generate() and mutate()
number_of_materials = 20   # library size per generation; also the number of parents picked
bins = 5                   # passed to bin3d(), firstS() and calc_S()
mutation_strength = 0.2    # passed to firstS()

## Generate seed population 

Now that the number of atom-types and population size have been specified, the seed can be generated.

In [4]:
# Create the seed population from the population size and the number of
# atom types set in the configuration cell.
generate(number_of_materials, number_of_atom_types)

## Screen seed

Now that the seed population has been generated, each material can be screened using RASPA. Because the number of materials is typically large, the work is distributed across a computing cluster. For each material, methane loading (see `${HTSOHM}/bin/ch4simNOHVF` for the RASPA input file), void fraction (see `${HTSOHM}/bin/HVsim`), and surface area (see `${HTSOHM}/bin/SAsim`) are calculated. Raw data will be saved in `${HTSOHM}/data/`.

<b>DEV NOTE: ln 17, screen.sh<br>
`export HTSOHM_DIR=${HOME}/HTSOHM-dev`<br>
`export LIB_DIR=${HOME}/HTSOHM-dev/$LIBRARY_DIR`<br>

ALSO MATS PER CORE REDUCED FROM 100 TO 20

In [5]:
# Submit the seed library for screening on the cluster.
screen(
    HTSOHM_dir,
    'gen0',
    0,  # presumably the index of the first material to screen -- confirm in screen.ipy
    number_of_materials,
)

/ihome/cwilmer/ark111/HTSOHM-dev/bin
Preparing to submit jobs...
See terminal for status.

Jobs submitted!
/ihome/cwilmer/ark111/HTSOHM-dev


<font color="red"><b>WAIT FOR JOBS.</b></font>

## Collect output

Data for the entire library is now divided into different directories, one for each node used to carry out the calculations. All of this data is then collected into three files, one for each of the properties calculated: `ch4_abs_cc_cc.txt`, `HVdata2col.txt`, and `SAdata_m2_cc.txt`. These data files can be found in `${HTSOHM}/data/` and in the directory containing all materials from that library.

<b>DEV NOTE: rmdata is commented out...<br>
`find_missing` only checks names, not values</b>

In [6]:
# Collect the per-node screening output for the seed library.
prep_gen0(
    HTSOHM_dir,
    number_of_materials,
)

# Report any materials without data; the result is shown as the cell output.
find_missing('gen0')

/ihome/cwilmer/ark111/HTSOHM-dev/bin
/ihome/cwilmer/ark111/HTSOHM-dev
No missing data points!


([], [], [])

# First generation 

## Bin library, select parents

Now that structure-properties have been calculated for the seed population, the library can be binned--sorted in three dimensions: methane loading, surface area, and void fraction--so that those materials with the rarest combinations of structure properties can be selected to <i>parent</i> new materials.

In [7]:
# Bin the seed library, then choose the parents for the next generation
# from the bin counts and bin memberships.
bin_count_gen0, bin_IDs_gen0 = bin3d('gen0', bins)

p_list_gen0 = pick_parents(
    'gen0', bin_count_gen0, bin_IDs_gen0, number_of_materials
)

## <i>Dummy</i> test

Because properties are calculated using MC methods, it is possible that a particular data point (combination of properties) seems to be unique, but is in fact a statistical anomaly. To deal with this, each parent is re-screened 5 times so that the results can be compared to the original calculation.

In [8]:
# Submit the re-screening ("dummy") jobs for the parents chosen from gen0.
dummy_screen(
    HTSOHM_dir,
    'gen0',
)

/ihome/cwilmer/ark111/HTSOHM-dev/bin
Screening dummies...
See terminal for status.

Jobs submitted!
/ihome/cwilmer/ark111/HTSOHM-dev


<font color="red"><b>WAIT FOR JOBS.</b></font>

In [None]:
# Run the dummy test on gen0; run this only after the dummy jobs have finished.
dummy_test(
    HTSOHM_dir,
    'gen0',
)

## Mutate `gen0`, create `gen1`

In [None]:
# Generation being created in this section.
gen = 1

# Library names derived from the generation index.
p_dir = 'gen%d' % (gen - 1)   # parent library  -> 'gen0'
c_dir = 'gen%d' % gen         # child library   -> 'gen1'
l_dir = 'tgen%d' % gen        # 'tgen1'; not used in this cell

# Set the initial mutation strength(s) for the parent library, then mutate
# it to create the child library.
firstS(p_dir, mutation_strength, bins)
mutate(p_dir, number_of_atom_types, c_dir)

## Screen `gen1`

In [None]:
# Submit gen1 for screening. The third argument grows by one library size
# per generation (0 for gen0, 1 * number_of_materials here).
screen(HTSOHM_dir, 'gen1', 1 * number_of_materials, number_of_materials)

## Collect `gen1` output

In [None]:
# Collect the gen1 screening output. The last two arguments name the 'gen0'
# library and the 'tgen1' library that the following cells work on.
prep4mut(
    HTSOHM_dir, 'gen1',
    1 * number_of_materials, number_of_materials,
    'gen0', 'tgen1',
)

# Report any materials without data in tgen1.
find_missing('tgen1')

## Bin `tgen1`, select parents 

In [None]:
from binning import *

# Library to bin and the number of parents to select from it.
p_dir = 'tgen1'
n_child = number_of_materials

# Bin the library, then pick parents from the bin counts and memberships.
bin_count, bin_IDs = bin3d(p_dir, bins)

p_list_tgen1 = pick_parents(p_dir, bin_count, bin_IDs, n_child)

## `tgen1` dummy test

In [None]:
%run ./bin/dummy_screen.ipy

dummy_screen(HTSOHM_dir, 'tgen1')

In [None]:
%run ./bin/dummy_test.ipy

dummy_test(HTSOHM_dir, 'tgen1')

# Second generation

## Calculate mutation strength(s), create `gen2`

In [None]:
from mutate import *

# Calculate the mutation strength(s) for tgen1.
calc_S(
    'tgen1',
    bins,
)

# Mutate tgen1 to create the gen2 library.
mutate(
    'tgen1',
    number_of_atom_types,
    'gen2',
)

## Screen `gen2`

In [None]:
%run ./bin/screen.ipy

screen(HTSOHM_dir,
       'gen2',
       2 * number_of_materials,
       number_of_materials)

In [None]:
%run ./bin/cat_data.ipy
from find_missing import *

prep4mut(HTSOHM_dir,
         'gen2',
         2 * number_of_materials,
         number_of_materials,
         'gen1',
         'tgen2')

find_missing('tgen2')

from binning import *

p_dir = 'tgen2'
n_child = number_of_materials

bin_count, bin_IDs = bin3d(p_dir, bins)

p_list_tgen2 = pick_parents(p_dir,
                           bin_count,
                           bin_IDs,
                           n_child)

%run ./bin/dummy_screen.ipy

dummy_screen(HTSOHM_dir, 'tgen2')

In [None]:
# Run the dummy test on tgen2; run this only after the dummy jobs have finished.
dummy_test(HTSOHM_dir, 'tgen2')

# Third generation, following the same pattern as the second generation
# (calc_S('tgen1', bins) then mutate('tgen1', ..., 'gen2')): calculate the
# mutation strength(s) for tgen2 and mutate that same library. Parents were
# picked and dummy-tested in 'tgen2', so that is the library to mutate.
# NOTE(review): this cell previously called firstS('tgen2', ...) and
# mutate('gen2', ...), apparently left over from the first-generation cell;
# confirm against mutate.py that calc_S is the intended call here.
calc_S('tgen2', bins)                                  # calculate mutation strength(s)
mutate('tgen2', number_of_atom_types, 'gen3')          # mutate tgen2, create gen3

# Submit gen3 for screening (third argument: 3 * number_of_materials).
screen(HTSOHM_dir, 'gen3', 3 * number_of_materials, number_of_materials)
