ATMO compositions are stored in a `csv` file and can be loaded as an `AtmoStructureGroup`.

In [8]:
from cacgan.data.schema import atmo_csv_path
from cacgan import AtmoStructureGroup
# Report which csv file backs the ATMO composition data.
print(atmo_csv_path)

# Build the structure group from the csv (called with no arguments), then show
# the group object followed by its `possible_elements`.
atmo_structures = AtmoStructureGroup.from_csv()
print(atmo_structures)
print(atmo_structures.possible_elements)

# Partition the structures by the "amine" key and record the `first_amine`
# of each resulting sub-group.
atmo_structures_grouped_by_amine = atmo_structures.group_by("amine")
unique_amines = [
    amine_group.first_amine
    for amine_group in atmo_structures_grouped_by_amine
]
print(unique_amines)



E:\projects\CompAugCycleGAN\cacgan\data\atmo.csv




<cacgan.data.schema.AtmoStructureGroup object at 0x0000029A90120B50>
['Ag', 'Al', 'Am', 'As', 'B', 'Ba', 'Be', 'Bi', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cl', 'Co', 'Cr', 'Cs', 'Cu', 'Dy', 'Er', 'Eu', 'F', 'Fe', 'Ga', 'Gd', 'Ge', 'H', 'Hf', 'Ho', 'I', 'In', 'K', 'La', 'Li', 'Lu', 'Mg', 'Mn', 'Mo', 'N', 'Na', 'Nb', 'Nd', 'Ni', 'Np', 'O', 'P', 'Pb', 'Pr', 'Pt', 'Pu', 'Rb', 'Re', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', 'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tm', 'U', 'V', 'W', 'Y', 'Yb', 'Zn', 'Zr']
['C(=Cc1ccncc1)c1ccncc1', 'C(=Nn1cnnc1)c1ccncc1', 'C(CC1CCNCC1)CC1CCNCC1', 'C1=C2N=C(C=C3N=C(C=C4N=C(C=C5N=C1C1=C5CCCC1)C1=C4CCCC1)C1=C3CCCC1)C1=C2CCCC1', 'C1=CNCN1', 'C1=Cc2nc1c(-c1ccccc1)c1ccc([nH]1)c(-c1ccccc1)c1nc(c(-c3ccccc3)c3ccc([nH]3)c2-c2ccccc2)C=C1', 'C1=Cc2nc1c(-c1cccnc1)c1ccc([nH]1)c(-c1cccnc1)c1nc(c(-c3cccnc3)c3ccc([nH]3)c2-c2cccnc2)C=C1', 'C1=Cc2nc1c(-c1ccncc1)c1ccc([nH]1)c(-c1ccncc1)c1nc(c(-c3ccncc3)c3ccc([nH]3)c2-c2ccncc2)C=C1', 'C1CC(C2CCNCC2)CCN1', 'C1CC2CCC1CNC2', 'C1CCC

Loading emits a warning about an invalid entry, `YIQRUO`: its CSD formula contains two multipliers (`n` and `x`),
so it cannot be normalized. This entry is excluded.

Now we use the two largest amine groups to construct a dataset for our model.

In [10]:
from cacgan import GroupAB, FormulaDataset
# Rank the amine groups by size, largest first, and keep the top two:
# `gb` is the largest group and `ga` the runner-up.
gb, ga = sorted(
    atmo_structures_grouped_by_amine,
    key=len,
    reverse=True,
)[:2]

# Pair the two groups and wrap them in a dataset, then run `setup()` with its
# default arguments before printing the summary.
group_pair = GroupAB(ga, gb)
dataset = FormulaDataset(group_pair)
dataset.setup()
print(dataset.details)

CNC
NCCN
=== meta ===
A group size: 314
B group size: 427
A group amine: CNC
B group amine: NCCN
total chem pairs: 413
total alchemy pairs: 133665
=== dataset ===
train a single: 212
train b single: 277
train a haspair: 102
train b haspair: 100
train a pool: 314
train b pool: 377
test a: 102
test b: 50
test pairs: 131



With this dataset we can train an augmented CycleGAN model. The default model/training parameters can be found
in `cacgan.gans.trainer._aug_params`.

In [13]:
# `deepcopy` comes from the standard library rather than being picked up as an
# incidental re-export of `cacgan.gans.trainer`.
from copy import deepcopy

from cacgan import Trainer
from cacgan.gans.trainer import _aug_params
from cacgan.utils import create_dir

# Create a working directory for this run.
model_name = "aug"
wdir = "dummy-{}".format(model_name)
create_dir(wdir)

# Start from a private copy of the default parameters so the module-level
# `_aug_params` is not mutated, then shorten training for this demo.
params = deepcopy(_aug_params)
params["nepochs"] = 10

# Create the trainer and train the model.
trainer = Trainer(params, dataset, model_name=model_name, wdir=wdir)
trainer.save()  # model will be saved in its `wdir`
prior_mean = trainer.train(load_pre_trained=False, save_freq=10, eval_after_train=True)
print(prior_mean)



Epoch 001/010 [0001/0001] -- D_A: 0.5000 | G_A: 0.9996 | Cyc_A: 0.0138 | D_B: 0.5000 | G_B: 0.9996 | Cyc_B: 0.0127 | G_z_A: 0.9170 | G_z_B: 0.9417 | Cyc_z_B: 0.8223 | Cyc_z_A: 0.8456 | D: 1.9558 | G: 4.1274 | C: 2.6944 -- ETA: 0:00:02.281721
Epoch 002/010 [0001/0001] -- D_A: 0.4998 | G_A: 0.9993 | Cyc_A: 0.0138 | D_B: 0.4998 | G_B: 0.9993 | Cyc_B: 0.0127 | G_z_A: 0.8954 | G_z_B: 0.9379 | Cyc_z_B: 0.8311 | Cyc_z_A: 0.8655 | D: 1.9390 | G: 4.1042 | C: 2.7231 -- ETA: 0:00:01.840715
Epoch 003/010 [0001/0001] -- D_A: 0.4996 | G_A: 0.9990 | Cyc_A: 0.0138 | D_B: 0.4996 | G_B: 0.9990 | Cyc_B: 0.0127 | G_z_A: 0.8805 | G_z_B: 0.9244 | Cyc_z_B: 0.9243 | Cyc_z_A: 0.8906 | D: 1.9339 | G: 4.0869 | C: 2.8414 -- ETA: 0:00:01.365349
Epoch 004/010 [0001/0001] -- D_A: 0.4994 | G_A: 0.9987 | Cyc_A: 0.0138 | D_B: 0.4994 | G_B: 0.9986 | Cyc_B: 0.0126 | G_z_A: 0.8698 | G_z_B: 0.9137 | Cyc_z_B: 0.9303 | Cyc_z_A: 0.8469 | D: 1.9177 | G: 4.0611 | C: 2.8037 -- ETA: 0:00:01.108904
Epoch 005/010 [0001/0001] -- D_A



0.1318897560188154


<Figure size 432x288 with 0 Axes>