# Instructions

**README** 

* Download the code from this [link](https://github.com/gmh14/data_efficient_grammar).
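* Download and unzip the log & checkpoint files from this [link](https://drive.google.com/file/d/12g28WNAgRGzaLtuG6ESg25W-uzlNrpLQ/view). Place the unzipped files so that the checkpoint folders are reachable as `grammar-log/log_*` relative to the directory the notebook runs in (these are the paths used in Section 2).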


## 1. Setup

In [None]:
!cd data_efficient_grammar
!conda create -n DEG_test python=3.6
!conda activate DEG_test 
!conda install scipy pandas numpy scikit-learn
!conda install pytorch torchvision torchaudio cpuonly -c pytorch
!conda install -c rdkit rdkit

!pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
!pip install torch-geometric
!pip install torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-1.10.0+cpu.html

!pip install setproctitle
!pip install graphviz

In [None]:
!conda install scipy pandas numpy scikit-learn
!conda install pytorch torchvision torchaudio cpuonly -c pytorch
!conda install -c rdkit rdkit
!pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
!pip install torch-geometric
!pip install torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
!pip install setproctitle
!pip install graphviz
!pip install pickle5

In [None]:
# Install the two packages bundled under retro_star/packages in editable
# (-e) mode. The paths are relative, so this cell must run from the
# directory that contains retro_star/.
!pip install -e retro_star/packages/mlp_retrosyn
!pip install -e retro_star/packages/rdchiral

## 2. Play with the trained model

In [None]:
from private.hypergraph import Hypergraph, hg_to_mol
from grammar_generation import random_produce


from rdkit import Chem
from rdkit.Chem import Draw
import numpy as np
from copy import deepcopy
import pickle5 as pickle
import torch
from os import listdir

In [None]:
# For every experiment, pick the grammar checkpoint with the highest score
# encoded in its file name, load it, and sample 8 molecules from it.
# Results are collected in `generated_mols`: experiment name -> list of
# molecules.

# Experiment name -> directory holding that experiment's checkpoints.
expr_name_dict = {
    'polymer_117motif': 'grammar-log/log_117motifs',
    'iso': 'grammar-log/log_iso',
    'acrylates': 'grammar-log/log_acy',
    'chain_extender': 'grammar-log/log_ce',
}

expr_names = list(expr_name_dict)
generated_mols = dict()
for expr_name in expr_names:
    print('dealing with {}'.format(expr_name))
    ckpt_dir = expr_name_dict[expr_name]
    ckpt_list = listdir(ckpt_dir)

    # Start below every possible score so that a best checkpoint whose
    # score is 0 or negative is still selected.
    max_R = float('-inf')
    max_R_ckpt = None
    for ckpt in ckpt_list:
        if 'grammar' not in ckpt:
            continue
        # The score is the 5th '_'-separated field of the file name with
        # its last 4 characters (presumably the '.pkl' extension) removed.
        curr_R = float(ckpt.split('_')[4][:-4])
        if curr_R > max_R:
            max_R = curr_R
            max_R_ckpt = ckpt

    # Fail with a clear message instead of trying to open '<dir>/None'.
    if max_R_ckpt is None:
        raise FileNotFoundError(
            'no grammar checkpoint found in {}'.format(ckpt_dir))

    print('loading {}'.format(max_R_ckpt))
    # NOTE: unpickling can execute arbitrary code. Only load checkpoint
    # files obtained from a source you trust.
    with open('{}/{}'.format(ckpt_dir, max_R_ckpt), 'rb') as fr:
        grammar = pickle.load(fr)

    # Draw 8 random samples from the loaded grammar; the second value
    # returned by random_produce is not needed here.
    generated_mols[expr_name] = []
    for i in range(8):
        mol, _ = random_produce(grammar)
        generated_mols[expr_name].append(mol)

In [None]:
# Choose which experiment's samples to display. Valid keys:
# 'polymer_117motif', 'iso', 'acrylates', 'chain_extender'.
exp = 'polymer_117motif'
# Render the sampled molecules as a grid, 4 per row, 200x200 px each.
Draw.MolsToGridImage(
    generated_mols[exp],
    subImgSize=(200, 200),
    molsPerRow=4,
)

## 3. Train your own model (w/o optimization)

In [None]:
# Launch training on your own data. `**dataset_path**` is a placeholder:
# replace it with the path of your training data inside ./datasets/
# before running this cell.
!python main.py --training_data=./datasets/**dataset_path**

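When training finishes, look for your model in the output directory named `log-num_generated_samples100-<timestamp>`, where `<timestamp>` is filled in by the training run.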