# Generate molecules

In [11]:
# Imports

import os

import torch
import torch.nn.functional as F

from hyformer.configs.model import ModelConfig
from hyformer.configs.tokenizer import TokenizerConfig
from hyformer.models.auto import AutoModel
from hyformer.utils.chemistry import is_valid
from hyformer.utils.tokenizers.auto import AutoTokenizer

# Autoreload magic: (re)load the IPython `autoreload` extension and use mode 2,
# so edits to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2


In [2]:
# Paths
#
# Everything a reader may want to tune lives here. The model config path and
# the checkpoint path are both derived from MODEL_NAME so the two cannot drift
# apart when a different model is selected.

MODEL_NAME = 'hyformer_small'
TASK_NAME = 'lm'

TOKENIZER_CONFIG_PATH = 'configs/tokenizers/smiles/config.json'
# NOTE(review): assumes config directories are named after the model, as is the
# case for 'hyformer_small' -- confirm for other model names.
MODEL_CONFIG_PATH = f'configs/models/{MODEL_NAME}/config.json'

# Root directory holding experiment results. Defaults to the original cluster
# location; set the HYFORMER_RESULTS_ROOT environment variable to run this
# notebook against a different filesystem without editing the cell.
RESULTS_ROOT = os.environ.get('HYFORMER_RESULTS_ROOT', '/lustre/groups/aih/hyformer/results')
MODEL_CHECKPOINT_PATH = f'{RESULTS_ROOT}/distribution_learning/guacamol/{MODEL_NAME}/{TASK_NAME}/checkpoint.pt'

In [87]:
# Load tokenizer and model

# Build the tokenizer from the config file at TOKENIZER_CONFIG_PATH.
tokenizer_config = TokenizerConfig.from_config_path(TOKENIZER_CONFIG_PATH)
tokenizer = AutoTokenizer.from_config(tokenizer_config)

# Build the model from the config file at MODEL_CONFIG_PATH, then load the
# checkpoint stored at MODEL_CHECKPOINT_PATH into it.
# NOTE(review): presumably `from_config` yields untrained weights and
# `load_pretrained` overwrites them in place -- confirm against hyformer.
model_config = ModelConfig.from_config_path(MODEL_CONFIG_PATH)
model = AutoModel.from_config(model_config)
model.load_pretrained(filepath=MODEL_CHECKPOINT_PATH)



In [145]:
# Sampling configuration passed to the generator below.
batch_size = 2
temperature = 1.3
top_k = 50
top_p = 1.0
# NOTE(review): 10 looks like a debugging value -- the samples shown further
# down are all very short; raise it for realistic molecules if that is intended.
max_sequence_length = 10

# Use the first GPU when one is visible, otherwise fall back to CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Wrap the loaded model and tokenizer into a generator object with the
# sampling settings above.
generator = model.to_generator(
    tokenizer=tokenizer,
    batch_size=batch_size,
    temperature=temperature,
    top_k=top_k,
    top_p=top_p,
    max_sequence_length=max_sequence_length,
    device=device
    )

In [146]:
# Seed torch right before sampling so that Restart & Run All reproduces the
# same molecules.
# NOTE(review): assumes the generator draws from torch's global RNG -- confirm.
SEED = 0
torch.manual_seed(SEED)

# Draw 30 samples in total; the generator produces them in batches.
samples = generator.generate(number_samples=30)

Generating samples: 100%|██████████| 15/15 [00:00<00:00, 20.25it/s]


In [147]:
# Show the raw generated strings exactly as returned by the generator;
# this cell does not filter or validate them.
samples

['CS#CcNN',
 'C=C==Oc',
 'CcCc[n+]7c',
 'ONc1CN',
 'BrCc3O=S',
 'CcCC[n+]c[H]',
 'cOC)C=(',
 'OnCccCN',
 'Cc=[o+]c)c',
 'Ccc(ccc',
 'FCCC#Cc',
 'C=#5CNC',
 'COCNCCC',
 'NNC)(O',
 'CccCc(C',
 'CCc1C(C',
 'Cc((C[N+]C',
 'CCCCnC#',
 'CcCONCn',
 'Cc=2Cl#N',
 'CCc1N)c',
 'NCC(Cc(',
 'CcONCCc',
 'CSN==c(',
 'CCNc1CC',
 'Oc=cN(C',
 'NC(CCNN',
 'cCCCCCC',
 'CCC(N)C',
 '-[S+]C(=(C']