# MoleculeDiffusionTransformer: Basic model setups

Reference:

#### Generative Discovery of Novel Chemical Designs using Diffusion Modeling and Transformer Architectures with Application to Deep Eutectic Solvents 

Rachel K. Luu<sup>1,2</sup>, Marcin Wysokowski<sup>1</sup>, Markus J. Buehler<sup>1,3,*</sup>

<sup>1</sup> Laboratory for Atomistic and Molecular Mechanics (LAMM), Massachusetts Institute of Technology, 77 Massachusetts Ave., Cambridge, MA 02139, USA<br>
<sup>2</sup> Department of Materials Science and Engineering, Massachusetts Institute of Technology, 77 Massachusetts Ave., Cambridge, MA 02139, USA<br>
<sup>3</sup> Center for Computational Science and Engineering, Schwarzman College of Computing, Massachusetts Institute of Technology, 77 Massachusetts Ave., Cambridge, MA 02139, USA

\* Corresponding author: mbuehler@MIT.EDU

In [2]:
import torch

## Generative inverse diffusion model: Basic model setup

In [3]:
from MoleculeDiffusion import QMDiffusion

# Smoke test for the conditional diffusion model: build it, run one
# forward/backward pass on random placeholder tensors, then sample from it.
# The tensors below only exercise the expected shapes; they are not real data.

device = 'cpu'
batch_size = 4  # number of placeholder samples in this smoke test
max_length = 64  # length of predicted results, i.e. max length of the SMILES string
pred_dim = 16  # dimension equals number of unique tokens
context_embedding_max_length = 12  # length of conditioning, i.e. number of molecular features to be considered

model = QMDiffusion(
    max_length=max_length,
    pred_dim=pred_dim,
    channels=64,
    unet_type='cfg',  # the original notebook lists 'base' as the other option
    context_embedding_max_length=context_embedding_max_length,
    pos_emb_fourier=True,
    pos_emb_fourier_add=False,
    text_embed_dim=64,
    embed_dim_position=64,
).to(device)

# Conditioning sequence: (batch, context_embedding_max_length) floating-point values.
sequences = torch.randn(batch_size, context_embedding_max_length).to(device)

# Training target: (batch, number of tokens, length). These are random integers
# cast to float, used purely as a stand-in; a real target would be e.g. a
# one-hot encoded sequence.
output = torch.randint(0, pred_dim, (batch_size, pred_dim, max_length)).to(device).float()

loss = model(
    sequences=sequences,  # conditioning sequence (set of floating points)
    output=output,        # desired result (e.g. one-hot encoded sequence)
)
loss.backward()
# A bare `loss` expression in the middle of a cell is not displayed, so print it.
print('loss:', loss.item())

# Generate
generated = model.sample(
    sequences,
    device,
    cond_scale=1.,
    timesteps=64,
    clamp=False,
)

print(generated.shape)  # (b, pred_dim, max_length)

Using unet type:  cfg
[<class 'MoleculeDiffusion.diffusion.VDiffusion'>, <class 'MoleculeDiffusion.diffusion.KDiffusion'>, <class 'MoleculeDiffusion.diffusion.VKDiffusion'>, <class 'MoleculeDiffusion.diffusion.KDiffusion_mod'>]
torch.Size([4, 16, 64])


## Generative inverse transformer model: Basic model setup 

#### Model that takes input in the form (batch, num_tokens, length); MSE loss


In [5]:
# In this case, the input and output dimension is the same.

from MoleculeDiffusion import MoleculeTransformer

# Smoke test for the transformer that consumes (batch, num_tokens, length)
# tensors: build it, run one forward/backward pass on random placeholder
# tensors, then generate from the conditioning.

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# also runs on machines without CUDA.
accel_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size = 4   # number of placeholder samples in this smoke test
cond_len = 12    # length of the conditioning sequence; must match max_text_len
seq_len = 128    # length of the placeholder target and of the generated output
logits_dim = 32  # number of tokens

MolTrans = MoleculeTransformer(
    dim=128,
    depth=6,
    logits_dim=logits_dim,  # number of tokens, and also input/output dimension
    dim_head=16,
    heads=8,
    dropout=0.,
    ff_mult=4,
    text_embed_dim=32,
    cond_drop_prob=0.25,
    max_text_len=cond_len,  # max length of conditioning sequence
    pos_fourier_graph_dim=32,  # entire graph fourier embedding, will be added to logits_dim
).to(accel_device)

# Conditioning sequence: (batch, cond_len) floating-point values.
sequences = torch.randn(batch_size, cond_len).to(accel_device)

# Placeholder target: (batch, number of tokens, length); length is flexible.
output = torch.randint(0, logits_dim, (batch_size, logits_dim, seq_len)).to(accel_device).float()

loss = MolTrans(
    sequences=sequences,  # conditioning sequence
    output=output,
    text_mask=None,
    return_loss=True,
)
loss.backward()
# A bare `loss` expression in the middle of a cell is not displayed, so print it.
print('loss:', loss.item())

# Generate
generated = MolTrans.generate(
    sequences=sequences,         # conditioning
    tokens_to_generate=seq_len,  # can also generate less
    cond_scale=1.,
    temperature=1,
)
print(generated.shape)  # (b, number_tokens, tokens_to_generate)

Generating 128 tokens...


  0%|          | 0/128 [00:00<?, ?it/s]

torch.Size([4, 32, 128])


#### Model that takes input in the form of a sequence (batch, length); Cross Entropy loss (used in the paper)


In [6]:
from MoleculeDiffusion import MoleculeTransformerSequence, count_parameters

# Smoke test for the transformer that consumes integer token sequences of
# shape (batch, length): build it, run one forward/backward pass on random
# placeholder tokens, then generate with and without a start token.
# NOTE: this cell rebinds `model`, which the diffusion cell also assigns.

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# also runs on machines without CUDA.
accel_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size = 4   # number of placeholder samples in this smoke test
cond_len = 12    # length of the conditioning sequence; must match max_text_len
logits_dim = 32  # number of tokens

model = MoleculeTransformerSequence(
    dim=128,
    depth=6,
    logits_dim=logits_dim,  # number of tokens
    dim_head=16,
    heads=8,
    dropout=0.,
    ff_mult=4,
    text_embed_dim=32,
    cond_drop_prob=0.25,
    max_text_len=cond_len,  # max length of conditioning sequence
    pos_fourier_graph_dim=32,  # entire graph fourier embedding, will be added to logits_dim
).to(accel_device)

# Conditioning sequence: (batch, cond_len) floating-point values.
sequences = torch.randn(batch_size, cond_len).to(accel_device)

# Placeholder target tokens: (batch, length); length is flexible.
output = torch.randint(0, logits_dim, (batch_size, 23)).to(accel_device).long()
print(output.shape)  # (4, 23)

loss = model(
    sequences=sequences,  # conditioning sequence
    output=output,
    text_mask=None,
    return_loss=True,
)
loss.backward()
# A bare `loss` expression in the middle of a cell is not displayed, so print it.
print('loss:', loss.item())

# If no start token is provided, the model will randomly select one.
# temperature is not passed here, so generate() uses its own default.
generated = model.generate(
    sequences=sequences,    # conditioning
    tokens_to_generate=32,  # can also generate less
    cond_scale=1.,
)

# Generate start token: one random token id per batch element, shape (batch, 1).
output_start = torch.randint(0, logits_dim, (batch_size, 1)).to(accel_device).long()

# This second call overwrites the result of the first generate() call above.
generated = model.generate(
    sequences=sequences,    # conditioning
    output=output_start,    # this is the sequence to start with
    tokens_to_generate=32,  # can also generate less
    cond_scale=1.,
    temperature=1,
)
print(generated.shape)  # (b, tokens_to_generate+1)

torch.Size([4, 23])
Since start token not provided, generating random token.


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

torch.Size([4, 33])
