In [None]:
import random
import numpy as np
from argparse import ArgumentParser

from tqdm.auto import tqdm
import matplotlib.pyplot as plt

import einops
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from rdkit import Chem

from smiles_dataset import SmilesDataset
from helpers import *
from ddpm import *
from unet import *
from train import *

In [None]:
# --- Reproducibility -------------------------------------------------------
# Seed every RNG this notebook touches (Python, NumPy, PyTorch) with one value.
SEED = 0
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(SEED)

# --- Run configuration -----------------------------------------------------
no_train = False   # when True, the training cell is skipped
batch_size = 128
n_epochs = 20
lr = 0.001         # learning rate handed to the optimizer

# --- Device and data -------------------------------------------------------
# Use the GPU when one is visible to PyTorch, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

dataset = SmilesDataset(
    csv_file="data/smiles_data/chembl_smiles_sample_50000.csv",
    max_molecule_size=28,
)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Diffusion schedule: number of steps and the beta range
# (values originally used by the authors).
n_steps = 1000
min_beta = 10 ** -4
max_beta = 0.02

# Wrap the U-Net noise predictor in the DDPM model.
ddpm = MyDDPM(
    MyUNet(n_steps),
    n_steps=n_steps,
    min_beta=min_beta,
    max_beta=max_beta,
    device=device,
)

# File path passed to the training loop as `store_path`.
store_path = "smiles_diffuser.pt"

# Train unless the config cell asked to skip training.
if not no_train:
    optimizer = Adam(ddpm.parameters(), lr)
    training_loop(
        ddpm,
        loader,
        n_epochs,
        optim=optimizer,
        device=device,
        store_path=store_path,
    )

In [None]:
# Loading the trained model.
# Rebuild the network with exactly the same hyperparameters that were used to
# construct `ddpm` for training (n_steps for the U-Net, and the beta schedule
# for the DDPM wrapper). Relying on constructor defaults here would silently
# break if they ever differ from the training configuration: the state dict
# could fail to load, or sampling could run with a different noise schedule.
best_model = MyDDPM(
    MyUNet(n_steps),
    n_steps=n_steps,
    min_beta=min_beta,
    max_beta=max_beta,
    device=device,
)
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
best_model.load_state_dict(torch.load(store_path, map_location=device))
best_model.eval()  # inference mode: disables dropout / batch-norm updates
print("Model loaded")

In [None]:
# Sample from the trained model and decode each sample back into a molecule.
n_samples = 100  # single source of truth for how many samples to draw

print("Generating new images")
generated = generate_new_images(
    best_model,
    n_samples=n_samples,
    device=device,
)

# Decode every generated sample. Decoding is best-effort: samples that cannot
# be turned into a molecule are skipped, but counted so the failure rate is
# visible instead of being silently swallowed.
out = []
n_failed = 0
for sample in generated:
    # Take the first channel of the sample and move it to the CPU for decoding.
    embedding = sample[0, :, :].cpu()
    try:
        mol = dataset.embedder.unembed_smiles(embedding)
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        n_failed += 1
        continue
    # NOTE(review): assumes unembed_smiles may return None for an invalid
    # molecule (RDKit parsing convention) — confirm against the embedder.
    if mol is None:
        n_failed += 1
        continue
    out.append(mol)

print(f"Decoded {len(out)} of {len(out) + n_failed} samples ({n_failed} failed)")

for item in out:
    print(Chem.MolToSmiles(item))
    print("***********************")