In [1]:
import os
import time
import subprocess
import shutil
import pandas as pd
from rdkit.Chem import PandasTools as pt
from rdkit.Chem import Draw
from decimer import DECIMER as dc
from joblib import Parallel, delayed
import time
import multiprocessing
from natsort import natsorted

In [2]:
def SmilesToImage(path):
    '''Render every SMILES string in a CSV file to a PNG image.

    The CSV at ``path`` must contain a ``mol`` column holding SMILES strings.
    Images are written to a ``gen_images`` directory located next to the CSV
    and are named ``test_<n>.png``, where ``<n>`` is the 1-based row position.

    Parameters
    ----------
    path : str
        Path to the input CSV file (absolute or relative).

    Returns
    -------
    None
        Progress and timing information is reported with ``print``.
    '''
    start_time = time.time()
    df = pd.read_csv(path)
    pt.AddMoleculeColumnToFrame(df, smilesCol='mol')
    # os.path.join keeps a bare file name (empty dirname) relative to the
    # working directory instead of collapsing to "/gen_images" at the root.
    gen_dir = os.path.join(os.path.dirname(path), "gen_images")
    print(f'Images saving to: {gen_dir}')
    # exist_ok allows the cell to be re-run. Note that images left over from
    # a previous, larger run are not removed here.
    os.makedirs(gen_dir, exist_ok=True)
    for count, mol in enumerate(df['ROMol'], start=1):
        if mol is None:
            # Presumably an unparseable SMILES leaves None in the ROMol
            # column (TODO confirm); skip it but keep the numbering aligned
            # with the CSV rows.
            print(f'Skipping row {count}: no molecule could be built from its SMILES')
            continue
        name = os.path.join(gen_dir, f"test_{count}.png")
        Draw.MolToFile(mol, name)
    print(f'The time taken to generate images for {len(df)} moecules is:', time.time()-start_time,'seconds')

In [3]:
# Render the SMILES in dcm_random.csv to PNG files; SmilesToImage writes them
# to a gen_images folder beside the CSV.
# NOTE(review): the absolute path is machine-specific — adjust before re-running elsewhere.
SmilesToImage("/home/administrator/satvik/sandbox/molGAN/dcm_random.csv")

Images saving to: /home/administrator/satvik/sandbox/molGAN/gen_images
The time taken to generate images for 200 moecules is: 0.7801647186279297 seconds


In [4]:
def ImageToSmiles_helper(self):
    '''Predict the SMILES for one image and pair it with the image's file name.

    Parameters
    ----------
    self : str
        Path to the image file. Despite the name this is a plain function
        argument, not an instance.

    Returns
    -------
    str
        ``"<file name>,<predicted SMILES>"`` — the base name of the image and
        the value returned by ``dc.predict_SMILES``, joined by a comma.
    '''
    # The "Canonical" model name is passed straight through to DECIMER.
    predicted = dc.predict_SMILES(self, "Canonical")
    image_name = os.path.basename(self)
    return "{},{}".format(image_name, predicted)

In [5]:
def ImageToSmiles(folder_path):
    '''Run ``ImageToSmiles_helper`` on every file in a folder, in parallel.

    Parameters
    ----------
    folder_path : str
        Directory containing the images to convert.

    Returns
    -------
    list
        One entry per regular file found in ``folder_path``, each being the
        value returned by ``ImageToSmiles_helper`` for that file.
    '''
    # Only regular files are submitted; sub-directories listed by os.listdir
    # are not images and are skipped.
    images_list = [
        full_path
        for full_path in (
            os.path.abspath(os.path.join(folder_path, entry))
            for entry in os.listdir(folder_path)
        )
        if os.path.isfile(full_path)
    ]
    # Leave one core free, but never ask for fewer than one worker:
    # cpu_count() - 1 is 0 on a single-core machine and joblib rejects n_jobs=0.
    n_jobs = max(1, multiprocessing.cpu_count() - 1)
    output = Parallel(n_jobs=n_jobs)(
        delayed(ImageToSmiles_helper)(image) for image in images_list
    )
    return output

In [6]:
# Time the whole image -> SMILES conversion of the generated images.
# set1 holds whatever ImageToSmiles returns for the folder.
start_time = time.time()
set1 = ImageToSmiles("/home/administrator/satvik/sandbox/molGAN/gen_images")
print(
    'The time taken to generate smiles from images is:',
    time.time() - start_time,
    'seconds',
)

The time taken to generate smiles from images is: 281.4844722747803 seconds


In [7]:
# Naturally sort the "name,smiles" records (so test_2 precedes test_10), split
# each on commas into two columns, and save the table as CSV in the working directory.
set1df = pd.DataFrame(
    data=[record.split(",") for record in natsorted(set1)],
    columns=['Image_Name', 'SMILES'],
)
set1df.to_csv('gen_images-converted.csv', index=False)

In [8]:
# Show the converted table (image name and SMILES columns) as the cell output.
set1df

AttributeError: module 'pandas.io.formats.format' has no attribute '_get_adjustment'

       Image_Name                                         SMILES
0      test_1.png      O=C(NCCC2=NC=1C=CC=CC=1N2CCC#C)C=3OC=CC=3
1      test_2.png  O=C(N)C(=CC)C(P1C2C=CC=CC1CC2)CCCC3C=CC=CCCC3
2      test_3.png   FC(I)=CC=C2C(=CC=C1C=CCC1)CCN2C3=CC=C(O)C=C3
3      test_4.png  O=C2OC1C=CC=CC=C1N2CCC(=O)NC3=CC=C(CI)C(I)=C3
4      test_5.png     O=C(O)CC=1C=CC=CC=1N2C=CC3=CC2S(=O)(=O)N3C
..            ...                                            ...
195  test_196.png         O=C(N2C1=CC=CC=CC=C1CC2)CC3C=CC=CCC3NC
196  test_197.png   C=1C=CC(=CC=1)CCC2CC(C)C2(C)CCC=3C=CC=CC=3CC
197  test_198.png    S=C2C(NC=1C=CC=CC=1CC)=C(N)N2CC=3C=CC=CC=3I
198  test_199.png      O=C(C=1C=CC=CC=1)C3CCCC=2C=CC=CC=2P3(=O)C
199  test_200.png      O=CC=1C=CC=CC=1C=3C(NC)=CC2=CC=CC=C2NC=3C

[200 rows x 2 columns]