In [1]:
import os
import json
import deepsmiles

from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
from rdkit import RDLogger
from rdkit.rdBase import DisableLog

# Silence all RDKit log channels with a single call. 'rdApp.*' is the
# wildcard spec accepted by DisableLog, so there is no need to iterate over
# the private RDLogger._levels list (which may change between releases).
DisableLog('rdApp.*')

# Have the notebook render molecules as raster images instead of SVG.
IPythonConsole.ipython_useSVG = False

from typing import NoReturn

In [2]:
def create_dir_for_images(folder_name: str) -> None:
    """Make sure the directory ``folder_name`` exists.

    Missing parent directories are created as well, and calling the function
    for a directory that already exists is a no-op.

    Args:
        folder_name: Path of the directory that will hold the images.

    Raises:
        FileExistsError: If ``folder_name`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. no permission).
    """
    # exist_ok=True replaces the racy "check, then create" sequence: the
    # directory may appear between the check and the mkdir call.
    os.makedirs(folder_name, exist_ok=True)

def create_filename(num: int) -> str:
    """Return the PNG file name for the item at zero-based index ``num``.

    File names are one-based, so index ``0`` maps to ``'1.png'``.
    """
    image_number = num + 1
    return f'{image_number}.png'

In [4]:
INPUT_FILENAME = 'PubChem.json'
FOLDER_NAME = 'images'
OUTPUT_FILENAME = 'num_of_images.json'
MAX_SMILES_LENGTH = 30    # records with a longer SMILES string are skipped
IMAGE_SIZE = (128, 128)   # (width, height) of every rendered PNG, in pixels

# Read the input records. The encoding is given explicitly so the result
# does not depend on the platform's default locale.
with open(INPUT_FILENAME, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)
print('FINISH JSON DOWNLOADING')

# Create the folder that will receive the images.
create_dir_for_images(FOLDER_NAME)

# Converter from SMILES to DeepSMILES notation.
converter = deepsmiles.Converter(rings=True, branches=True)

# Draw one image per accepted molecule and record its number and notations.
json_output = []
for num, mol_datum in enumerate(data):
    smiles = mol_datum['isosmiles']

    # Cheap length filter first, so over-long strings are never parsed.
    if len(smiles) > MAX_SMILES_LENGTH:
        continue

    # MolFromSmiles yields a falsy result when the SMILES cannot be parsed.
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        continue

    canvas = Draw.rdMolDraw2D.MolDraw2DCairo(*IMAGE_SIZE)
    # Key -1 is the palette's default entry, so every atom is drawn in black.
    canvas.drawOptions().setAtomPalette({-1: (0, 0, 0)})
    Draw.rdMolDraw2D.PrepareAndDrawMolecule(canvas, mol)
    # Finalise the drawing before reading it back, as the RDKit drawing
    # documentation prescribes.
    canvas.FinishDrawing()
    canvas.WriteDrawingText(os.path.join(FOLDER_NAME, create_filename(num)))

    # image_num matches the one-based number used in the image file name.
    json_output.append({
        'deepsmiles': converter.encode(smiles),
        'smiles': smiles,
        'image_num': num + 1
    })
print('FINISH MAKING JSON')

# Save the index of generated images.
with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as outfile:
    json.dump(json_output, outfile, indent='\t')
print('FINISH SAVING JSON')

FINISH JSON DOWNLOADING
FINISH MAKING JSON
FINISH SAVING JSON
