The aim of this notebook is to get 3D views of some molecules that RDKit renders weirdly in two dimensions.

You can find an example below. 

See also this [notebook](https://www.kaggle.com/nayuts/let-s-understand-dataset-with-molecular-3d-models) by Nayu TS.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import PIL.Image as Image
import PIL.ImageOps as ops


Install the necessary libraries
-------------------------------

In [None]:
# Install RDKit from the `rdkit` conda channel; -y answers the confirmation prompt.
! conda install -y -c rdkit rdkit;
# Install py3Dmol, which is imported in the next cell and used for the 3D viewer.
!pip install py3Dmol

In [None]:
import rdkit.Chem as Chem
from rdkit.Chem import AllChem
import py3Dmol

Some helper functions
---------------------

In [None]:
def InChi2Smiles(x):
    """Parse the InChI string `x` and return the SMILES from `Chem.MolToSmiles`."""
    molecule = Chem.inchi.MolFromInchi(x)
    return Chem.MolToSmiles(molecule)
def InChi2CXSmiles(x):
    """Parse the InChI string `x` and return the string from `Chem.MolToCXSmiles`."""
    molecule = Chem.inchi.MolFromInchi(x)
    return Chem.MolToCXSmiles(molecule)

def InChi2CX_Smiles(x):
    """Return `[SMILES, CXSMILES]` for the InChI string `x`.

    The InChI is parsed once and the same molecule is passed to both writers.
    """
    parsed = Chem.inchi.MolFromInchi(x)
    plain_smiles = Chem.MolToSmiles(parsed)
    extended_smiles = Chem.MolToCXSmiles(parsed)
    return [plain_smiles, extended_smiles]
def Smiles2InChi(x):
    """Parse the SMILES string `x` and return the InChI from `Chem.MolToInchi`."""
    molecule = Chem.MolFromSmiles(x)
    return Chem.MolToInchi(molecule)
def show_InChi(x):
    """Display the molecule parsed from the InChI string `x` in the notebook."""
    molecule = Chem.inchi.MolFromInchi(x)
    display(molecule)
def show_Smiles(x):
    """Display the molecule parsed from the SMILES string `x` in the notebook."""
    molecule = Chem.MolFromSmiles(x)
    display(molecule)
# Base directory of the training images; show_image builds file paths under it.
root = "../input/bms-molecular-translation/train"
def show_image(x):
    """Plot the training image whose id is `x` in grayscale.

    The file is looked up under `root`, inside nested folders named after the
    first three characters of the id, as `<id>.png`.
    """
    png_path = f"{root}/{x[0]}/{x[1]}/{x[2]}/{x}.png"
    pixels = plt.imread(png_path)
    plt.imshow(pixels, cmap="gray")
    plt.show()

In [None]:
def show_3dmol(image_id=None,inchi=None,smiles=None,mol=None):
    """Build a py3Dmol stick view of a molecule embedded in 3D by RDKit.

    The molecule is taken from the first source that yields a truthy value,
    tried in this order: `mol`, `smiles`, `inchi`, then `image_id` (looked up
    in the index of the global `train` frame and read from its `InChI`
    column). A source that is given but does not produce a molecule falls
    through to the next one.

    Returns the py3Dmol view, or None after printing a message when no
    molecule could be obtained or the 3D embedding failed.
    """
    candidate = mol
    if not candidate and smiles:
        candidate = Chem.MolFromSmiles(smiles)
    if not candidate and inchi:
        candidate = Chem.MolFromInchi(inchi)
    if not candidate and image_id and image_id in train.index:
        candidate = Chem.MolFromInchi(train.InChI[image_id])

    if not candidate:
        print('No molecule is provided')
        return

    # Hydrogens are added before embedding; the seed is fixed for repeatable runs.
    mol_with_h = Chem.AddHs(candidate)
    embed_status = AllChem.EmbedMolecule(mol_with_h, randomSeed=0xf00d)
    if embed_status < 0:
        print('Failed to embed in 3d')
        return

    pdb_block = Chem.MolToPDBBlock(mol_with_h)
    viewer = py3Dmol.view(width=680, height=300, query=None, data=pdb_block, linked=False)
    viewer.setStyle({'stick': {}})
    viewer.setBackgroundColor('#f9f4fb')
    return viewer

A portion of the training data
------------------------------

In [None]:
%%time
# Row limit passed to read_csv: None reads every row; set e.g. 10_000 for a quick partial load.
n_samples = None
# Index the labels by image id so an InChI can be looked up as train.InChI[image_id].
train = pd.read_csv(
    "../input/bms-molecular-translation/train_labels.csv",
    nrows=n_samples,
).set_index('image_id')


In [None]:
# Example molecule: the dataset image, RDKit's 2D drawing, then the 3D view.
image_id = '0000f7467baa'
show_image(image_id)
show_InChi(train.InChI[image_id])
show_3dmol(image_id).show()

A failure
---------

In [None]:
# The same three views for the molecule this section presents as a failure.
image_id = '4dbbaa6fa2d2'
show_image(image_id)
show_InChi(train.InChI[image_id])
# Kept as the cell's last expression so the notebook shows whatever show_3dmol returns.
show_3dmol(image_id)
