# Descriptors

In [None]:
!pip install rdkit
!pip install pandas

In [None]:
from rdkit.Chem import Descriptors
import pandas as pd
from rdkit.Chem import MolFromSmiles, Draw
from rdkit.Chem.Descriptors import CalcMolDescriptors
from rdkit.Chem.Draw import MolsToGridImage
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## RDKit descriptors

In [None]:
# Collect the first element (the descriptor name) of every entry in
# Descriptors._descList and print the resulting list.
all_desc = [desc_name for desc_name, _ in Descriptors._descList]
print(all_desc)

In [None]:
# Load the input table. A later cell reads its 'smiles' column to build molecules.
# NOTE(review): the plotting cells also use a 'name' column, presumably from this file — confirm.
data_smiles = pd.read_csv('data_smiles.csv')

In [None]:
# Show the loaded input table as this cell's output.
data_smiles

In [None]:
# Turn every entry of the 'smiles' column into an RDKit molecule.
# NOTE(review): MolFromSmiles presumably yields None for unparsable SMILES —
# confirm the input file contains only valid strings.
mol = list(map(MolFromSmiles, data_smiles['smiles']))

# Compute the descriptor result for each molecule and gather the results
# into a table with one row per molecule.
descriptors = list(map(CalcMolDescriptors, mol))
df_descriptors = pd.DataFrame(descriptors)

# Put the descriptor columns side by side with the original input columns.
data_descriptors = pd.concat((data_smiles, df_descriptors), axis='columns')

In [None]:
# Show the combined input + descriptor table as this cell's output.
data_descriptors

In [None]:
# Write the combined table to disk without the index column; the
# "Visualizing descriptors" cell reads this file back in.
data_descriptors.to_csv('data_descriptors.csv',index=False)

## Visualizing molecules

In [None]:
# One caption per molecule: its name followed by the molecular weight
# rounded to two decimals. The columns are walked as NumPy arrays so each
# value keeps the same scalar type that positional .iloc access returns.
legends = [
    f'{label} | MW = {round(weight, 2)}'
    for label, weight in zip(data_descriptors['name'].to_numpy(),
                             data_descriptors['MolWt'].to_numpy())
]

# Default drawing options, passed through to the grid renderer.
opts = Draw.MolDrawOptions()

# Render all molecules into a single captioned grid image.
img = MolsToGridImage(
    mol,
    subImgSize=(600, 300),
    legends=legends,
    returnPNG=True,
    drawOptions=opts,
)
img

## Visualizing descriptors

In [None]:
# Reload the descriptor table written by the earlier cell.
data_descriptors = pd.read_csv('data_descriptors.csv')

# Descriptor columns to visualize; each one gets its own subplot.
param = [
    'MolWt',
    'MolLogP',
    'NumHDonors',
    'NumHAcceptors',
    'NumRotatableBonds',
    'TPSA',
]

# Molecule names followed by the selected descriptor columns.
phys = pd.concat(
    [data_descriptors['name'], data_descriptors[param]],
    axis=1,
)

# A single row of subplots, one column per descriptor, titled by descriptor name.
fig = make_subplots(rows=1, cols=len(param), subplot_titles=param)

for position, descriptor in enumerate(param, start=1):
    # Box plot of this descriptor; hover shows the molecule name and the value.
    box = go.Box(
        y=phys[descriptor],
        boxpoints='all',
        marker={'size': 8},
        text=phys['name'],
        hoverinfo='text+y',
        showlegend=False,
    )
    fig.add_trace(box, row=1, col=position)

    # Hide the x tick labels of this subplot.
    fig.update_xaxes(showticklabels=False, row=1, col=position)

fig.update_layout(
    width=1000,
    height=400,
    title_text="Calculated descriptors of beta lactam molecules",
)

# Display the figure, then save it as a standalone HTML file.
fig.show()
fig.write_html("calc_desc.html")