In [2]:
from rdkit import Chem
from rdkit.Chem import Descriptors
import pandas as pd

In [3]:
# Define a function to calculate multiple physicochemical properties of molecules
# Computes molecular weight, LogP (lipophilicity), number of rotatable bonds, 
# topological polar surface area (TPSA), total ring count, number of aromatic rings, 
# number of hydrogen bond donors, and number of hydrogen bond acceptors
def calc_mol_properties(smiles):
    """Compute eight physicochemical descriptors for a single molecule.

    Parameters
    ----------
    smiles : str
        SMILES string describing the molecule.

    Returns
    -------
    tuple
        ``(mol_weight, logp, rotatable_bonds, tpsa, rings, AroRings,
        h_donors, h_acceptors)`` in exactly that order. Each value is the
        result of the matching ``Descriptors`` function (``MolWt``,
        ``MolLogP``, ``NumRotatableBonds``, ``TPSA``, ``RingCount``,
        ``NumAromaticRings``, ``NumHDonors``, ``NumHAcceptors``).

    Raises
    ------
    ValueError
        If ``Chem.MolFromSmiles`` returns ``None`` for ``smiles``, i.e. the
        string could not be turned into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)  # Convert SMILES string to RDKit molecule object
    if mol is None:
        # Without this guard the first descriptor call fails with an opaque
        # argument-type error that does not say which input was at fault.
        raise ValueError(f"Could not parse SMILES string: {smiles!r}")

    return (
        Descriptors.MolWt(mol),              # molecular weight
        Descriptors.MolLogP(mol),            # LogP (lipophilicity)
        Descriptors.NumRotatableBonds(mol),  # number of rotatable bonds
        Descriptors.TPSA(mol),               # topological polar surface area
        Descriptors.RingCount(mol),          # total number of rings
        Descriptors.NumAromaticRings(mol),   # number of aromatic rings
        Descriptors.NumHDonors(mol),         # hydrogen bond donors
        Descriptors.NumHAcceptors(mol),      # hydrogen bond acceptors
    )

In [4]:
# Load the GABAA dataset (gb18030-encoded CSV) and keep only its 'smiles' column
gabaa_csv_path = '../data/GABAA.csv'
data_smiles = pd.read_csv(gabaa_csv_path, encoding='gb18030')['smiles']

In [7]:
# Load the plant-derived dataset (gb18030-encoded CSV) and keep only its 'SMILES' column
plant_csv_path = '../data/plant.csv'
plant_smiles = pd.read_csv(plant_csv_path, encoding='gb18030')['SMILES']

# GABAA molecules: compute and save physicochemical properties

In [8]:
# Column-oriented accumulator for the GABAA results: one independent, initially
# empty list per output column, in the order the columns should appear.
data = {
    column: []
    for column in (
        'smiles', 'weight', 'logp', 'rotatable_bonds', 'tpsa',
        'rings', 'AroRings', 'h_donors', 'h_acceptors',
    )
}

In [9]:
# Compute the physicochemical properties of every GABAA molecule and collect
# them column-wise in `data`.

# Empty the accumulator lists first so that re-running this cell rebuilds the
# columns instead of appending every molecule a second time.
for column_values in data.values():
    column_values.clear()

# Keys of `data` that receive the values returned by calc_mol_properties,
# listed in the same order as the returned tuple.
property_columns = ('weight', 'logp', 'rotatable_bonds', 'tpsa',
                    'rings', 'AroRings', 'h_donors', 'h_acceptors')

for smiles in data_smiles:
    # Compute before appending anything, so a failure on one molecule cannot
    # leave the columns with different lengths.
    properties = calc_mol_properties(smiles)
    data['smiles'].append(smiles)
    for column, value in zip(property_columns, properties):
        data[column].append(value)

In [10]:
# Turn the column-wise accumulator into a DataFrame (one row per GABAA molecule)
df_data = pd.DataFrame(data=data)

In [11]:
# Show the GABAA property table as this cell's output
df_data

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,C(CC(=O)O)CN,103.121,-0.19010,3,63.32,0,0,2,2
1,CC1CCC(C(C1)O)C(C)C,156.269,2.43950,1,20.23,1,0,1,1
2,CCC1(C(=O)NC(=O)NC1=O)C2=CC=CC=C2,232.239,0.70040,2,75.27,2,1,2,3
3,C1CNCC2C1(O2)C(=O)O,143.142,-0.79810,1,61.86,2,0,2,3
4,CC1=C(CCCl)SC=N1,161.657,2.23282,2,12.89,1,1,0,2
...,...,...,...,...,...,...,...,...,...
483,C1C(C(C(CC1(C(=O)O)O)OC(=O)C=CC2=CC(=C(C=C2)O)...,354.311,-0.64590,4,164.75,2,1,6,8
484,CC1=C(C(=O)C=CO1)O,126.111,0.65382,0,50.44,1,1,1,3
485,CN1C=NC2=C1C(=O)NC(=O)N2C,180.167,-1.03970,0,72.68,2,2,1,5
486,CN1C2=C(C(=O)N(C1=O)C)NC=N2,180.167,-1.03970,0,72.68,2,2,1,5


In [12]:
# Save the GABAA property table as CSV; the row index is written as well
# (pandas' default), so the file starts with an unnamed index column.
df_data.to_csv(path_or_buf='../data/gaba_MolecularProperties.csv', index=True)

# Plant-derived molecules: compute and save physicochemical properties

In [13]:
# Column-oriented accumulator for the plant-derived results: every output
# column starts as its own empty list.
plant_columns = ('smiles', 'weight', 'logp', 'rotatable_bonds', 'tpsa',
                 'rings', 'AroRings', 'h_donors', 'h_acceptors')
plant = {column: [] for column in plant_columns}

In [14]:
# Compute the physicochemical properties of every plant-derived molecule and
# collect them column-wise in `plant`.

# Empty the accumulator lists first so that re-running this cell rebuilds the
# columns instead of appending every molecule a second time.
for column_values in plant.values():
    column_values.clear()

# Keys of `plant` that receive the values returned by calc_mol_properties,
# listed in the same order as the returned tuple.
property_columns = ('weight', 'logp', 'rotatable_bonds', 'tpsa',
                    'rings', 'AroRings', 'h_donors', 'h_acceptors')

for smiles in plant_smiles:
    # Compute before appending anything, so a failure on one molecule cannot
    # leave the columns with different lengths.
    properties = calc_mol_properties(smiles)
    plant['smiles'].append(smiles)
    for column, value in zip(property_columns, properties):
        plant[column].append(value)

In [15]:
# Turn the column-wise accumulator into a DataFrame (one row per plant-derived
# molecule) and show it as this cell's output
df_plant = pd.DataFrame(data=plant)
df_plant

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,CC(C)CCCC(C)CCCC(C)CCCC(=O)C,268.485,6.0145,12,17.07,0,0,0,1
1,CC(=O)CC(C)(C)O,116.160,0.7364,2,37.30,0,0,1,2
2,CCCCCO,88.150,1.1689,3,20.23,0,0,1,1
3,CCCCCCCCCCCCCCCCCCCCCCCC,338.664,9.6084,21,0.00,0,0,0,0
4,CCCCCCCCCCCCCCCCCCCCCCC,324.637,9.2183,20,0.00,0,0,0,0
...,...,...,...,...,...,...,...,...,...
2389,CC1=C2CC(C(CC2OC1=O)(C)C=C)C(=C)C(=O)OC,276.332,2.5598,3,52.60,2,0,0,4
2390,CC1=CCCC(C1=CCC(=O)C)(C)C,192.302,3.6582,2,17.07,1,0,0,1
2391,CC1(C2CC=C(C1C2)CCO)C,166.264,2.3612,2,20.23,3,0,1,1
2392,CC1C2CC(C1=O)CC2(C)C,152.237,2.2576,0,17.07,2,0,0,1


In [16]:
# Save the plant property table as CSV; the row index is written as well
# (pandas' default), so the file starts with an unnamed index column.
df_plant.to_csv(path_or_buf='../data/plant_MolecularProperties.csv', index=True)