In [None]:
!pip install rdkit-pypi

Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.4/29.4 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit-pypi
Successfully installed rdkit-pypi-2022.9.5


In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import requests

In [None]:
# SMILES strings of the gamma-secretase inhibitors used in this notebook, one per line.
# NOTE(review): the first two entries are byte-identical, yet they are later paired
# with two different ChEMBL IDs — confirm whether one of them should be another structure.
gamma_secretase_inhibitors = [
    "CC(C)[C@H](O)C(=O)N[C@@H](C)C(=O)N[C@@H]1C(=O)N(C)CCc2ccccc21",
    "CC(C)[C@H](O)C(=O)N[C@@H](C)C(=O)N[C@@H]1C(=O)N(C)CCc2ccccc21",
    "CCC[C@H](N[C@H]1CCc2cc(F)cc(F)c2C1)C(=O)Nc1cn(C(C)(C)CNCC(C)(C)C)cn1",
    "CC(C)(C(=O)NCC(F)(F)C(F)(F)F)C(=O)N[C@@H]1C(=O)Nc2ccccc2-c2ccccc21",
    "NC(=O)[C@@H](CCC(F)(F)F)N(Cc1ccc(-c2ncon2)cc1F)S(=O)(=O)c1ccc(Cl)cc1",
    "O=S(=O)(N[C@H](CO)C(C(F)(F)F)C(F)(F)F)c1ccc(Cl)s1",
]

In [None]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem

# Parse every SMILES string into an RDKit molecule first. Chem.MolFromSmiles
# returns None (it does not raise) when a string cannot be parsed, so validate
# here and name the offending entries instead of failing inside the
# fingerprint call with an unrelated-looking error.
molecules = [Chem.MolFromSmiles(smiles) for smiles in gamma_secretase_inhibitors]
unparsable_smiles = [
    smiles
    for smiles, mol in zip(gamma_secretase_inhibitors, molecules)
    if mol is None
]
if unparsable_smiles:
    raise ValueError(f"RDKit could not parse these SMILES strings: {unparsable_smiles}")

# Morgan fingerprints (radius 2, 2048 bits): one bit vector per molecule,
# in the same order as gamma_secretase_inhibitors.
fingerprints = [
    AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
    for mol in molecules
]

# Convert the fingerprints to bit strings for easier use in machine learning models
fingerprints_bit_strings = [fp.ToBitString() for fp in fingerprints]

# One row per inhibitor: the SMILES string and its fingerprint bit string.
df_gamma_secretase = pd.DataFrame({
    'SMILES': gamma_secretase_inhibitors,
    'Fingerprint': fingerprints_bit_strings
})

print(df_gamma_secretase.head())

                                              SMILES  \
0  CC(C)[C@H](O)C(=O)N[C@@H](C)C(=O)N[C@@H]1C(=O)...   
1  CC(C)[C@H](O)C(=O)N[C@@H](C)C(=O)N[C@@H]1C(=O)...   
2  CCC[C@H](N[C@H]1CCc2cc(F)cc(F)c2C1)C(=O)Nc1cn(...   
3  CC(C)(C(=O)NCC(F)(F)C(F)(F)F)C(=O)N[C@@H]1C(=O...   
4  NC(=O)[C@@H](CCC(F)(F)F)N(Cc1ccc(-c2ncon2)cc1F...   

                                         Fingerprint  
0  0100010000000000000000000000000000000000000000...  
1  0100010000000000000000000000000000000000000000...  
2  0100000000000000001000000001000000000000000000...  
3  0000000100000000000000000000000000000010000000...  
4  0100000000010000000000000000000000000000000000...  


In [None]:
from chembl_webresource_client.new_client import new_client

# Handle to the ChEMBL "activity" resource of the web-service client.
activities = new_client.activity

# ChEMBL molecule IDs to look up. The resulting lists are attached to
# df_gamma_secretase positionally, so this list presumably mirrors the order of
# gamma_secretase_inhibitors — verify the ID-to-SMILES pairing.
chembl_id = [
    "CHEMBL190083",
    "CHEMBL520733",
    "CHEMBL1770916",
    "CHEMBL4297422",
    "CHEMBL1090771",
    "CHEMBL463981",
]

# For each ChEMBL ID, collect the 'standard_value' of every IC50 activity record.
# NOTE(review): the filter constrains only the molecule and the standard_type, so
# records from every assay/target stored for the compound end up in one list —
# confirm that is intended before modelling on these values.
ic50_values = [
    [
        record['standard_value']
        for record in activities.filter(molecule_chembl_id=molecule_id, standard_type='IC50')
    ]
    for molecule_id in chembl_id
]

# Attach the per-compound IC50 lists as a new column (one list per row).
df_gamma_secretase['IC50'] = ic50_values

print(df_gamma_secretase.head())


                                              SMILES  \
0  CC(C)[C@H](O)C(=O)N[C@@H](C)C(=O)N[C@@H]1C(=O)...   
1  CC(C)[C@H](O)C(=O)N[C@@H](C)C(=O)N[C@@H]1C(=O)...   
2  CCC[C@H](N[C@H]1CCc2cc(F)cc(F)c2C1)C(=O)Nc1cn(...   
3  CC(C)(C(=O)NCC(F)(F)C(F)(F)F)C(=O)N[C@@H]1C(=O...   
4  NC(=O)[C@@H](CCC(F)(F)F)N(Cc1ccc(-c2ncon2)cc1F...   

                                         Fingerprint  \
0  0100010000000000000000000000000000000000000000...   
1  0100010000000000000000000000000000000000000000...   
2  0100000000000000001000000001000000000000000000...   
3  0000000100000000000000000000000000000010000000...   
4  0100000000010000000000000000000000000000000000...   

                                                IC50  
0  [280000.0, 44000.0, 123000.0, 44000.0, 250000....  
1  [39.81, 39.81, 316.23, 125.89, 199.53, 100.0, ...  
2                        [1.2, 6.2, 1.3, 19.15, 6.2]  
3                           [4.0, 20000.0, 19952.62]  
4  [0.3, 0.225, 0.3, 38227.84, 514324.3, 71671.85..

Data collection is complete. Next step: preprocessing.