In [None]:
import pandas as pd
import numpy as np
import os
import warnings
!pip install rdkit

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem, PandasTools, MACCSkeys, AtomPairs, rdFingerprintGenerator
from rdkit import DataStructs
from rdkit.Chem.rdmolops import PatternFingerprint
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem.AtomPairs.Pairs import GetAtomPairFingerprintAsBitVect
from rdkit import RDConfig

# Notebook-wide display setting: passing None removes pandas' cap on the
# number of rows shown when a DataFrame is rendered.
pd.options.display.max_rows = None

# Install a blanket "ignore" filter so no warnings are printed by later cells.
warnings.filterwarnings(action="ignore")


Collecting rdkit
  Downloading rdkit-2024.3.6-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Downloading rdkit-2024.3.6-cp310-cp310-manylinux_2_28_x86_64.whl (32.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2024.3.6


In [None]:
# Load the activity table; the code below reads its 'smiles' column.
df = pd.read_excel("/content/act_cleaned.xlsx")

# AddMoleculeColumnToFrame converts the whole 'smiles' column in one call and
# stores the resulting molecules in 'mol'. It is therefore called exactly once
# (the previous per-column loop just repeated the same whole-frame conversion).
PandasTools.AddMoleculeColumnToFrame(df, 'smiles', 'mol')

# A SMILES string that RDKit cannot parse leaves a missing value in 'mol',
# which would make every fingerprint cell below fail. Report and remove such
# rows, then renumber so positional and label indexing stay in step.
unparsable = df['mol'].isna()
if unparsable.any():
    print(f"Dropping {int(unparsable.sum())} row(s) with unparsable SMILES")
    df = df.loc[~unparsable].reset_index(drop=True)



**Atom Pair Fingerprint**

In [5]:
# Atom-pair fingerprints folded to 2048 bits, one per molecule in df['mol'].
apgen = rdFingerprintGenerator.GetAtomPairGenerator(fpSize=2048)

# GetFingerprintAsNumPy hands back the bit vector as a NumPy array directly,
# so no separate np.array(...) conversion of the RDKit bit vector is needed.
df_ap = [apgen.GetFingerprintAsNumPy(mol) for mol in df['mol']]

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
AP = pd.concat([df, pd.DataFrame(df_ap, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
AP.drop(columns='mol').to_excel('atompair.xlsx', index=False)

KeyboardInterrupt: 

**Avalon Fingerprint**

In [6]:
# Avalon fingerprints (2048 bits), one per molecule in df['mol'].
df_avalon = []
for mol in df['mol']:
    # ConvertToNumpyArray fills (and resizes) the array it is given, so an
    # empty int8 array is the starting point for each molecule.
    bits = np.zeros((0,), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(pyAvalonTools.GetAvalonFP(mol, nBits=2048), bits)
    df_avalon.append(bits)

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
AVALON = pd.concat([df, pd.DataFrame(df_avalon, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
AVALON.drop(columns='mol').to_excel('avalon.xlsx', index=False)


**Pattern Fingerprint**

In [None]:
# Pattern fingerprints (PatternFingerprint with its default settings),
# one per molecule in df['mol'].
df_pattern = []
for mol in df['mol']:
    # ConvertToNumpyArray fills (and resizes) the array it is given, so an
    # empty int8 array is the starting point for each molecule.
    bits = np.zeros((0,), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(PatternFingerprint(mol), bits)
    df_pattern.append(bits)

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
PATTERN = pd.concat([df, pd.DataFrame(df_pattern, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
PATTERN.drop(columns='mol').to_excel('pattern.xlsx', index=False)

**RDKit Fingerprint**

In [None]:
# RDKit path-based fingerprints folded to 2048 bits, one per molecule.
rdkitgen = rdFingerprintGenerator.GetRDKitFPGenerator(fpSize=2048)

# GetFingerprintAsNumPy hands back the bit vector as a NumPy array directly,
# so no separate np.array(...) conversion of the RDKit bit vector is needed.
df_rdkit = [rdkitgen.GetFingerprintAsNumPy(mol) for mol in df['mol']]

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
RDKIT = pd.concat([df, pd.DataFrame(df_rdkit, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
RDKIT.drop(columns='mol').to_excel('rdkit.xlsx', index=False)


**Topological Torsion Fingerprint**

In [None]:
# Topological-torsion fingerprints folded to 2048 bits, one per molecule.
ttgen = rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=2048)

# GetFingerprintAsNumPy hands back the bit vector as a NumPy array directly,
# so no separate np.array(...) conversion of the RDKit bit vector is needed.
df_tt = [ttgen.GetFingerprintAsNumPy(mol) for mol in df['mol']]

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
TT = pd.concat([df, pd.DataFrame(df_tt, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
TT.drop(columns='mol').to_excel('toptor.xlsx', index=False)

**MACCS Keys**

In [None]:
# MACCS structural keys, one bit vector per molecule in df['mol'].
df_maccs = []
for mol in df['mol']:
    # ConvertToNumpyArray fills (and resizes) the array it is given, so an
    # empty int8 array is the starting point for each molecule.
    bits = np.zeros((0,), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(MACCSkeys.GenMACCSKeys(mol), bits)
    df_maccs.append(bits)

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
MACCS = pd.concat([df, pd.DataFrame(df_maccs, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
MACCS.drop(columns='mol').to_excel('maccs.xlsx', index=False)


**Morgan Fingerprint**

In [None]:
# Morgan fingerprints with radius 1, folded to 2048 bits, one per molecule.
# The generator does not depend on the molecule, so it is built once here
# instead of being re-created on every loop iteration.
fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=1, fpSize=2048)

# GetFingerprintAsNumPy hands back the bit vector as a NumPy array directly,
# replacing the manual zeros + ConvertToNumpyArray step.
df_morgan = [fpgen.GetFingerprintAsNumPy(mol) for mol in df['mol']]

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
MORGAN = pd.concat([df, pd.DataFrame(df_morgan, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
MORGAN.drop(columns='mol').to_excel('morgan.xlsx', index=False)


**Extended-Connectivity Fingerprint (ECFP)**

In [None]:
# Morgan fingerprints with radius 2, folded to 2048 bits, one per molecule.
# The generator does not depend on the molecule, so it is built once here
# instead of being re-created on every loop iteration.
fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)

# GetFingerprintAsNumPy hands back the bit vector as a NumPy array directly,
# replacing the manual zeros + ConvertToNumpyArray step.
df_ecfp = [fpgen.GetFingerprintAsNumPy(mol) for mol in df['mol']]

# Give the fingerprint table df's own index: concat(axis=1) aligns on index
# labels, so this keeps every fingerprint on the row of its molecule even if
# df does not carry a default 0..n-1 index.
ECFP = pd.concat([df, pd.DataFrame(df_ecfp, index=df.index)], axis=1)

# The 'mol' column holds RDKit objects and is dropped before export.
ECFP.drop(columns='mol').to_excel('ecfp.xlsx', index=False)
