In [23]:
from msecif import *

import os 
import glob 

### Multi-shelled ECIF feature generation

In [24]:
shells = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.0]
protein_pdb = './SampleStructures/1a0q/1a0q_protein.pdb'
ligand_sdf = './SampleStructures/1a0q/1a0q_ligand_H_added.sdf'
feature = get_multi_shelled_ecif(protein_pdb, ligand_sdf, shells)

In [26]:
print(feature)

[0 0 0 ... 0 0 0]


### Weighted ECIF feature generation

In [27]:
protein_pdb = './SampleStructures/1a0q/1a0q_protein.pdb'
ligand_sdf = './SampleStructures/1a0q/1a0q_ligand_H_added.sdf'
feature = get_weighted_ecif(protein_pdb, ligand_sdf, distance_cutoff=12.0)

In [28]:
print(feature)

[0. 0. 0. ... 0. 0. 0.]


### Multi-shelled ECIF feature generation from multiple files

In [15]:
def make_multi_shelled_feature_from_dir(pdbid_dir, shells):
    pdbid = os.path.basename(os.path.dirname(pdbid_dir))
    protein_pdb = os.path.join(pdbid_dir, f"{pdbid}_protein.pdb")
    ligand_sdf = os.path.join(pdbid_dir, f"{pdbid}_ligand_H_added.sdf")
    feature = get_multi_shelled_ecif(protein_pdb, ligand_sdf, shells)
    return pdbid, feature

In [16]:
shells = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.0]

structure_dir = './SampleStructures/'
pdbid_dirs = glob.glob(os.path.join(structure_dir, '*/'))

In [17]:
output_path = 'Ex1_multi-shelledECIF_feature.csv'
feature = [make_multi_shelled_feature_from_dir(pdbid_dir, shells) for pdbid_dir in pdbid_dirs]
feature_values = [f[1] for f in feature]
pdbids = [f[0] for f in feature]
columns = make_feature_names_multi_shelled_ecif(shells)
multi_shelled_ecif_feature = pd.DataFrame(feature_values, index=pdbids, columns=columns)
multi_shelled_ecif_feature.index.name = 'PDB'
multi_shelled_ecif_feature.to_csv(output_path)

In [18]:
multi_shelled_ecif_feature.head()

Unnamed: 0_level_0,C;4;1;3;0;0-Br;1;1;0;0;0-2.5,C;4;1;3;0;0-C;3;3;0;1;1-2.5,C;4;1;3;0;0-C;4;1;1;0;0-2.5,C;4;1;3;0;0-C;4;1;2;0;0-2.5,C;4;1;3;0;0-C;4;1;3;0;0-2.5,C;4;1;3;0;0-C;4;2;0;0;0-2.5,C;4;1;3;0;0-C;4;2;1;0;0-2.5,C;4;1;3;0;0-C;4;2;1;0;1-2.5,C;4;1;3;0;0-C;4;2;1;1;1-2.5,C;4;1;3;0;0-C;4;2;2;0;0-2.5,...,S;2;2;0;0;0-S;2;1;1;0;0-10.0,S;2;2;0;0;0-S;2;2;0;0;0-10.0,S;2;2;0;0;0-S;2;2;0;0;1-10.0,S;2;2;0;0;0-S;2;2;0;1;1-10.0,S;2;2;0;0;0-S;3;3;0;0;0-10.0,S;2;2;0;0;0-S;3;3;0;0;1-10.0,S;2;2;0;0;0-S;4;3;0;0;0-10.0,S;2;2;0;0;0-S;6;4;0;0;0-10.0,S;2;2;0;0;0-S;6;4;0;0;1-10.0,S;2;2;0;0;0-S;7;4;0;0;0-10.0
PDB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1a1b,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a1e,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a1c,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a0q,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a3e,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Weighted ECIF feature generation from multiple files

In [9]:
def make_weighted_feature_from_dir(pdbid_dir, distance_cutoff):
    pdbid = os.path.basename(os.path.dirname(pdbid_dir))
    protein_pdb = os.path.join(pdbid_dir, f"{pdbid}_protein.pdb")
    ligand_sdf = os.path.join(pdbid_dir, f"{pdbid}_ligand_H_added.sdf")
    feature = get_weighted_ecif(protein_pdb, ligand_sdf, distance_cutoff)
    return pdbid, feature

In [10]:
distance_cutoff = 12.0

structure_dir = './SampleStructures/'
pdbid_dirs = glob.glob(os.path.join(structure_dir, '*/'))

In [13]:
output_path = 'Ex1_weightedECIF_feature.csv'
feature = [make_weighted_feature_from_dir(pdbid_dir, distance_cutoff) for pdbid_dir in pdbid_dirs]
feature_values = [f[1] for f in feature]
pdbids = [f[0] for f in feature]
columns = PossibleECIF
weighted_ecif_feature = pd.DataFrame(feature_values, index=pdbids, columns=columns)
weighted_ecif_feature.index.name = 'PDB'
weighted_ecif_feature.to_csv(output_path)

In [19]:
weighted_ecif_feature.head()

Unnamed: 0_level_0,C;4;1;3;0;0-Br;1;1;0;0;0,C;4;1;3;0;0-C;3;3;0;1;1,C;4;1;3;0;0-C;4;1;1;0;0,C;4;1;3;0;0-C;4;1;2;0;0,C;4;1;3;0;0-C;4;1;3;0;0,C;4;1;3;0;0-C;4;2;0;0;0,C;4;1;3;0;0-C;4;2;1;0;0,C;4;1;3;0;0-C;4;2;1;0;1,C;4;1;3;0;0-C;4;2;1;1;1,C;4;1;3;0;0-C;4;2;2;0;0,...,S;2;2;0;0;0-S;2;1;1;0;0,S;2;2;0;0;0-S;2;2;0;0;0,S;2;2;0;0;0-S;2;2;0;0;1,S;2;2;0;0;0-S;2;2;0;1;1,S;2;2;0;0;0-S;3;3;0;0;0,S;2;2;0;0;0-S;3;3;0;0;1,S;2;2;0;0;0-S;4;3;0;0;0,S;2;2;0;0;0-S;6;4;0;0;0,S;2;2;0;0;0-S;6;4;0;0;1,S;2;2;0;0;0-S;7;4;0;0;0
PDB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1a1b,0.0,0.0,0.0,0.0,0.362749,0.0,0.0,0.0,0.623144,1.140722,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a1e,0.0,0.0,0.0,0.0,0.394528,0.0,0.0,0.0,0.582484,0.94295,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a1c,0.0,0.0,0.0,0.0,0.07392,0.0,0.0,0.0,0.585712,0.728586,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a0q,0.0,0.0,0.0,0.0,0.377491,0.0,0.0,0.0,1.650604,0.909777,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a3e,0.0,0.0,0.0,0.0,0.541475,0.0,0.0,0.153224,1.694323,0.974863,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
