In [1]:
from msecif_v2 import *

import os 
import glob 

### Multi-shelled ECIF feature generation

In [2]:
shells = [2.5, 4.5, 6.5, 8.5, 10.0]
protein_pdb = './SampleStructures/1a0q/1a0q_protein.pdb'
ligand_sdf = './SampleStructures/1a0q/1a0q_ligand_H_added.sdf'
feature = get_multi_shelled_ecif(protein_pdb, ligand_sdf, shells)

In [3]:
print(feature)

[0 0 0 ... 0 0 0]


### Weighted ECIF feature generation

In [6]:
protein_pdb = './SampleStructures/1a0q/1a0q_protein.pdb'
ligand_sdf = './SampleStructures/1a0q/1a0q_ligand_H_added.sdf'
feature = get_weighted_ecif(protein_pdb, ligand_sdf, distance_cutoff=10.0, squared=True)

In [7]:
print(feature)

[0. 0. 0. ... 0. 0. 0.]


### Multi-shelled ECIF feature generation from multiple files

In [2]:
def make_multi_shelled_feature_from_dir(pdbid_dir, shells):
    pdbid = os.path.basename(os.path.dirname(pdbid_dir))
    protein_pdb = os.path.join(pdbid_dir, f"{pdbid}_protein.pdb")
    ligand_sdf = os.path.join(pdbid_dir, f"{pdbid}_ligand_H_added.sdf")
    feature = get_multi_shelled_ecif(protein_pdb, ligand_sdf, shells)
    return pdbid, feature

In [3]:
shells = [2.5, 4.5, 6.5, 8.5, 10.0]

structure_dir = './SampleStructures/'
pdbid_dirs = glob.glob(os.path.join(structure_dir, '*/'))

In [4]:
output_path = 'Ex1_multi-shelledECIF_feature.csv'
feature = [make_multi_shelled_feature_from_dir(pdbid_dir, shells) for pdbid_dir in pdbid_dirs]
feature_values = [f[1] for f in feature]
pdbids = [f[0] for f in feature]
columns = make_feature_names_multi_shelled_ecif(shells)
multi_shelled_ecif_feature = pd.DataFrame(feature_values, index=pdbids, columns=columns)
multi_shelled_ecif_feature.index.name = 'PDB'
multi_shelled_ecif_feature.to_csv(output_path)

In [5]:
multi_shelled_ecif_feature.head()

Unnamed: 0_level_0,C;4;1;3;0-Br;1;1;0;0-2.5,C;4;1;3;0-C;3;3;0;1-2.5,C;4;1;3;0-C;4;1;1;0-2.5,C;4;1;3;0-C;4;1;2;0-2.5,C;4;1;3;0-C;4;1;3;0-2.5,C;4;1;3;0-C;4;2;0;0-2.5,C;4;1;3;0-C;4;2;1;0-2.5,C;4;1;3;0-C;4;2;1;1-2.5,C;4;1;3;0-C;4;2;2;0-2.5,C;4;1;3;0-C;4;2;2;1-2.5,...,S;2;2;0;0-S;2;1;0;0-10.0,S;2;2;0;0-S;2;1;1;0-10.0,S;2;2;0;0-S;2;2;0;0-10.0,S;2;2;0;0-S;2;2;0;1-10.0,S;2;2;0;0-S;3;3;0;0-10.0,S;2;2;0;0-S;3;3;0;1-10.0,S;2;2;0;0-S;4;3;0;0-10.0,S;2;2;0;0-S;6;4;0;0-10.0,S;2;2;0;0-S;6;4;0;1-10.0,S;2;2;0;0-S;7;4;0;0-10.0
PDB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1a1b,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a3e,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a0q,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a1c,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1a1e,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Weighted ECIF feature generation from multiple files

In [11]:
def make_weighted_feature_from_dir(pdbid_dir, distance_cutoff, squared):
    pdbid = os.path.basename(os.path.dirname(pdbid_dir))
    protein_pdb = os.path.join(pdbid_dir, f"{pdbid}_protein.pdb")
    ligand_sdf = os.path.join(pdbid_dir, f"{pdbid}_ligand_H_added.sdf")
    feature = get_weighted_ecif(protein_pdb, ligand_sdf, distance_cutoff, squared)
    return pdbid, feature

In [12]:
distance_cutoff = 10.0
squared = True

structure_dir = './SampleStructures/'
pdbid_dirs = glob.glob(os.path.join(structure_dir, '*/'))

In [13]:
output_path = 'Ex1_weightedECIF_feature.csv'
feature = [make_weighted_feature_from_dir(pdbid_dir, distance_cutoff, squared) for pdbid_dir in pdbid_dirs]
feature_values = [f[1] for f in feature]
pdbids = [f[0] for f in feature]
columns = PossibleECIF
weighted_ecif_feature = pd.DataFrame(feature_values, index=pdbids, columns=columns)
weighted_ecif_feature.index.name = 'PDB'
weighted_ecif_feature.to_csv(output_path)

In [14]:
weighted_ecif_feature.head()

Unnamed: 0_level_0,C;4;1;3;0-Br;1;1;0;0,C;4;1;3;0-C;3;3;0;1,C;4;1;3;0-C;4;1;1;0,C;4;1;3;0-C;4;1;2;0,C;4;1;3;0-C;4;1;3;0,C;4;1;3;0-C;4;2;0;0,C;4;1;3;0-C;4;2;1;0,C;4;1;3;0-C;4;2;1;1,C;4;1;3;0-C;4;2;2;0,C;4;1;3;0-C;4;2;2;1,...,S;2;2;0;0-S;2;1;0;0,S;2;2;0;0-S;2;1;1;0,S;2;2;0;0-S;2;2;0;0,S;2;2;0;0-S;2;2;0;1,S;2;2;0;0-S;3;3;0;0,S;2;2;0;0-S;3;3;0;1,S;2;2;0;0-S;4;3;0;0,S;2;2;0;0-S;6;4;0;0,S;2;2;0;0-S;6;4;0;1,S;2;2;0;0-S;7;4;0;0
PDB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1a1b,0.0,0.0,0.0,0.0,0.272427,0.0,0.0,0.342231,0.808034,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a3e,0.0,0.0,0.0,0.0,0.493727,0.0,0.0,1.477888,0.749985,0.085048,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a0q,0.0,0.0,0.0,0.0,0.296459,0.0,0.0,1.204299,0.663103,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a1c,0.0,0.0,0.0,0.0,0.050087,0.0,0.0,0.322992,0.490483,0.775853,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1a1e,0.0,0.0,0.0,0.0,0.316048,0.0,0.0,0.32367,0.679202,0.165415,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
