In [8]:
import os
import imageio
import nglview as nv
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import pandas as pd

# import seaborn as sns
from hilbertcurve.hilbertcurve import HilbertCurve
from scipy.ndimage import gaussian_filter
from scipy.stats import entropy

def cgenff_reader(filename):
  with open(filename) as file1:
    lst = list(filter(lambda i: re.match(r"^ATOM.*!", i), file1))
  theatom  = [i.strip("\n").split()[1] for i in lst]
  atomtype = [i.strip("\n").split()[2] for i in lst]
  charge   = [float(i.strip("\n").split()[3]) for i in lst]
  penalty  = [float(i.strip("\n").split()[-1]) for i in lst]
  return {"name":theatom, "type":atomtype, "charge":charge, "penalty":penalty}
    
def lig_xml(dic, write_file=False, source=False):
  root = ET.Element('ForceField')
  info = ET.SubElement(root, 'Info')
  info_date = ET.SubElement(info, "date")
  info_date.text = str(date.today())
  if source != False: 
    info_file = ET.SubElement(info, 'source')
    info_file.text = source

  data_lig = ET.SubElement(root, 'LIG')
  for i in range(len(dic["name"])):
    tmpattrib={
      "name":dic["name"][i], 
      "type": dic["type"][i], 
      "charge": str(dic["charge"][i]), 
      'penalty': str(dic["penalty"][i]),
    }
    tmpatom = ET.SubElement(data_lig, 'ATOM', attrib = tmpattrib)

  ligxml_str = ET.tostring(root , encoding="unicode")
  dom = minidom.parseString(ligxml_str)
  ligxml_str = dom.toprettyxml()

  if write_file != False :
    with open(write_file, "w") as file1: 
      file1.write(ligxml_str)
  return ligxml_str

class ffreader:
    def __init__(self, ff):
        self.domff = minidom.parse(ff)
        self.residues = self.domff.getElementsByTagName("Residue")
        self.residuemap = {
            "TIP3":"T3P",
            "HOH":"T3P",
            "WAT":"T3P",
            'HID':"HSD",
            'HIE':"HSE",
            'HIP':"HSP", 
            "CYX":"CYS",
        }
        self.map_resnames()
        
    def map_resnames(self):
        reslst = [i.getAttribute("name") for i in self.residues]
        for i in reslst: 
            if i in self.residuemap.keys():
                print(f"mapping residue {i}")
                idx = reslst.index(i)
                self.residues[idx].setAttribute("name", self.residuemap[i])
        self.residues = self.domff.getElementsByTagName("Residue")
        
    def format_resname(self, resname):
        resname = resname.upper()
        if resname in self.residuemap.keys():
            resname = self.residuemap[resname]
        return resname
        
    def format_atomname(self, retdic):
        keys = retdic.keys()
        values = retdic.values()
        return {i:j for i,j in zip(keys, values)}
    
    def getTypes(self, resname):
        # TODO: possible changes to map the residue name 
        # TODO: map HIS/HIE/HID, CYX, etc
        resname = self.format_resname(resname);
        for i in self.residues: # self.domff.getElementsByTagName("Residue"):
            if i.attributes["name"].value == resname:
                names = [_.attributes["name"].value for _ in i.getElementsByTagName("Atom")]
                types = [_.attributes["type"].value for _ in i.getElementsByTagName("Atom")]
                break
        ret = {i:j for i,j in zip(names, types)}
        return self.format_atomname(ret)
    
    def getCharges(self, resname):
        # TODO: possible changes to map the residue name 
        # TODO: map HIS/HIE/HID, CYX, etc
        resname = self.format_resname(resname); 
        for i in self.residues: 
            if i.attributes["name"].value == resname:
                names = [_.attributes["name"].value for _ in i.getElementsByTagName("Atom")]
                charges = [_.attributes["charge"].value for _ in i.getElementsByTagName("Atom")]
                break
        ret = {i:float(j) for i,j in zip(names, charges)}
        return self.format_atomname(ret)
    
    def getAtomCharge(self, resname, atomname):
        resname = self.format_resname(resname); 
        found_res = False
        for i in self.residues: 
            if i.attributes["name"].value == resname:
                names = [_.attributes["name"].value for _ in i.getElementsByTagName("Atom")]; 
                charges = [_.attributes["charge"].value for _ in i.getElementsByTagName("Atom")]; 
                found_res = True;
                break
        if found_res: 
            found_atom = False;
            for i,j in zip(names, charges):
                if i == atomname: 
                    return float(j); 
                    found_atom = True; 
                    break
            if not found_atom:
                print(f"Found the residue {resname}, however, Not found the atom {atomname}"); 
                return 0; 
        else : 
            print(f"Not found the residue {resname}")
            return 0; 
    
    def addFF(self, ff, waitlist=[]):
        # use minidom purely
        residues_oldff = self.domff.getElementsByTagName('Residues')
        newff = minidom.parse(ff)
        residues_newff = newff.getElementsByTagName('Residue')
        for i in residues_newff:
            if len(waitlist) == 0 or i.getAttribute("name") in waitlist:
                residues_oldff[0].appendChild(i)
            else: 
                pass
        self.residues = self.domff.getElementsByTagName("Residue")
        self.map_resnames()

In [8]:
import time
import pickle 
import numpy as np 
import pytraj as pt 
import matplotlib.pyplot as plt

from scipy.spatial import distance_matrix
from scipy.ndimage import gaussian_filter
from scipy.stats import entropy

# Forcefield and xml related functions
import re
from xml.dom import minidom
import xml.etree.ElementTree as ET

tmptop = pt.load("./tests/featurizer_test1.pdb")
def getprotein(traj):
  reslst = []
  for i in traj.top.atoms:
    if i.name=="CA":
      reslst.append(i.resid+1)
  mask = ":"+",".join([str(i) for i in reslst])
  return traj.top.select(mask)
  


  
a = utils.getprotein(tmptop)
print((a+1).astype(str).tolist()+a.tolist())



['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '

In [58]:
import time
import pickle 
import json
import numpy as np 
import pytraj as pt 
import matplotlib.pyplot as plt

from scipy.spatial import distance_matrix
from scipy.ndimage import gaussian_filter
from scipy.stats import entropy

from BetaPose import utils  

# Forcefield and xml related functions
import re
from xml.dom import minidom
import xml.etree.ElementTree as ET

class featurizer_3d:
  def __init__(self, pdbfile, trajfile, parmdic):
    print("Initializing the featurizer object......")
    self.pdbfile = pdbfile; 
    self.trajfile = trajfile; 
    if isinstance(parmdic, dict):
      # Loading a dictionary 
      self.parmdic = parmdic; 
      self._init_settings(); 
    elif isinstance(parmdic, str):
      # Loading from a json file
      self.load_settings(parmdic); 
    
    #     self.length3D = grid_length   # lattice nr
    
    # Load trajectory
    self.trajpdb = pt.load(self.trajfile, top=self.pdbfile, stride=self.stride); 
    self.frameNr = self.trajpdb.n_frames; 
    self.reslist = [i.name for i in self.trajpdb.top.residues]; 
    print("Residue list ", self.reslist)

    self.coordinates = {}
    self.selections   = {}
    # Initialize the groups 
    for group in self.atom_groups:
      print(group)
#       self.init_group(i, atom_groups[i])
#############
#############
#     if 'ligand' in atom_groups.keys():
#       com0 = pt.center_of_mass(self.trajpdb, atom_groups["ligand"], frame_indices=[0]).squeeze(); 
#       print(f"Using ligand to align 3D curve {np.round(com0,2)}")
#       self.alignBy = "ligand"
#     else: 
#       com0 = pt.center_of_mass(self.trajpdb, atom_groups[atom_groups.keys()[0]], frame_indices=[0]).squeeze();
#       print(f"Using {atom_groups.keys()[0]} to align 3D curve {com0}")
#       self.alignBy = atom_groups.keys()[0]
#     self.alignCenter(com0)
#     self.curveCenter = np.mean(self.points3D, axis = 0).reshape(1,3); 
    
  def _init_settings(self):
    """
    Initialize important parameters/settings
    """
    print(self.parmdic)
    pointnr = 16
    self.parms = {
      "lattice_nr"  : pointnr,  # unit points 
      "lattice_len" : 1,        # "length3D"  : defpointnr 
      "shift" : np.array([0,0,0]),
      "atom_groups" : [],
      "atmpro" : np.array([]), 
      "atmlig" : np.array([]), 
      "atmsol" : np.array([]), 
      "stride" : 1,
      "search_cutoff" : 18
    }
    self.updatecell(); 
    
    tmptop = pt.load(self.pdbfile)
    tmptop.top.set_reference(tmptop[0])
    self.parms["atmpro"] = utils.getprotein(tmptop); 
    self.parms["atmlig"] = tmptop.top.select(":LIG"); 
    prot_lig_idxlst = (self.parms["atmpro"]+1).astype(str).tolist() + (self.parms["atmlig"]+1).astype(str).tolist(); 
    pro_lig_mask = "@"+",".join(prot_lig_idxlst);
    self.parms["atmsol"] = tmptop.top.select("!"+pro_lig_mask); 
    if len(self.parms["atmpro"]) > 0:
      self.parms['atom_groups'].append("protein"); 
    if len(self.parms["atmlig"]) > 0:
      self.parms['atom_groups'].append("ligand"); 
    if len(self.parms["atmsol"]) > 0:
      self.parms['atom_groups'].append("solvent"); 
    print(self.parms)
    
    parmkeys = self.parmdic.keys(); 
    if ("MASK_PRO" in parmkeys): 
      self.parms["atmpro"] = tmptop.top.select(self.parmdic["MASK_PRO"]); 
      if "protein" not in self.parms['atom_groups']:
        self.parms['atom_groups'].append("protein"); 
    if ("MASK_LIG" in parmkeys): 
      self.parms["atmlig"] = tmptop.top.select(self.parmdic["MASK_LIG"]); 
      if "ligand" not in self.parms['atom_groups']:
        self.parms['atom_groups'].append("ligand"); 
    if ("MASL_SOL" in parmkeys): 
      self.parms["atmsol"] = tmptop.top.select(self.parmdic["MASL_SOL"]); 
      if "solvent" not in self.parms['atom_groups']:
        self.parms['atom_groups'].append("solvent"); 
    
    if "LATTICE_POINTS" in parmkeys: 
      """ 
      Could put this to a separate function 
      Basic numbers : lattice_nr, lattice_len Could only be defined by user
      """
      self.parms["lattice_nr"] = self.parmdic["LATTICE_POINTS"]; 
      self.updatecell();       
      
    if "CELL_LENGTH" in parmkeys:
      self.parms["lattice_len"] = self.parmdic["CELL_LENGTH"] / (self.parms["lattice_nr"]-1);
      print("Settting the lattice_length", self.parms["lattice_len"]); 
      self.updatecell(); 
      
    # Either use center or shift a cell 
    # Firstly set the shift 
    # Set the shift back to array [0,0,0] after updating the cell.
    if "CENTERMASK" in parmkeys:
      thecenter = pt.center_of_mass(tmptop, mask=self.parmdic["CENTERMASK"])[0];
      self.parms["shift"] = thecenter - np.mean(self.parms["points3D"], axis=0); 
      self.updatecell(); 
      self.parms["shift"] = np.array([0,0,0])
    elif "CENTER" in parmkeys:
      self.parms["shift"] = np.array(self.parmdic["CENTER"]) - np.mean(self.parms["points3D"], axis=0); 
      self.updatecell(); 
      self.parms["shift"] = np.array([0,0,0])
    elif "CELL_SHIFT" in parmkeys:
      self.parms["shift"] = self.parmdic["CELL_SHIFT"]; 
      self.updatecell(); 
      self.parms["shift"] = np.array([0,0,0])
      
    if "STRIDE" in parmkeys:
      self.parms["stride"] = self.parmdic["STRIDE"]; 
    if "CUTOFF" in parmkeys:
      self.parms["search_cutoff"] = self.parmdic["CUTOFF"]; 
    self._load_parms()
    
#   # FF of components
#   "FF_PRO" : "./Forcefield/charmm36_nowaters.xml",
#   "FF_SOL" : "./Forcefield/test_wat.xml",
#   "FF_LIG"  : "./tests/featurizer_test1_lig.xml",
  
    
    # self.distances   = np.array(range(self.length3D**3));
    # self.points3D    = self.index3D; 
    # self.atom_groups = "test"; # atom_groups; 


  def load_settings(self, file):
    """Directly load from a json file """
    with open(file, "r") as file1: 
      self.parms = json.load(file1);
    self._load_parms(); 
    
  def _load_parms(self):
    """
    Load the input parameters from the dictionary , Avoid direct assignment 
    """
    self.lattice_number = self.parms["lattice_nr"]; 
    self.lattice_length = self.parms["lattice_len"]; 
    self.cell_length = self.parms["cell_len"]; 
    self.index3D = self.parms["index3D"]; 
    self.points3D = self.parms["points3D"]; 
    self.distances = self.parms["distances"]; 
    self.cellcenter = self.parms["cellcenter"]; 
    
    self.atom_groups = self.parms['atom_groups']; 
    
    self.stride = self.parms['stride']; 
    self.search_cutoff = self.parms['search_cutoff']; 
    #     self.length3D = self.parms[""]
    #     self.atom_groups = self.parms[""]
    #     self.cell_length = self.parms[""]

    # defined sel.xxx = self.parms[xxxx]
    
  def updatecell(self):
    print("Setting the box")
    self.parms["cell_len"]   = self.parms["lattice_len"] * self.parms["lattice_nr"]; 
    self.parms["index3D"]    = self.get_points(self.parms["lattice_nr"]);  
    self.parms["points3D"]   = self.get_points(self.parms["lattice_nr"]) *self.parms["lattice_len"]; 
    self.parms["points3D"]   += self.parms["shift"]; 
    self.parms["distances"]  = np.array(range(self.parms["lattice_nr"]**3)); 
    self.parms["cellcenter"] = np.mean(self.parms["points3D"], axis = 0).reshape(1,3); 
    
  def get_point_by_distance(self, point, length):
    d0 = int(point/length**2)
    d1 = int((point - d0*length**2)/length)
    d3 = int(point - d0*length**2 - d1*length)
    return [d0, d1, d3]
  
  def get_points(self, length):
    x=[]; 
    for i in range(length**3):
      x.append(self.get_point_by_distance(i,length))
    return np.array(x).astype(int)

  def points_to_3D(self, thearray, dtype=float):
    if len(self.distances) != len(thearray):
      print("Cannot match the length of the array to the 3D cuboid"); 
      return np.array([0])
    tempalte  = np.zeros((self.lattice_size, self.lattice_size, self.lattice_size)).astype(dtype);
    for ind in self.distances:
      array_3Didx = tuple(self.index3D[ind]); 
      tempalte[array_3Didx] = thearray[ind]
    return tempalte

  def Norm_mass_array(self, array, parm = 9, x0=7, slope=0.015):
    line1 = 1/(1+np.e**(-array+x0))
    baseNr = 1/(1+np.e**(-parm+x0))
    line2 = baseNr + (array-parm)*slope
    status1 = array <= parm
    status2 = array > parm
    template = np.zeros(array.shape)
    template[status1] = line1[status1]
    template[status2] = line2[status2]
    return template

  def get_entropy(self, arr):
    unique, counts = np.unique(arr, return_counts=True)
    return entropy(counts)

  def init_group(self, groupname, mask):
#     "coorpro" : np.array([]), 
#       "coorlig" : np.array([]), 
#       "coorsol" : np.array([]), 
    
    atom_sel = self.trajpdb.top.select(mask);
    if len(atom_sel) == 0: 
      print(f"Warning: There is no atom selected in the group {groupname}, skipping......"); 
      return 
    else: 
      print(f"Group Name: {groupname}; Atoms: {len(atom_sel)} ")
      self.selections[groupname]  = self.trajpdb.top.select(mask);
      self.coordinates[groupname] = self.trajpdb.xyz[0][self.selections[groupname]]

  def featurize_element(self):
    print("Featurizing element"); 
    pdb_atomic_names  = np.array([i.name for i in self.trajpdb.top.atoms]).astype(str); 
    pdb_atomic_numbers = np.array([i.atomic_number for i in self.trajpdb.top.atoms]).astype(int); 
    pdb_residue_names = np.array([self.reslist[i.resid] for i in self.trajpdb.top.atoms]).astype(str); 
    print(pdb_residue_names)
    
    self.features.append(feature)
    st_elm = time.perf_counter(); 
    # Initialize the container of the descriptors
    self.atom_mass  = {}; 
    self.norm_mass  = {}; 
    self.gauss_mass = {}; 
    self.atom_name  = {}; 
    self.res_name   = {};

    # Firstly, Sequentially process each frames 
    for sel in self.selections.keys():
      # Secondly, sequentially process each selection
      self.atom_mass[sel] = []; 
      self.norm_mass[sel] = []; 
      self.gauss_mass[sel] = []; 
      self.atom_name[sel]  = []; 
      self.res_name[sel]   = []; 
      for i in range(len(self.trajpdb)):
        thisxyz = self.trajpdb.xyz[i]; 
        # Thirdly: Extract coordinates within the cutoff, atom index and
        selidx = self.selections[sel]; 
        selxyz = thisxyz[selidx]; 
        # Fourthly: restrain real candidates
        cand_status = distance_matrix(selxyz, self.cellcenter) <= self.search_cutoff; 
        cand_status = cand_status.squeeze(); 
        cand_index  = selidx[cand_status]; 
        cand_xyz    = selxyz[cand_status]; 
        cand_distmatrix = distance_matrix(self.points3D, cand_xyz)
        cand_diststatus = cand_distmatrix < 1.75
        # cand_distmatrix < 3.75

        mins = np.min(cand_distmatrix, axis=1)
        idx_lst = [np.where(cand_distmatrix[m] == mins[m])[0][0] if np.any(cand_diststatus[m,:]) else -1 for m in range(len(mins))]
        candlst = [cand_index[m] if m>=0 else -1 for m in idx_lst]

        atom_name_frameN = [pdb_atomic_names[m]  if m>0 else False for m in candlst]; 
        res_name_frameN  = [pdb_residue_names[m] if m>0 else False for m in candlst]; 
        atom_name_frameN = self.points_to_3D(atom_name_frameN, dtype=str); 
        res_name_frameN  = self.points_to_3D(res_name_frameN, dtype=str); 

        atom_mass_frameN = [pdb_atomic_numbers[m] if m>0 else 0 for m in candlst]; 
        atom_mass_frameN = self.points_to_3D(atom_mass_frameN); 
        norm_mass_frameN = self.Norm_mass_array(atom_mass_frameN)
        gauss_mass_frameN = gaussian_filter(norm_mass_frameN, sigma=1)

        self.atom_mass[sel].append(atom_mass_frameN)
        self.norm_mass[sel].append(norm_mass_frameN)
        self.gauss_mass[sel].append(gauss_mass_frameN)
        self.atom_name[sel].append(atom_name_frameN)
        self.res_name[sel].append(res_name_frameN)

      self.atom_mass[sel] = np.array(self.atom_mass[sel]); 
      self.norm_mass[sel] = np.array(self.norm_mass[sel]);
      self.gauss_mass[sel]= np.array(self.gauss_mass[sel]);
      self.atom_name[sel] = np.array(self.atom_name[sel]).astype(str);
      self.res_name[sel]  = np.array(self.res_name[sel]).astype(str);
    loadtime = time.perf_counter() - st_elm;
    print(f"Element: featurized {self.frameNr} frames, took {loadtime:.2f} seconds; Avg: {loadtime/self.frameNr:.2f}")

  def featurize_charge(self, thetype):
    self.features.append(feature)
    print("Reading forcefield files")
    reader = ffreader(settings["forcefield_ligand"])
    waitlist = list(set([i.upper() for i in self.reslist])) + list(reader.residuemap.keys())
    reader.addFF(settings["forcefield_protein"], waitlist=waitlist)
    reader.addFF(settings["forcefield_solvent"], waitlist=waitlist)

    st_chg = time.perf_counter(); 
    self.atom_charge  = {}; 
    self.gauss_charge = {}; 
    for sel in self.selections.keys():
      theshape = self.atom_name[sel].shape
      chargearr = np.zeros(theshape)
      tmpgrp_atom_charge  = []; 
      tmpgrp_gauss_charge = []; 
      for fnr in range(len(self.atom_name[sel])):
        atomnamearr = [self.atom_name[sel][fnr][tuple(self.index3D[i])] for i in self.distances];
        resnamearr  = [self.res_name[sel][fnr][tuple(self.index3D[i])]  for i in self.distances];
        print(f"dealing with the selection {sel}",resnamearr)
        chargearr   = [reader.getAtomCharge(i, j) if (i != "False" and i != False) else 0 for i,j in zip(resnamearr, atomnamearr)]
        chargearr   = self.points_to_3D(chargearr)
        tmpgrp_atom_charge.append(chargearr); 
        tmpgrp_gauss_charge.append(gaussian_filter(chargearr, sigma=1)); 
      self.atom_charge[sel] = np.array(tmpgrp_atom_charge)
      self.gauss_charge[sel] = np.array(tmpgrp_gauss_charge)
      print(self.atom_charge[sel])
    loadtime = time.perf_counter() - st_chg;
    print(f"Charge: featurized {self.frameNr} frames, took {loadtime:.2f} seconds; Avg: {loadtime/self.frameNr:.2f}")
  
  def featurize_entropy(self, thetype):
    self.features.append(feature)
    print("Featurizing entropy")
    # TODO: set a proper cutoff to determine very little occupied cells.
    if "entropy_threshold" in settings.keys():
      occupancy_threshold = settings["entropy_threshold"]
    else:
      occupancy_threshold = 0.0

    st_etp = time.perf_counter(); 
    entropy_values = np.zeros((self.length3D, self.length3D, self.length3D));
    idx_template = [[] for i in self.distances]

    for i in range(len(self.trajpdb)):
      # Thirdly: Extract coordinates within the cutoff, atom index and
      thisxyz = self.trajpdb.xyz[i];
      self.trajpdb.top.set_reference(self.trajpdb[i]); 
      selidx = self.trajpdb.top.select(f":LIG<@{self.search_cutoff}"); 
      selxyz = thisxyz[selidx]; 

      sel_distmatrix_max = distance_matrix(self.points3D+self.cell_length/2, selxyz)
      sel_distmatrix_min = distance_matrix(self.points3D-self.cell_length/2, selxyz) 
      sel_status_max = sel_distmatrix_max < np.sqrt(3)*self.cell_length; 
      sel_status_min = sel_distmatrix_min < np.sqrt(3)*self.cell_length; 
      summary = sel_status_max * sel_status_min; 

      # Interate through all of the grid points
      # Set pre-exit and add zero just to make sure the list value is greater than frame number
      for p in range(len(self.distances)):
        Nratoms = np.count_nonzero(summary[p])
        if Nratoms >0:
          pointp = self.points3D[p]; 
          upper = pointp+self.cell_length/2
          lower = pointp-self.cell_length/2
          sel_ndxs  = selidx[np.where(summary[p] == True)[0]]
          sel_points = thisxyz[sel_ndxs]
          up_status = upper - sel_points > 0
          lw_status = sel_points - lower > 0
          ov_status = np.all(up_status*lw_status, axis=1)
          if True not in ov_status:
            idx_template[p].append(0)
            continue
          for s, tmpidx in zip(ov_status, sel_ndxs):
            if s == True:
              idx_template[p].append(tmpidx)
        else:
          idx_template[p].append(0)
    # If there is only one value in the list, the entropy will be 0 
    # Hence, there will be a 0 when initialize the list 
    entropy_arr = [self.get_entropy(_) if len(set(_)) > occupancy_threshold*self.frameNr else 0 for _ in idx_template]
    self.entropy = self.points_to_3D(entropy_arr)
    self.gauss_entropy = gaussian_filter(self.entropy, sigma=1)
    time_etp = time.perf_counter() - st_etp;
    print(f"Entropy: featurized {self.frameNr} frames, took {time_etp:.2f} seconds; Avg: {time_etp/self.frameNr:.2f}")

    print(f"Frame Number: {self.frameNr}, occupancy threshold {occupancy_threshold}")
    print(f"The averaged entropy is {np.mean(self.entropy):.2f}, Gaussian filtered entropy is {np.mean(self.gauss_entropy):.2f}")
    print(f"The max entropy is {np.max(self.entropy):.2f}, Gaussian filtered entropy is {np.max(self.gauss_entropy):.2f}")
    print(f"The min entropy is {np.min(self.entropy):.2f}, Gaussian filtered entropy is {np.min(self.gauss_entropy):.2f}")
    print(f"The standard deviation of entropy is {np.std(self.entropy):.2f}, Gaussian filtered entropy is {np.std(self.gauss_entropy):.2f}")

  def featurize_hydrophb(self, thetype):
    pass
  
  def featurize(self, features, settings={}):
    self.features = [];
    features = [i.lower() for i in features]
    for feature in features:
      if feature.lower() == 'element':
        pass
      elif feature.lower() == 'charge':
        pass
      elif feature.lower() == 'entropy':
        pass
      else: 
        print(f"Decriptor {feature} is not a standard descriptor yet. ")

  def alignCenter(self, refCenter):
    diff = np.array(refCenter) - np.mean(self.points3D, axis=0); 
    self.points3D = self.points3D + diff; 
    
  def shift(self, shift):
    self.points3D = self.points3D + np.array(shift)
    
  def scaleToLength(self, refLength):
    scaleFactor = refLength / self.length3D;
    self.cell_length = self.cell_length * scaleFactor; 
    diff = self.points3D - self.index3D
    self.points3D = diff + self.index3D * scaleFactor; 
    
  def scaleByFactor(self, scaleFactor):
    self.cell_length = self.cell_length * scaleFactor; 
    self.points3D = self.points3D * scaleFactor; 

  def save(self, filename):
    with open(filename, "wb") as tmpfile:
      data_to_save={
        "frameNr": self.frameNr,
        "frameList": self.frameList,
        "atom_groups": self.atom_groups, 
        "distances": self.distances, 
        "length3D": self.length3D,
        "index3D": self.index3D,
        "points3D": self.points3D,
        "features": self.features,
        "atomic_names": self.pdb_atomic_names,
        "atomic_number": self.pdb_atomic_numbers,
       }
      if "element" in self.features:
        data_to_save["atom_mass"] = self.atom_mass
        data_to_save["norm_mass"] = self.norm_mass
        data_to_save["gauss_mass"] = self.gauss_mass
      if "entropy" in self.features:
        data_to_save["entropy"] = self.entropy; 
        data_to_save["gauss_entropy"]=self.gauss_entropy; 
      if "charge" in self.features:
        data_to_save["atom_charge"] = self.atom_charge
        data_to_save["gauss_charge"] = self.gauss_charge
      pickle.dump(data_to_save ,tmpfile, protocol=pickle.HIGHEST_PROTOCOL)

            
# ATOM_GROUPS = {"protein":":1-221", "ligand":":LIG", "solvent":":T3P,CL-,K+"}

FEATURIZER_SETTINGS = {
  # Mask of components 
  "MASK_PRO" : ":1-221",
  "MASK_LIG" : ":LIG", 
  "MASL_SOL" : ":T3P,CL-,K+", 
  
  # FF of components
  "FF_PRO" : "./Forcefield/charmm36_nowaters.xml",
  "FF_SOL" : "./Forcefield/test_wat.xml",
  "FF_LIG"  : "./tests/featurizer_test1_lig.xml",
  
  # POCKET SETTINGS
  "CELL_LENGTH" : 10,       # Unit: Angstorm (Need scaling)
  "LATTICE_POINTS" : 8,     # Unit: 1 (Number of lattice in one dimension)
  "CELL_SHIFT" : [1,2,3],   # Either CENTERMASK, CENTER or CELL_SHIFT (mask>center>shift) 
#   "CENTER" : [6,6,6], 
  "CENTERMASK" : ":LIG",
  
  # IO SETTINGS
  "STRIDE": 50,             # Unit: frames  
  
  # SEARCH SETTINGS
  "CUTOFF": 18, 
  
  # DESCRIPTOR SETTINGS
  "DESCRIPTORS"  : {
    "ELEMENT":{"ACTIVE":True, }, 
    "CHARGE":{"ACTIVE":True, }, 
    "ENTROPY":{"ACTIVE":True, "THRESHOLD":0.05, },
  }, 
}


amberxml = "/home/miemie/Dropbox/Documents/BetaPose/Forcefield/ff14SB.xml"
charmmxml = "/home/miemie/Dropbox/Documents/BetaPose/Forcefield/charmm36_nowaters.xml"

featurizer = featurizer_3d("./tests/featurizer_test1.pdb", "./tests/featurizer_test1.nc", FEATURIZER_SETTINGS); 

# featurizer.scaleToLength(18); 
featurizer.featurize(DESCRIPTORS, settings=DESCRIPTORS_SETTINGS)
featurizer.save("/tmp/test_featurizer_3D.pkl")


# print(featurizer.index3D)
# print(featurizer.distances)
# print(featurizer.points3D)


Initializing the featurizer object......
{'MASK_PRO': ':1-221', 'MASK_LIG': ':LIG', 'MASL_SOL': ':T3P,CL-,K+', 'FF_PRO': './Forcefield/charmm36_nowaters.xml', 'FF_SOL': './Forcefield/test_wat.xml', 'FF_LIG': './tests/featurizer_test1_lig.xml', 'CELL_LENGTH': 10, 'LATTICE_POINTS': 8, 'CELL_SHIFT': [1, 2, 3], 'CENTERMASK': ':LIG', 'STRIDE': 50, 'CUTOFF': 18, 'DESCRIPTORS': {'ELEMENT': {'ACTIVE': True}, 'CHARGE': {'ACTIVE': True}, 'ENTROPY': {'ACTIVE': True, 'THRESHOLD': 0.05}}}
Setting the box
{'lattice_nr': 16, 'lattice_len': 1, 'shift': array([0, 0, 0]), 'atom_groups': ['protein', 'ligand', 'solvent'], 'atmpro': array([   0,    1,    2, ..., 3570, 3571, 3572]), 'atmlig': array([41106, 41107, 41108, ..., 41149, 41150, 41151]), 'atmsol': array([ 3573,  3574,  3575, ..., 41103, 41104, 41105]), 'stride': 1, 'search_cutoff': 18, 'cell_len': 16, 'index3D': array([[ 0,  0,  0],
       [ 0,  0,  1],
       [ 0,  0,  2],
       ...,
       [15, 15, 13],
       [15, 15, 14],
       [15, 15, 15]]

AttributeError: 'featurizer_3d' object has no attribute 'length3D'

In [57]:
("g SETTINGS").upper()
isinstance("", str)

True

In [5]:
Frame Number: 3, occupancy threshold 0.2
The averaged entropy is 0.13, Gaussian filtered entropy is 0.13
The max entropy is 1.39, Gaussian filtered entropy is 0.42
The min entropy is 0.00, Gaussian filtered entropy is 0.01
The standard deviation of entropy is 0.27, Gaussian filtered entropy is 0.05


#####################
Frame Number: 6, occupancy threshold 0.2
The averaged entropy is 0.17, Gaussian filtered entropy is 0.17
The max entropy is 1.75, Gaussian filtered entropy is 0.55
The min entropy is 0.00, Gaussian filtered entropy is 0.01
The standard deviation of entropy is 0.29, Gaussian filtered entropy is 0.07


#####################
Frame Number: 11, occupancy threshold 0.2
The averaged entropy is 0.09, Gaussian filtered entropy is 0.09
The max entropy is 1.81, Gaussian filtered entropy is 0.51
The min entropy is 0.00, Gaussian filtered entropy is 0.00
The standard deviation of entropy is 0.25, Gaussian filtered entropy is 0.06


#####################
Frame Number: 21, occupancy threshold 0.2
The averaged entropy is 0.02, Gaussian filtered entropy is 0.02
The max entropy is 1.92, Gaussian filtered entropy is 0.38
The min entropy is 0.00, Gaussian filtered entropy is 0.00
The standard deviation of entropy is 0.14, Gaussian filtered entropy is 0.04
#####################
Frame Number: 21, occupancy threshold 0.05
The averaged entropy is 0.23, Gaussian filtered entropy is 0.23
The max entropy is 1.92, Gaussian filtered entropy is 0.64
The min entropy is 0.00, Gaussian filtered entropy is 0.01
The standard deviation of entropy is 0.28, Gaussian filtered entropy is 0.08
#####################
Frame Number: 21, occupancy threshold 0.0
The averaged entropy is 0.23, Gaussian filtered entropy is 0.23
The max entropy is 1.92, Gaussian filtered entropy is 0.64
The min entropy is 0.00, Gaussian filtered entropy is 0.01
The standard deviation of entropy is 0.28, Gaussian filtered entropy is 0.08
#####################
Frame Number: 41, occupancy threshold 0.2
The averaged entropy is 0.01, Gaussian filtered entropy is 0.01
The max entropy is 1.81, Gaussian filtered entropy is 0.30
The min entropy is 0.00, Gaussian filtered entropy is 0.00
The standard deviation of entropy is 0.08, Gaussian filtered entropy is 0.02
#####################
Frame Number: 41, occupancy threshold 0.05
The averaged entropy is 0.18, Gaussian filtered entropy is 0.18
The max entropy is 1.81, Gaussian filtered entropy is 0.61
The min entropy is 0.00, Gaussian filtered entropy is 0.00
The standard deviation of entropy is 0.28, Gaussian filtered entropy is 0.11
#####################
Frame Number: 41, occupancy threshold 0.0
The averaged entropy is 0.25, Gaussian filtered entropy is 0.25
The max entropy is 1.81, Gaussian filtered entropy is 0.66
The min entropy is 0.00, Gaussian filtered entropy is 0.02
The standard deviation of entropy is 0.27, Gaussian filtered entropy is 0.09
#####################
Frame Number: 101, occupancy threshold 0.2
The averaged entropy is 0.00, Gaussian filtered entropy is 0.00
The max entropy is 1.71, Gaussian filtered entropy is 0.23
The min entropy is 0.00, Gaussian filtered entropy is 0.00
The standard deviation of entropy is 0.05, Gaussian filtered entropy is 0.01
#####################
Frame Number: 101, occupancy threshold 0.05
The averaged entropy is 0.08, Gaussian filtered entropy is 0.08
The max entropy is 1.71, Gaussian filtered entropy is 0.54
The min entropy is 0.00, Gaussian filtered entropy is 0.00
The standard deviation of entropy is 0.21, Gaussian filtered entropy is 0.14
#####################
Frame Number: 101, occupancy threshold 0.0
The averaged entropy is 0.27, Gaussian filtered entropy is 0.27
The max entropy is 1.71, Gaussian filtered entropy is 0.66
The min entropy is 0.00, Gaussian filtered entropy is 0.02
The standard deviation of entropy is 0.27, Gaussian filtered entropy is 0.10

SyntaxError: invalid syntax (152286284.py, line 1)

In [46]:
traj1= pt.load("test_featurizer.nc", top="test_featurizer.pdb")
print(dir(traj1))
print(traj1.n_frames)
print(traj1.n_atoms)

reslst = [i.name for i in traj1.top.residues]
# print(dir(reslst[1]))
# print(reslst)
residlst = [reslst[i.resid] for i in traj1.top.atoms]
print(residlst)
print(len(residlst))
# np.zeros(8000).tolist()

['__add__', '__call__', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_allocate', '_append_unitcells', '_boxes', '_estimated_GB', '_frame_holder', '_handle_setting_box_force_velocity', '_iterframe_indices', '_life_holder', '_top', '_xyz', 'align_principal_axis', 'append', 'append_xyz', 'autoimage', 'center', 'copy', 'crdinfo', 'forces', 'from_iterable', 'iterframe', 'load', 'n_atoms', 'n_frames', 'rmsfit', 'rotate', 'save', 'scale', 'shape', 'strip', 'superpose', 'time', 'top', 'topology', 'transform', 'translate', 'unitcells', 'velocities', 'view', 'visualize', 'xyz']
201
41152
['ASP', 'ASP', 'ASP', 'ASP', 'ASP', 'ASP', 'ASP', '

In [185]:
a=[1,1,1,1,3,3,2,6,2]
b = set(a)
print(len(b))

4


In [127]:
import pickle
import numpy as np 
import matplotlib.pyplot as plt
from matplotlib import cm
import nglview as nv 
import pytraj as pt
%matplotlib
# %nglview
class feature_3d_reader:
  def __init__(self, pickleFile):
    # Generate the 2D/3D hilbert curve
    with open(pickleFile, "rb") as file1:
      featuredic = pickle.load(file1)
      print(featuredic.keys()); 
      self.distances = featuredic["distances"]; 
      self.length3D  = featuredic["length3D"];
      self.points3D  = featuredic["points3D"]; 
      self.index3D   = featuredic["index3D"];
      self.features  = featuredic["features"];

      self.atom_groups = featuredic["atom_groups"]; 
      self.frameList = featuredic["frameList"]
      self.frameNr   = featuredic["frameNr"]
      if "element" in self.features:
        self.atom_mass  = featuredic["atom_mass"]
        self.norm_mass  = featuredic["norm_mass"]
        self.gauss_mass = featuredic["gauss_mass"]
      if "entropy" in self.features:
        self.entropy = featuredic["entropy"]
        self.gauss_entropy = featuredic["gauss_entropy"]
      if "charge" in self.features:
        self.atom_charge  = featuredic["atom_charge"]; 
        self.gauss_charge = featuredic["gauss_charge"]; 
  def selectData(self, maintype, select_group, subtype="gauss"):
    if maintype == "element":
      if subtype == "atom":
        data = self.atom_mass[select_group]; 
      elif subtype == "norm":
        data = self.norm_mass[select_group]; 
      elif subtype == "gauss": 
        data = self.gauss_mass[select_group]; 
      else: 
        print(f"Not Found the subtype {subtype}"); 
        data = self.gauss_mass[select_group]; 
    elif maintype == "entropy":
      if subtype == "gauss":
        data = self.gauss_entropy; 
      else:
        data = self.entropy; 
    elif maintype == "charge":
      if subtype == "atom":
        data = self.atom_charge[select_group]; 
      elif subtype == "gauss": 
        data = self.gauss_charge[select_group]; 
      else: 
        print(f"Not Found the subtype {subtype}"); 
        data = self.gauss_charge[select_group]; 
    return data
  def scatter3D(self, maintype, select_group, indice=0, subtype="gauss", cmap="Blues", threshold=0.1):
    thedata = self.selectData(maintype, select_group, subtype=subtype);
    if maintype == "element":
      thedata = thedata[indice]

    fig = plt.figure(); 
    ax = fig.add_subplot(projection='3d'); 
    plt.ion(); 

    thecmap = cm.get_cmap(cmap)
    print(thedata.shape)
    for i in self.distances: 
      theindex = tuple(self.index3D[i]); 
      theposition = self.points3D[i]; 
      v = thedata[theindex]
      thecolor = thecmap(v)
      # print(f"point: {i}, value: {v}, color: {thecolor}")
      if v > threshold:
        ax.scatter(*theposition, color=thecolor)

  def gen_pdbstr(self, coordinates, elements=None, bf=[]):
    if elements == None: 
      elements = ["Du"]*len(coordinates)
    if len(coordinates) != len(bf):
      print("length not aligned")
    pdbline = ""
    tempstr = "ATOM      1  Du  TMP     1       0.000   0.000   0.000  1.00  0.00";
    coordinates = np.round(coordinates, decimals=3)
    for i in range(len(coordinates)):
      if len(bf) != 0: 
        bfval = bf[i]
      else: 
        bfval = 0.00
      point = coordinates[i]; 
      elem  = elements[i]; 
      tmpstr = "".join(["{:>8}".format(i) for i in point]); 
      tmpstr = "".join([f"{i:>8}" for i in point]); 
      thisline = f"ATOM  {i:>5}  {elem:<3}{tempstr[16:30]}{tmpstr}{tempstr[54:60]}{round(bfval,2):>6}\n"
      pdbline += thisline
    return pdbline

  def filter_coor(self, maintype, select_group, threshold, mode="gt", indice=0, subtype="gauss"):
    thedata = self.selectData(maintype, select_group, subtype=subtype);
    if maintype == "element" or maintype == "charge":
      thedata = thedata[indice]
    if mode == "gt":
      status = thedata > float(threshold)
    elif mode == "lt":
      status = thedata < float(threshold)
    elif mode == "mid":
      threshold1 = float(threshold.split(",")[0])
      threshold2 = float(threshold.split(",")[1])
      status1 = thedata >= threshold1
      status2 = thedata <= threshold2
      status = status1 * status2
    elif mode == "out":
      threshold1 = float(threshold.split(",")[0])
      threshold2 = float(threshold.split(",")[1])
      status1 = thedata < threshold1
      status2 = thedata > threshold2
      status = np.logical_or(status1, status2)
    filtered = thedata[status]
    print(f"there are {np.count_nonzero(status)} non-zero values")
    print(f"Filtered data: mean:{np.mean(filtered):2f}, Std:{np.std(filtered):2f}, Max:{np.max(filtered):2f}, Min:{np.min(filtered):2f}")
    coorlist = []
    for i in self.distances: 
      theindex = tuple(self.index3D[i]); 
      if status[theindex] == True:
        coorlist.append(self.points3D[i])
    return np.array(coorlist), filtered

  def write_pdb(self, pdblines, pdbfile="./test.pdb"):
    with open(pdbfile, "w") as file1:
      file1.write(pdblines)
    return pdbfile
        
reader = feature_3d_reader("test_featurizer_3D.pkl")

# print(reader.selectData("charge", "ligand", subtype="atom"))

points, filtered_vals = reader.filter_coor("charge", "ligand", "-0.05,0.05", indice=0, mode = "out", subtype="atom")
print(points.shape, len(filtered_vals))
tmppdb = reader.gen_pdbstr(points, bf=filtered_vals)
reader.write_pdb(tmppdb)
viewer = nv.show_text(tmppdb)

traj1 = pt.load("test_featurizer.pdb")

def sel_conv(traj, mask):
  return f"@{','.join(traj.top.select(mask).astype(str))}"


viewer.add_component("test_featurizer.pdb")
viewer[0].clear_representations()
viewer[0].add_representation("line", color="bfactor", opacity=0.6, pointSize=0.7)
viewer[1].clear_representations()
viewer[1].add_representation("cartoon", selection="protein", color="#bbe1fa")
# viewer[1].add_representation("licorice", selection="protein")
selstr = sel_conv(traj1, ":113-120")
viewer[1].add_representation("line", selection=selstr, radius=4, zOffset=0, xOffset=0.5,yOffset=0.5)
selstr = sel_conv(traj1, ":LIG")
viewer[1].add_representation("ball+stick", selection=selstr)
viewer

# reader.scatter3D("entropy", "all", subtype="gauss", cmap="Reds", threshold=0.3); 



Using matplotlib backend: Qt5Agg
dict_keys(['frameNr', 'frameList', 'atom_groups', 'distances', 'length3D', 'index3D', 'points3D', 'features', 'atomic_names', 'atomic_number', 'atom_mass', 'norm_mass', 'gauss_mass', 'entropy', 'gauss_entropy', 'atom_charge', 'gauss_charge'])
there are 651 non-zero values
Filtered data: mean:-0.010184, Std:0.387488, Max:0.500000, Min:-0.773000
(651, 3) 651


NGLWidget()

In [None]:
REPRESENTATION_NAME_PAIRS = [
    ('axes', 'axes'),            # Shows the three pairs of ellipse axes
    ('principal_axes', 'axes'),  # Not work well. Should be used in another way
    ('point', 'point'),          # Cubic point
    ('line', 'line'),            # Dummy atoms as cross, other structures are normal
    ('rope', 'rope'),            # A normal representation on ONLY protein structure 
    ('tube', 'tube'),            # A normal representation on ONLY protein structure (relative thin)
    ('trace', 'trace'),          # A normal representation on ONLY protein structure (very thin line)
    ('label', 'label'),          # Very interesting labels
    ('slice', 'slice'),          # Not work well. Should be used in another way
    ('unitcell', 'unitcell'),    # Unit cell of the PDB
    ('cartoon', 'cartoon'),      # A normal representation on ONLY protein structure 
    ('licorice', 'licorice'),    # A normal representation on ALL structure 
    ('distance', 'distance'),    # Not work well. Should be used in another way
    ('ribbon', 'ribbon'),        # A normal representation on ONLY protein structure 
    ('surface', 'surface'),      # SURFACE
    ('backbone', 'backbone'),    # A normal representation on ONLY protein structure 
    ('contact', 'contact'),      # Hydrogen bond interaction 
    ('hyperball', 'hyperball'),  # A normal representation on ALL structure 
    ('rocket', 'rocket'),        # A SPECIAL representation on ONLY protein structure 
    ('helixorient', 'helixorient'), # A SPECIAL representation on ONLY protein structure and ONLY alpha-Helix
    ('simplified_base', 'base'), # Not work well. Should be used in another way
    ('spacefill', 'spacefill'),  # SPHERE
    ('ball_and_stick', 'ball+stick'),  # BALL+STICK
]

In [178]:
from scipy.stats import entropy
def get_entropy(arr):
    unique, counts = np.unique(arr, return_counts=True)
    return entropy(counts)
    
# lst1 = [1500, 8, 4, 3, 54]
lst1 = [0, 0, 0, 3, 0, 0, 0, 4, 3, 2 , 0, 0, 0, 3, 0]
lst2 = [i for i in set(lst1)]
print(get_entropy(lst1))
print(get_entropy(lst2))

0.953271014705891
1.3862943611198906


In [11]:
x=reader.gauss_entropy.reshape((-1,8000)).squeeze()
bins = np.linspace(0,0.7,15)
print(bins)
def plt_hist(data, n_bins):
    fig, ax = plt.subplots(1, 1, sharey=True, tight_layout=True)
    N, bins, patches = ax.hist(data, bins=n_bins)
    return fig, ax, N, bins, patches
plt_hist(x, n_bins=bins)

[0.   0.05 0.1  ... 0.6  0.65 0.7 ]


(<Figure size 640x480 with 1 Axes>,
 <AxesSubplot:>,
 array([ 21., 215., 670., ...,   9.,   1.,   1.]),
 array([0.  , 0.05, 0.1 , ..., 0.6 , 0.65, 0.7 ]),
 <BarContainer object of 14 artists>)

In [36]:
import pytraj as pt 
traj1 = pt.load("test_featurizer.pdb", top='test_featurizer.pdb')
traj1.top.set_reference(traj1[0])
a = traj1.top.select(":LIG<@18")
print(len(a))

4991


In [6]:
a=0.879456135165465
x = f"{round(a,2):>6}"
print(x)
print(len(x))

  0.88
6


In [None]:
                        # Step5 : Iterate all 3D points, find nearest points and assign descriptors                        
                        atom_mass = np.zeros((self.length3D, self.length3D, self.length3D));
                        atom_name = np.zeros((self.length3D, self.length3D, self.length3D)).astype(str);
                        res_name  = np.zeros((self.length3D, self.length3D, self.length3D)).astype(str);
                        for ind in self.distances:
                            array_3Didx = tuple(self.index3D[ind]); 
                            if np.any(cand_diststatus[ind,:]):
                                rown = cand_distmatrix[ind,:]; 
                                grpidx = np.where(rown == np.min(rown))[0].item();
                                atmidx = cand_index[grpidx]; 
                                atom_mass[array_3Didx] = self.pdb_atomic_numbers[atmidx]; 
                                atom_name[array_3Didx] = self.pdb_atomic_names[atmidx]; 
                                res_name[array_3Didx]  = self.pdb_residue_names[atmidx];
                            else:
                                atom_mass[array_3Didx] = 0; 
                                atom_name[array_3Didx] = False;
                                res_name[array_3Didx]  = False;
                                
                        break
                        # Normalize the atom mass and then smoothen the points by gaussian
                        atom_mass_framen = np.array(atom_mass); 
                        norm_mass_framen = self.Norm_mass_array(atom_mass_framen)
                        gauss_mass_framen = gaussian_filter(norm_mass_framen, sigma=1)
                        
                        self.atom_mass[sel].append(atom_mass_framen)
                        self.norm_mass[sel].append(norm_mass_framen)
                        self.gauss_mass[sel].append(gauss_mass_framen)
                        self.atom_name[sel].append(np.array(atom_name))
                        self.res_name[sel].append(np.array(res_name))