In [1]:
%matplotlib ipympl
import os
import time
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from biopandas.pdb import PandasPdb
from prody import parsePDBHeader


In [2]:
def f1(x):
    """Return ``x`` raised to the power -1."""
    return pow(x, -1)

In [3]:
def f1_5(x):
    """Return ``x`` raised to the power -1.5."""
    return pow(x, -1.5)

In [4]:
def f2(x):
    """Return ``x`` raised to the power -2."""
    return pow(x, -2)

In [99]:
def f2_5(x):
    """Return ``x`` raised to the power -2.5."""
    return pow(x, -2.5)

In [6]:
def f3(x):
    """Return ``x`` raised to the power -3."""
    return pow(x, -3)

In [95]:
def preprocess(pdb_path, feature, include='none', exclude='none'):
    """Filter the ATOM records of model 1 by `feature` and save the result.

    The file at `pdb_path` is read, model 1 is selected, the ATOM table is
    filtered, and the structure is written to
    ``pdbs/preprocessed/<file name of pdb_path>``.

    Parameters
    ----------
    pdb_path : str
        Path of the input PDB file.
    feature : str
        Column of the ATOM table to filter on (e.g. ``'chain_id'``).
    include : iterable or 'none'
        Values of `feature` to keep. ``'none'`` (default) disables the filter.
        An empty iterable keeps no atoms.
    exclude : iterable or 'none'
        Values of `feature` to drop. Only consulted when `include` is
        ``'none'``; ``'none'`` (default) disables the filter.
    """
    def _is_unset(value):
        # Only the literal sentinel string counts as "no filter"; the isinstance
        # check keeps array-like arguments from being compared element-wise.
        return isinstance(value, str) and value == 'none'

    atomic_df = PandasPdb().read_pdb(pdb_path).get_model(1)
    atoms = atomic_df.df['ATOM']

    if not _is_unset(include):
        atomic_df.df['ATOM'] = atoms[atoms[feature].isin(list(include))]
    elif not _is_unset(exclude):
        atomic_df.df['ATOM'] = atoms[~atoms[feature].isin(list(exclude))]

    # Create the output folder on first use so a fresh checkout can run this.
    os.makedirs('pdbs/preprocessed/', exist_ok=True)
    atomic_df.to_pdb('pdbs/preprocessed/' + os.path.basename(pdb_path))


In [94]:
def preprocess_startswith(pdb_path, feature = 'atom_name', include: list = ['C', 'N', 'O', 'S']):
    """Keep only ATOM records of model 1 whose `feature` starts with a prefix.

    The file at `pdb_path` is read, model 1 is selected, rows of the ATOM
    table whose `feature` value does not start with one of the entries of
    `include` are dropped, and the structure is written to
    ``pdbs/preprocessed/<file name of pdb_path>``.

    Parameters
    ----------
    pdb_path : str
        Path of the input PDB file.
    feature : str
        Column of the ATOM table to test (default ``'atom_name'``).
    include : iterable of str
        Accepted prefixes. An empty iterable keeps no atoms. The list default
        is never mutated here, so sharing it between calls is safe.
    """
    atomic_df = PandasPdb().read_pdb(pdb_path).get_model(1)
    atoms = atomic_df.df['ATOM']

    prefixes = tuple(include)
    # Plain str.startswith (via map) accepts a tuple of prefixes on every
    # pandas version; astype(bool) keeps the mask boolean for an empty table.
    keep = atoms[feature].astype(str).map(lambda value: value.startswith(prefixes)).astype(bool)

    # Assign the filtered table back; indexing without assignment has no effect.
    atomic_df.df['ATOM'] = atoms[keep]

    # Create the output folder on first use so a fresh checkout can run this.
    os.makedirs('pdbs/preprocessed/', exist_ok=True)
    atomic_df.to_pdb('pdbs/preprocessed/' + os.path.basename(pdb_path))

In [101]:
def _evaluate_weights(func, distances):
    """Apply `func` to a 1-D array of distances and return a float array.

    The function is first called once on the whole array; if that fails or
    does not return one value per distance, it is called once per element.
    """
    try:
        values = np.asarray(func(distances), dtype=float)
    except (TypeError, ValueError):
        values = None
    if values is None or values.shape != distances.shape:
        values = np.array([func(distance) for distance in distances], dtype=float)
    return values


def _save_exposure_scores(atomic_df, scores, stem, plus_inv_score, save_scores_as_vector):
    """Store `scores` (and optionally ``100/scores``) as B-factors and save them."""
    variants = [('', scores)]
    if plus_inv_score:
        variants.append(('_inv', 100 / scores))

    for suffix, values in variants:
        atomic_df.df['ATOM'] = atomic_df.df['ATOM'].assign(b_factor=values)
        atomic_df.to_pdb('pdbs/out/' + stem + suffix + '.pdb')
        if save_scores_as_vector:
            np.save('pdbs/out/npys/' + stem + suffix + '_vec.npy', values)


def calc_exposure(pdb_path, funcs: Optional[dict] = None, assignment=None, plus_inv_score: bool = True, save_matrix: bool = False, save_scores_as_vector: bool = False):
    """Score every atom of model 1 by summing a function of its pair distances.

    For each entry ``label -> func`` of `funcs`, a symmetric matrix holding
    ``func(distance)`` for every pair of ATOM records is built (diagonal 0).
    Per-atom scores derived from it are written into the B-factor column and
    saved as ``pdbs/out/<stem>[_<assignment label>]_<label>.pdb``, where
    ``<stem>`` is the file name of `pdb_path` without its extension.

    Parameters
    ----------
    pdb_path : str
        Path of the input PDB file.
    funcs : dict, optional
        Maps a string label to a function of distance. Defaults to
        ``{'2': f2}``. Each function is tried on a whole array of distances
        first and called per distance if that does not work.
    assignment : dict, optional
        ``None`` sums each matrix over all atoms. A dict maps a string label
        to a vector with one weight per atom (for example the output of
        ``create_3_vectors``); the score is then ``vector @ matrix``.
    plus_inv_score : bool
        Also save ``100 / score`` with an ``_inv`` suffix.
    save_matrix : bool
        Save each pair matrix as ``pdbs/out/npys/<stem>_<label>_mat.npy``.
    save_scores_as_vector : bool
        Also save every score vector as ``..._vec.npy`` in ``pdbs/out/npys/``.

    Raises
    ------
    TypeError
        If `assignment` is neither ``None`` nor a dict.
    """
    if funcs is None:
        funcs = {'2': f2}
    # Reject unusable input before the expensive pairwise computation.
    if assignment is not None and not isinstance(assignment, dict):
        raise TypeError('assignment must be None or a dict of per-atom vectors')

    stem = os.path.splitext(os.path.basename(pdb_path))[0]

    atomic_df = PandasPdb().read_pdb(pdb_path).get_model(1)
    coords = atomic_df.df['ATOM'][['x_coord', 'y_coord', 'z_coord']].to_numpy()
    n_atoms = len(coords)

    pair_scores = {key: np.zeros((n_atoms, n_atoms)) for key in funcs}

    # One vectorised pass per atom: distances from atom i to all later atoms,
    # mirrored into both triangles of each matrix.
    for i in range(n_atoms - 1):
        distances = np.linalg.norm(coords[i + 1:] - coords[i], axis=1)
        for key, func in funcs.items():
            weights = _evaluate_weights(func, distances)
            pair_scores[key][i, i + 1:] = weights
            pair_scores[key][i + 1:, i] = weights

    os.makedirs('pdbs/out/', exist_ok=True)
    if save_matrix or save_scores_as_vector:
        os.makedirs('pdbs/out/npys/', exist_ok=True)

    if assignment is None:
        for key, mat in pair_scores.items():
            _save_exposure_scores(atomic_df, np.sum(mat, axis=0), stem + '_' + key,
                                  plus_inv_score, save_scores_as_vector)
    else:
        for label, weights_vector in assignment.items():
            for key, mat in pair_scores.items():
                _save_exposure_scores(atomic_df, weights_vector @ mat, stem + '_' + label + '_' + key,
                                      plus_inv_score, save_scores_as_vector)

    # The matrices do not depend on the assignment, so save each one once.
    if save_matrix:
        for key, mat in pair_scores.items():
            np.save('pdbs/out/npys/' + stem + '_' + key + '_mat.npy', mat)

In [None]:
def print_features(pdb_path, feature):
    """Print the distinct values of an ATOM column for model 1.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB file to read.
    feature : str
        Column of the ATOM table whose values are listed.

    The values are printed as a list, in order of first appearance.
    """
    atomic_df = PandasPdb().read_pdb(pdb_path).get_model(1)

    # dict.fromkeys de-duplicates in one pass while keeping first-seen order.
    print(list(dict.fromkeys(atomic_df.df['ATOM'][feature])))

In [11]:
def create_vectors(pdb_path, chains, feature):
    """Build one 0/1 indicator row per group for the ATOM records of model 1.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB file to read.
    chains : sequence
        Groups of `feature` values. Membership is tested with ``in``, so a
        group may be a list or a string.
    feature : str
        Column of the ATOM table to test.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(chains), number of ATOM records)`` where
        entry ``[i, j]`` is 1 if atom ``j``'s `feature` value is in
        ``chains[i]`` and 0 otherwise.
    """
    atomic_df = PandasPdb().read_pdb(pdb_path).get_model(1)
    values = list(atomic_df.df['ATOM'][feature])

    out = np.zeros((len(chains), len(values)))
    for row, group in enumerate(chains):
        out[row] = [value in group for value in values]

    return out

In [57]:
def create_3_vectors(pdb_path, chain1, feature):
    """Build selected / not-selected / all indicator vectors for model 1.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB file to read.
    chain1 : str or iterable of str
        A single `feature` value, or several values that together form the
        selection.
    feature : str
        Column of the ATOM table to compare against.

    Returns
    -------
    dict
        ``{name: selected, 'not' + name: complement, 'tot': ones}`` with one
        entry per ATOM record in each vector. ``name`` is `chain1` itself for
        a string, otherwise its first element followed by ``'plus'``.

    Raises
    ------
    ValueError
        If `chain1` is an empty iterable.
    """
    atomic_df = PandasPdb().read_pdb(pdb_path).get_model(1)
    column = atomic_df.df['ATOM'][feature]

    out_tot = np.ones(len(column))

    if isinstance(chain1, str):
        selected = column.eq(chain1)
        name = chain1
    else:
        members = list(chain1)
        if not members:
            raise ValueError('chain1 must contain at least one value')
        selected = column.isin(members)
        name = members[0] + 'plus'

    out1 = np.array(selected).astype(int)
    out2 = out_tot - out1

    return {name: out1, 'not' + name: out2, 'tot': out_tot}

In [None]:
# Score the bddm15x structure with f2, split by whether residue_name is 'BDD'.
calc_exposure(
    'pdbs/preprocessed/bddm15x_nvt_rep1_chains.pdb',
    funcs={'2': f2},
    assignment=create_3_vectors(
        'pdbs/preprocessed/bddm15x_nvt_rep1_chains.pdb',
        'BDD',
        'residue_name',
    ),
)

In [2]:
def _parse_atom_spec(spec):
    """Split an atom spec into ``(chain, residue_number, atom_name)``.

    The expected form is ``[#model]/chain:resnum@atom``. Returns ``None`` when
    `spec` is not a string of that form or the residue number is not an
    integer.
    """
    if not isinstance(spec, str):
        return None
    _, slash, tail = spec.partition('/')
    chain, colon, rest = tail.partition(':')
    resnum, at_sign, atom = rest.partition('@')
    if not (slash and colon and at_sign):
        return None
    try:
        return chain, int(resnum), atom
    except ValueError:
        return None


def plot_score_v_localres_byatom(pdbout, defattr, backboneonly: bool = False, inverse: bool = True, interactive: bool = True):
    """Scatter-plot per-atom B-factor values against per-atom local resolution.

    Parameters
    ----------
    pdbout : str
        PDB file whose B-factor column holds the per-atom score (such as a
        file written by ``calc_exposure``). Model 1 is used.
    defattr : str
        Tab-separated attribute file (see the ChimeraX note in this notebook);
        columns 1 and 2 are read as atom spec and local resolution.
    backboneonly : bool
        Only plot atoms named C, N, O or CA.
    inverse : bool
        Plot ``1 / local resolution`` and add a degree-1 least-squares fit
        line (drawn when at least two atoms matched); otherwise plot the
        local resolution itself without a fit.
    interactive : bool
        Show the atom spec(s) under the mouse cursor in an annotation.

    Atoms that cannot be parsed, are missing from the PDB file, or match more
    than one ATOM record are left out of the plot.
    """
    plt.close()
    fig, ax = plt.subplots()

    localres = pd.read_csv(defattr, sep='\t', header=3, usecols=[1, 2], names=['atom', 'localres']).set_index('atom')

    atomic_df = PandasPdb().read_pdb(pdbout).get_model(1)
    df = atomic_df.df['ATOM'].set_index(['chain_id', 'residue_number', 'atom_name'])

    scores, resolutions, names = [], [], []
    for spec, resolution in localres['localres'].items():
        parsed = _parse_atom_spec(spec)
        if parsed is None:
            continue
        chain, resnum, atom = parsed
        if backboneonly and atom not in ('C', 'N', 'O', 'CA'):
            continue
        try:
            score = df.loc[(chain, resnum), 'b_factor'][atom]
        except KeyError:
            continue
        if isinstance(score, pd.Series):
            # More than one ATOM record matches this spec; skip it as ambiguous.
            continue
        scores.append(score)
        resolutions.append(resolution)
        names.append(spec)

    scores = np.asarray(scores, dtype=float)
    resolutions = np.asarray(resolutions, dtype=float)

    if inverse:
        inv_resolutions = 1 / resolutions
        sc = ax.scatter(scores, inv_resolutions, s=3)
        # A line fit needs at least two points.
        if scores.size >= 2:
            xs = np.unique(scores)
            fit = np.poly1d(np.polyfit(scores, inv_resolutions, 1))
            ax.plot(xs, fit(xs), color=(0, 0, 0))
        ax.set_ylabel('Local Resolution at Atom in Model (1/Å)')
    else:
        sc = ax.scatter(scores, resolutions, s=3)
        ax.set_ylabel('Local Resolution at Atom in Model (Å)')
    ax.set_xlabel('Atom Exposure Score (Arbitrary Units)')

    if interactive:
        annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points", bbox=dict(boxstyle="round", fc="w"), arrowprops=dict(arrowstyle="->"))
        annot.set_visible(False)

        def update_annot(ind):
            # Anchor the label at the first hit and list every atom under the cursor.
            annot.xy = sc.get_offsets()[ind["ind"][0]]
            annot.set_text(" ".join(names[n] for n in ind["ind"]))
            annot.get_bbox_patch().set_alpha(0.4)

        def hover(event):
            if event.inaxes != ax:
                return
            cont, ind = sc.contains(event)
            if cont:
                update_annot(ind)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            elif annot.get_visible():
                annot.set_visible(False)
                fig.canvas.draw_idle()

        fig.canvas.mpl_connect("motion_notify_event", hover)

    plt.show()

In [None]:
# Preprocess 6cvm with the default settings of preprocess_startswith.
preprocess_startswith('pdbs/6cvm.pdb')
# Pass `funcs` explicitly as a dict of label -> function; the label '2'
# yields the output file pdbs/out/6cvm_2.pdb.
calc_exposure('pdbs/preprocessed/6cvm.pdb', {'2': f2})


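*Getting local resolution by atom in ChimeraX*

Run the following in the ChimeraX command line, where `#3` is the local-resolution map and `#1` is the atomic model (adjust the model IDs to match your session):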

measure mapvalues #3 atoms #1 attribute locres

save 'XXXX\pdbs\XXXX.defattr' attrName locres models #1

In [None]:
# Compare the f2 score of 6cvm with the local resolution, backbone atoms only.
plot_score_v_localres_byatom(
    pdbout='pdbs/out/6cvm_2.pdb',
    defattr='pdbs/betagal_J255.defattr',
    backboneonly=True,
)