In [1]:
import os
import warnings

import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import MDAnalysis

# RMSF

In [2]:
def get_rmsf_all_atom(traj, case_name):
    """Return the per-atom RMSF for one simulation, using an on-disk cache.

    The result is cached as ``./data/<case_name>_rmsf_all_atom.npy``. When that
    file can be loaded it is returned directly and ``traj`` is not used.
    Otherwise the RMSF is computed with ``md.rmsf`` and written to the cache.

    Parameters
    ----------
    traj : mdtraj trajectory
        Only used on a cache miss. It is sliced (``traj[1000:]``) and indexed
        (``traj[0]``), then handed to ``md.rmsf``.
    case_name : str
        Prefix of the cache file name.

    Returns
    -------
    The array returned by ``np.load`` (cache hit) or ``md.rmsf`` (cache miss).

    Notes
    -----
    The cache is keyed on ``case_name`` only. If the trajectory or the frame
    window changes, delete the ``.npy`` file by hand, otherwise stale values
    are returned.
    """
    cache_file = './data/' + case_name + '_rmsf_all_atom.npy'

    try:
        rmsf_all_atom = np.load(cache_file)
    except (OSError, ValueError, EOFError):
        # Missing, unreadable or corrupt cache -> recompute. Anything else
        # (KeyboardInterrupt, NameError, ...) is deliberately NOT swallowed.
        # Frames before index 1000 are skipped and frame 0 is passed as the
        # reference structure.
        rmsf_all_atom = md.rmsf(traj[1000:], traj[0])

        # Make sure the cache directory exists so the expensive result is not
        # lost to a failing np.save.
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
        np.save(cache_file, rmsf_all_atom)
        print(f'{case_name}_rmsf_all_atom.npy file saved')
    else:
        print(f'{case_name}_rmsf_all_atom.npy file loaded')

    return rmsf_all_atom

def is_protein(res):
    """Return True if ``res`` has all four backbone atoms CA, C, O and N.

    Parameters
    ----------
    res : object exposing ``atom(name)``
        Called here with mdtraj residues; ``atom`` is expected to raise
        ``KeyError`` when no atom of that name exists in the residue.

    Returns
    -------
    bool
        False as soon as one backbone atom is missing, True otherwise.
    """
    try:
        for atom_name in ('CA', 'C', 'O', 'N'):
            res.atom(atom_name)
    except KeyError:
        # Only "atom not found" means "not a protein residue"; any other
        # error is a real problem and is allowed to propagate.
        return False
    return True

def get_backbone_rmsf_byres(topology, rmsf_all_atom, n_chains=4):
    """Average the per-atom RMSF over each protein residue's backbone atoms.

    Parameters
    ----------
    topology : mdtraj topology
        Provides ``residues``, ``residue(i)`` and, per residue, ``atoms``.
    rmsf_all_atom : numpy array
        Per-atom values, indexed by the atoms' ``index`` in ``topology``.
    n_chains : int, default 4
        Number of identical chains. Protein residues are assumed to be ordered
        chain after chain, with every chain the same length.

    Returns
    -------
    rmsf_df : pandas.DataFrame
        One row per protein residue with columns ``rmsf`` and ``residue``;
        the ``residue`` label restarts for every chain.
    rmsf_mean_df : pandas.DataFrame
        ``rmsf_df`` grouped by ``residue`` and averaged over the chains.

    Raises
    ------
    ValueError
        If the number of protein residues is not a multiple of ``n_chains``.
    """
    backbone_names = ('CA', 'N', 'C', 'O')

    rmsf_by_res = []
    for res in topology.residues:
        if not is_protein(res):
            continue
        # Walk the residue's own atoms instead of running a selection over the
        # whole topology for every residue.
        atom_index = [atom.index for atom in res.atoms if atom.name in backbone_names]
        rmsf_by_res.append(np.average(rmsf_all_atom[atom_index]))

    one_chain_length, leftover = divmod(len(rmsf_by_res), n_chains)
    if leftover:
        raise ValueError(
            f'{len(rmsf_by_res)} protein residues cannot be split into '
            f'{n_chains} chains of equal length'
        )

    # Use the topology that was passed in, not a notebook-level global.
    # NOTE(review): `.index` is the residue's position in the topology; if PDB
    # numbering is wanted on the x-axis, `resSeq` may be the intended
    # attribute - confirm.
    first_label = topology.residue(0).index
    chain_labels = list(range(first_label, first_label + one_chain_length))

    rmsf_df = pd.DataFrame({'rmsf': np.array(rmsf_by_res),
                            'residue': np.array(chain_labels * n_chains)})

    rmsf_mean_df = rmsf_df.groupby('residue').mean()
    return rmsf_df, rmsf_mean_df


def plot_rmsf(rmsf_df, rmsf_mean_df, ax):
    """Draw the per-residue RMSF as a seaborn line plot on ``ax``.

    ``rmsf_df`` must have ``residue`` and ``rmsf`` columns. ``rmsf_mean_df`` is
    currently unused; only the disabled highlighting code below refers to it.
    """
    sns.lineplot(data=rmsf_df, x='residue', y='rmsf', ax=ax)

    # Disabled: mark residues whose mean RMSF exceeds 0.3 and label them with
    # their residue number.
    #sns.scatterplot(x='residue', y='rmsf', data=rmsf_mean_df[rmsf_mean_df['rmsf'] > 0.3], ax=ax)
    #for i in rmsf_mean_df[rmsf_mean_df['rmsf'] > 0.3].index:
        #ax.text(i, rmsf_mean_df.loc[i, 'rmsf'], i, fontsize=12)

        
def rmsd_diff(rmsf_mean_df_1, rmsf_mean_df_2, pdb_file,
              out_pdb='./rmsf/4xyj_rmsf-diff_complex-apo.pdb', n_chains=4):
    """Compute the per-residue RMSF difference and store it in a PDB B-factor column.

    Despite the name, the quantity is an RMSF difference:
    ``rmsf_mean_df_1['rmsf'] - rmsf_mean_df_2['rmsf']``, aligned on the frames'
    index.

    Parameters
    ----------
    rmsf_mean_df_1, rmsf_mean_df_2 : pandas.DataFrame
        Frames with an ``rmsf`` column (as returned by
        ``get_backbone_rmsf_byres``).
    pdb_file : str
        Structure loaded with MDAnalysis; its ``protein`` selection receives
        the values.
    out_pdb : str, optional
        Where the annotated structure is written. Defaults to the path that
        was previously hard-coded.
    n_chains : int, default 4
        The difference is repeated this many times so that every chain gets
        the same per-residue values.

    Returns
    -------
    pandas.Series
        The signed difference (case 1 minus case 2).
    """
    diff = rmsf_mean_df_1['rmsf'] - rmsf_mean_df_2['rmsf']

    u = MDAnalysis.Universe(pdb_file)
    u.add_TopologyAttr('tempfactors')  # add empty attribute for all atoms
    protein = u.select_atoms('protein')  # select protein atoms

    # zip() below stops at the shorter sequence, so a residue-count mismatch
    # would otherwise go unnoticed and leave part of the structure unannotated.
    n_values = len(diff) * n_chains
    n_residues = len(protein.residues)
    if n_values != n_residues:
        warnings.warn(
            f'{n_values} RMSF values for {n_residues} protein residues in '
            f'{pdb_file}; B-factors may be misaligned',
            stacklevel=2,
        )

    for residue, r_value in zip(protein.residues, pd.concat([diff] * n_chains)):
        residue.atoms.tempfactors = r_value

    out_dir = os.path.dirname(out_pdb)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    u.atoms.write(out_pdb)

    return diff

In [5]:
# Case 1: 4XYK FLC system
case_name_1 = '1_4XYK_17-784-FLC_nowat'
traj_1 = md.load_xtc(
    '../5_4XYK-17-784-FLC_MD/1_4XYK_17-784-FLC_nowat.xtc',
    top='../5_4XYK-17-784-FLC_MD/1_4XYK_17-784-FLC_nowat.gro',
)

# Case 2: 4XYK apo system
case_name_2 = '1_apo_4XYK_17-784_nowat'
traj_2 = md.load_xtc(
    '../3_apo_4XYK-17-784_MD/1_apo_4XYK_17-784_nowat.xtc',
    top='../3_apo_4XYK-17-784_MD/1_apo_4XYK_17-784_nowat.gro',
)

# Per-atom RMSF (loaded from ./data when cached), then backbone average per residue.
topology_1 = traj_1.topology
rmsf_all_atom_1 = get_rmsf_all_atom(traj_1, case_name_1)
rmsf_df_1, rmsf_mean_df_1 = get_backbone_rmsf_byres(topology_1, rmsf_all_atom_1)

topology_2 = traj_2.topology
rmsf_all_atom_2 = get_rmsf_all_atom(traj_2, case_name_2)
rmsf_df_2, rmsf_mean_df_2 = get_backbone_rmsf_byres(topology_2, rmsf_all_atom_2)


1_4XYK_17-784-FLC_nowat_rmsf_all_atom.npy file loaded
1_apo_4XYK_17-784_nowat_rmsf_all_atom.npy file loaded


In [6]:
# RMSF difference (case 1 minus case 2), written into the B-factors of the apo 4XYK structure.
# NOTE(review): rmsd_diff writes to './rmsf/4xyj_rmsf-diff_complex-apo.pdb' by default, so this
# 4XYK result lands in a file labelled 4xyj - confirm that this is intended.
apo_4xyk_pdb = '../3_apo_4XYK-17-784_MD/1_apo_4XYK_17-784.pdb'
diff = rmsd_diff(rmsf_mean_df_1, rmsf_mean_df_2, apo_4xyk_pdb)



In [11]:
# Extremes of the per-residue RMSF difference.
(diff.min(), diff.max())

(-0.132914, 0.046398506)

In [10]:
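# PyMOL command to colour the written PDB by its B-factor column (which holds the RMSF difference):
# spectrum b, blue_white_red, minimum=-0.05, maximum=0.05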

In [14]:
# Case 1: 4XYJ FLC system
case_name_1 = '1_4XYJ_17-784-FLC_nowat'
traj_1 = md.load_xtc(
    '../6_4XYJ-17-784-FLC_MD/1_4XYJ_17-784-FLC_nowat.xtc',
    top='../6_4XYJ-17-784-FLC_MD/1_4XYJ_17-784-FLC_nowat.gro',
)

# Case 2: 4XYJ apo system
case_name_2 = '1_apo_4XYJ_17-784_nowat'
traj_2 = md.load_xtc(
    '../4_apo_4XYJ-17-784_MD/1_apo_4XYJ_17-784_nowat.xtc',
    top='../4_apo_4XYJ-17-784_MD/1_apo_4XYJ_17-784_nowat.gro',
)

# Per-atom RMSF (loaded from ./data when cached), then backbone average per residue.
# These assignments replace the 4XYK variables of the same names from the earlier cell.
topology_1 = traj_1.topology
rmsf_all_atom_1 = get_rmsf_all_atom(traj_1, case_name_1)
rmsf_df_1, rmsf_mean_df_1 = get_backbone_rmsf_byres(topology_1, rmsf_all_atom_1)

topology_2 = traj_2.topology
rmsf_all_atom_2 = get_rmsf_all_atom(traj_2, case_name_2)
rmsf_df_2, rmsf_mean_df_2 = get_backbone_rmsf_byres(topology_2, rmsf_all_atom_2)


1_4XYJ_17-784-FLC_nowat_rmsf_all_atom.npy file saved
1_apo_4XYJ_17-784_nowat_rmsf_all_atom.npy file saved


In [15]:
# RMSF difference (case 1 minus case 2) for the 4XYJ runs loaded in the previous cell.
# NOTE(review): the structure passed here is still the apo 4XYK PDB although the RMSF data
# are now 4XYJ - this looks like a copy-paste leftover; confirm which PDB is intended.
structure_pdb = '../3_apo_4XYK-17-784_MD/1_apo_4XYK_17-784.pdb'
diff = rmsd_diff(rmsf_mean_df_1, rmsf_mean_df_2, structure_pdb)

