### Attempt to make, new improved x-ray structure analyser

In [1]:
from typing import Tuple, Optional, List
import warnings
import itertools
import pandas as pd
import numpy as np


from MDAnalysis import Universe
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA
from MDAnalysis.analysis import contacts # TODO perhaps remove and use distances implementation?
from MDAnalysis.analysis import distances

  import xdrlib


In [2]:
# Amino acid definitions - helps define interaction types.
POSITIVE_SB_RESIDUES = ("LYS", "ARG")
NEGATIVE_SB_RESIDUES = ("GLU", "ASP")
HYDROPHOBIC_RESIDUES = ("ALA", "VAL", "LEU", "ILE", "PRO", "PHE", "CYS")
# CATION PI
# AROMATIC 
# OTHER INTERACTION TYPES. 
STANDARD_20_RESIDUES = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", 
                        "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER", 
                        "THR", "TRP", "TYR", "VAL"]

### Test single frame analysis

In [3]:
parm_file = r"WT_PTP1B_Closed.pdb"
coord_file = r"WT_PTP1B_Closed.pdb"

from tools_proj.contacts.contact_analysis import single_frame_contact_analysis
result_no_topo = single_frame_contact_analysis(#topology_file=parm_file,
                              coordinates_file=coord_file,
                             out_file="test_file.txt",
                             first_res=1,
                             last_res=250)
result_no_topo

Sytem setup complete, identifying interactions now.
Analysis complete, the file test_file.txt has been written to disk
Time taken: 0:00:11.611758


['GLU1 LYS4 saltbridge sc-sc',
 'GLU1 GLU5 vdw mc-wc',
 'GLU1 SER241 vdw sc-wc',
 'MET2 PHE6 vdw mc-wc',
 'MET2 MET234 vdw sc-sc',
 'MET2 PRO240 vdw sc-mc',
 'MET2 VAL243 vdw sc-wc',
 'MET2 ILE245 vdw sc-sc',
 'GLU3 GLU7 vdw mc-wc',
 'LYS4 GLN8 vdw mc-wc',
 'GLU5 ILE9 vdw wc-wc',
 'GLU5 LYS246 saltbridge sc-sc',
 'PHE6 ASP10 vdw wc-wc',
 'PHE6 TRP15 pipi sc-sc',
 'GLU7 LYS11 vdw mc-wc',
 'GLN8 SER12 vdw mc-wc',
 'ILE9 GLY13 vdw mc-mc',
 'ILE9 SER14 vdw wc-wc',
 'ILE9 TRP15 vdw wc-wc',
 'ILE9 ILE18 vdw sc-sc',
 'ASP10 TRP15 vdw wc-sc',
 'SER14 ILE18 vdw wc-wc',
 'TRP15 TYR19 vdw mc-wc',
 'ALA16 GLN20 vdw mc-wc',
 'ALA17 ASP21 vdw mc-wc',
 'ILE18 ILE22 vdw wc-wc',
 'ILE18 LYS246 vdw sc-sc',
 'TYR19 ARG23 vdw wc-wc',
 'GLN20 HIP24 vdw mc-wc',
 'ASP21 GLU25 vdw wc-wc',
 'ILE22 ALA26 vdw mc-wc',
 'ILE22 LYS246 vdw sc-sc',
 'ILE22 LEU249 vdw sc-sc',
 'ILE22 LEU250 vdw wc-sc',
 'GLU25 LEU250 vdw wc-sc',
 'ALA26 LEU250 vdw wc-sc',
 'SER27 PHE51 vdw mc-sc',
 'PHE29 PHE51 pipi sc-sc',
 'PHE29 HI

In [4]:
result_yes_topo = single_frame_contact_analysis(topology_file=r"WT_PTP1B_Closed.pdb",
                              coordinates_file=r"WT_PTP1B_Closed.pdb",
                             out_file="test_file.txt",
                             first_res=1,
                             last_res=298)
result_yes_topo

Sytem setup complete, identifying interactions now.
Analysis complete, the file test_file.txt has been written to disk
Time taken: 0:00:15.819235


['GLU1 LYS4 saltbridge sc-sc',
 'GLU1 GLU5 vdw mc-wc',
 'GLU1 SER241 vdw sc-wc',
 'MET2 PHE6 vdw mc-wc',
 'MET2 MET234 vdw sc-sc',
 'MET2 PRO240 vdw sc-mc',
 'MET2 VAL243 vdw sc-wc',
 'MET2 ILE245 vdw sc-sc',
 'MET2 TYR270 vdw mc-sc',
 'MET2 VAL273 vdw sc-sc',
 'MET2 ILE274 vdw wc-wc',
 'GLU3 GLU7 vdw mc-wc',
 'GLU3 ILE274 vdw wc-sc',
 'LYS4 GLN8 vdw mc-wc',
 'GLU5 ILE9 vdw wc-wc',
 'GLU5 LYS246 saltbridge sc-sc',
 'GLU5 TYR270 vdw sc-sc',
 'PHE6 ASP10 vdw wc-wc',
 'PHE6 TRP15 pipi sc-sc',
 'PHE6 ARG267 vdw sc-wc',
 'PHE6 TYR270 pipi sc-sc',
 'PHE6 LEU271 vdw sc-sc',
 'PHE6 ILE274 vdw sc-sc',
 'GLU7 LYS11 vdw mc-wc',
 'GLN8 SER12 vdw mc-wc',
 'ILE9 GLY13 vdw mc-mc',
 'ILE9 SER14 vdw wc-wc',
 'ILE9 TRP15 vdw wc-wc',
 'ILE9 ILE18 vdw sc-sc',
 'ASP10 TRP15 vdw wc-sc',
 'ASP10 ARG267 saltbridge sc-sc',
 'SER14 ILE18 vdw wc-wc',
 'TRP15 TYR19 vdw mc-wc',
 'TRP15 ALA263 vdw wc-wc',
 'TRP15 LEU266 vdw sc-wc',
 'TRP15 ARG267 vdw sc-wc',
 'TRP15 TYR270 pipi sc-sc',
 'ALA16 GLN20 vdw mc-wc',
 'A

### Testing multiple frame analysis

In [5]:
from tools_proj.contacts.contact_analysis import multi_frame_contact_analysis

# # Using pdb as both 
# parm_file = r"4YFM_MAB-1_test_traj.pdb" 
# traj_file = r"4YFM_MAB-1_test_traj.pdb" # 5 frames. 

# results_pdb = multi_frame_contact_analysis(topology_file=parm_file,
#                              trajectory_file=traj_file,
#                              out_file="traj_test.txt",
#                              report_time_taken = True
# )
# results_pdb

In [6]:
# results_pdb_single = multi_frame_contact_analysis(#topology_file=parm_file,
#                              trajectory_file=traj_file,
#                              out_file="traj_test.txt",
#                              first_res=1,
#                              #last_res=282, 
#                              report_time_taken = True
# )
# results_pdb_single

In [7]:
# Using prmtop and traj...
parm_file = r"4YFM_MAB-1_apo.prmtop" 
traj_file = r"4YFM_MAB-1_test_traj.nc" # 5 frames. 

results_prmtop_traj = multi_frame_contact_analysis(topology_file=parm_file,
                             trajectory_file=traj_file,
                             out_file="doesn't matter now",
                             last_res=100, 
                             report_time_taken = True
)
results_prmtop_traj

Setup complete, identifying interactions now.
Time taken: 0:00:25.024709


Unnamed: 0,ALA1 LEU5 vdw mc-wc,ALA1 VAL29 vdw sc-sc,PRO2 ALA6 vdw mc-wc,PRO2 GLY30 vdw wc-mc,PRO2 HID31 vdw mc-sc,ASP3 SER7 vdw mc-mc,GLU4 LEU8 vdw mc-wc,LEU5 GLU9 vdw mc-wc,LEU5 ILE16 vdw wc-sc,LEU5 VAL18 vdw sc-sc,...,GLU9 ARG15 vdw sc-mc,ILE16 ARG32 vdw mc-wc,ASP22 GLY26 vdw wc-mc,THR42 PHE46 vdw mc-wc,LYS44 THR97 vdw wc-wc,LEU52 LEU62 vdw wc-sc,ARG55 LEU62 vdw sc-wc,LEU62 THR89 vdw mc-sc,MET88 LEU93 vdw wc-wc,ASP95 TYR100 vdw wc-sc
0,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,1,1,1,0,1,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,0,1,1,0,1,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,1,1,1,1,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,1,1,0,1,1,0,1,...,1,1,1,1,1,1,1,1,1,1


#### New form of pymol projections...

Draw links no longer works

In [8]:
mean_values = results_prmtop_traj.mean().to_dict()
print(mean_values)

{'ALA1 LEU5 vdw mc-wc': 0.6, 'ALA1 VAL29 vdw sc-sc': 1.0, 'PRO2 ALA6 vdw mc-wc': 1.0, 'PRO2 GLY30 vdw wc-mc': 0.6, 'PRO2 HID31 vdw mc-sc': 1.0, 'ASP3 SER7 vdw mc-mc': 0.2, 'GLU4 LEU8 vdw mc-wc': 1.0, 'LEU5 GLU9 vdw mc-wc': 1.0, 'LEU5 ILE16 vdw wc-sc': 0.2, 'LEU5 VAL18 vdw sc-sc': 1.0, 'LEU5 HID31 vdw wc-sc': 0.6, 'ALA6 LYS10 vdw mc-wc': 1.0, 'ALA6 HID31 vdw wc-sc': 1.0, 'SER7 ASP11 vdw mc-wc': 0.8, 'LEU8 PHE12 vdw mc-wc': 1.0, 'LEU8 ILE16 vdw sc-sc': 0.2, 'GLU9 GLY13 vdw mc-mc': 1.0, 'GLU9 GLY14 vdw wc-mc': 1.0, 'GLU9 ARG15 vdw sc-wc': 0.8, 'GLU9 ILE16 vdw wc-wc': 1.0, 'GLU9 HID31 vdw sc-sc': 1.0, 'GLU9 ARG32 saltbridge sc-sc': 1.0, 'PHE12 ILE16 vdw sc-sc': 1.0, 'ARG15 ARG32 vdw sc-sc': 0.6, 'ARG15 GLU35 saltbridge sc-sc': 1.0, 'ARG15 ARG36 vdw sc-mc': 1.0, 'ARG15 PHE37 cationpi sc-sc': 0.4, 'ILE16 HID31 vdw wc-wc': 0.6, 'ILE16 ARG32 vdw mc-sc': 0.8, 'ILE16 PHE37 vdw mc-sc': 1.0, 'GLY17 HID31 vdw mc-mc': 1.0, 'GLY17 ARG32 vdw mc-wc': 0.8, 'GLY17 PHE37 vdw mc-sc': 1.0, 'VAL18 GLY30 vdw 

In [9]:
import re
pymol_mean_values = {}

INTERACTION_TO_COLOR = {"saltbridge": "blue", "vdw": "green", "cationpi": "yellow", "pipi":"orange"}

res_res_scores, res_res_colors = {}, {}
for key, score in mean_values.items():
    res1, res2, interaction_type, residue_parts = key.split(" ")
    res1_numb = int(re.sub(r"[^0-9]", "", res1))
    res2_numb = int(re.sub(r"[^0-9]", "", res2))
    reformatted_key = (res1_numb, res2_numb)
    res_res_scores[reformatted_key] = score
    res_res_colors[reformatted_key] = INTERACTION_TO_COLOR[interaction_type]

print(res_res_colors)
print(res_res_scores)

{(1, 5): 'green', (1, 29): 'green', (2, 6): 'green', (2, 30): 'green', (2, 31): 'green', (3, 7): 'green', (4, 8): 'green', (5, 9): 'green', (5, 16): 'green', (5, 18): 'green', (5, 31): 'green', (6, 10): 'green', (6, 31): 'green', (7, 11): 'green', (8, 12): 'green', (8, 16): 'green', (9, 13): 'green', (9, 14): 'green', (9, 15): 'green', (9, 16): 'green', (9, 31): 'green', (9, 32): 'blue', (12, 16): 'green', (15, 32): 'green', (15, 35): 'blue', (15, 36): 'green', (15, 37): 'green', (16, 31): 'green', (16, 32): 'green', (16, 37): 'green', (17, 31): 'green', (17, 32): 'green', (17, 37): 'green', (18, 30): 'green', (18, 31): 'green', (18, 33): 'green', (19, 28): 'green', (19, 29): 'green', (19, 30): 'green', (19, 33): 'green', (20, 27): 'green', (20, 28): 'green', (20, 29): 'green', (21, 26): 'green', (21, 27): 'green', (21, 28): 'green', (22, 26): 'green', (22, 27): 'green', (32, 35): 'blue', (32, 37): 'green', (39, 43): 'green', (39, 46): 'green', (43, 47): 'green', (44, 48): 'green', (44

In [10]:
from tools_proj.pymol_projections import project_pymol_res_res_scores, project_pymol_per_res_scores, gen_per_res_scores

project_pymol_res_res_scores(res_res_scores=res_res_scores, 
                             res_res_colors=res_res_colors, 
                             out_file="test_pymol_out.py")

# no colors 
project_pymol_res_res_scores(res_res_scores=res_res_scores, 
                             out_file="test_pymol_out_no_colors.py")

The file: test_pymol_out.py was written to disk.
The file: test_pymol_out_no_colors.py was written to disk.


In [11]:
print(res_res_scores)

{(1, 5): 0.2, (1, 29): 1.0, (2, 6): 1.0, (2, 30): 0.4, (2, 31): 1.0, (3, 7): 0.8, (4, 8): 1.0, (5, 9): 1.0, (5, 16): 0.2, (5, 18): 1.0, (5, 31): 0.4, (6, 10): 1.0, (6, 31): 1.0, (7, 11): 0.2, (8, 12): 1.0, (8, 16): 0.6, (9, 13): 1.0, (9, 14): 1.0, (9, 15): 0.2, (9, 16): 1.0, (9, 31): 1.0, (9, 32): 1.0, (12, 16): 1.0, (15, 32): 0.4, (15, 35): 1.0, (15, 36): 1.0, (15, 37): 0.6, (16, 31): 0.4, (16, 32): 0.2, (16, 37): 1.0, (17, 31): 1.0, (17, 32): 0.8, (17, 37): 1.0, (18, 30): 0.2, (18, 31): 0.4, (18, 33): 1.0, (19, 28): 1.0, (19, 29): 0.2, (19, 30): 1.0, (19, 33): 1.0, (20, 27): 1.0, (20, 28): 1.0, (20, 29): 0.2, (21, 26): 0.8, (21, 27): 1.0, (21, 28): 0.2, (22, 26): 0.2, (22, 27): 0.4, (32, 35): 1.0, (32, 37): 1.0, (39, 43): 0.8, (39, 46): 0.4, (43, 47): 0.2, (44, 48): 1.0, (44, 97): 0.2, (44, 98): 0.4, (45, 49): 0.6, (45, 98): 1.0, (46, 50): 1.0, (47, 51): 1.0, (48, 52): 0.8, (48, 94): 1.0, (48, 97): 1.0, (48, 98): 1.0, (49, 53): 1.0, (50, 54): 1.0, (51, 55): 1.0, (51, 61): 0.6, (51, 9

In [12]:
per_res_scores = gen_per_res_scores(res_res_scores=res_res_scores)

In [13]:
project_pymol_per_res_scores(per_res_scores=per_res_scores, out_file="test_pymol_per_res_out.py")

The file: test_pymol_per_res_out.py was written to disk.


### (Side Chain) Hbonds

Can have a protocol if topology provided, but otherwise, this will work with standard 20 amino acids 
Could the expansion just be that I add the backbone names to each one, and then select from the whole aa and not from the side chain? 
Once a hydrogen bond found, can label it based on the atoms used perhaps?

Hbond defintion:
- up till 3.5 A, reducing between 2 and 3.5 good.
- 180 +- 45 degrees?, towards 180 = better.
- Could try to make a model that swaps between 0 and 1 for this? 

In [None]:
SC_HBOND_RES_ATOMS_ACCEPTOR = {"ASN": ["OD1"], 
                                "GLN": ["OE1"], 
                                "SER": ["OG"], 
                                "THR": ["OG1"], 
                                "TYR": ["OH"], 
                                "GLU": ["OE1", "OE2"], 
                                "GLU": ["OE1", "OE2"],
                                "ASP": ["OD1", "OD2"],
                                "ASH": ["OD1", "OD2"],
                                "HIE": ["ND1"],
                                "HID": ["NE2"]
}
# List of possible combinations? 
# Hydrogens that match the pair? 
SC_HBOND_RES_ATOMS_DONOR = {"ASN": [{"ND2": ["HD21", "HD22"]}], 
                            "ARG": [{"NE": ["HE"]}, {"NH1": ["HH11", "HH12"]}, {"NH2": ["HH21", "HH22"]}], 
                            "GLN": [{"NE2": ["HE21", "HE22"]}],
                            "SER": [{"OG": ["HG"]}],
                            "THR": [{"OG1": ["HG1"]}],
                            "TYR": [{"OH": ["HH"]}], 
                            "HIE": [{"NE2": ["HE2"]}], 
                            "HID": [{"ND1": ["HD1"]}], 
                            "HIP": [{"NE2": ["HE2"]}, {"ND1": ["HD1"]}],  
                            "LYS": [{"NZ": ["HZ1", "HZ2", "HZ3"]}], 
                            "LYN": [{"NZ": ["HZ2", "HZ3"]}]
}

# cutoffs. 
HB_DIST_CUTOFF = 3.5 # Donor-acceptor Distance
HB_ANGLE_IDEAL = 180 
HB_ANGLE_TOLERANCE = 45 #

In [None]:
parm_file = r"WT_PTP1B_Closed.pdb" 
traj_file = r"WT_PTP1B_Closed.pdb" 
universe = Universe(parm_file, traj_file) 

In [None]:
res1, res2 = 39, 63 # 39, 63 # Hbond, 
res1_mc_sele = "name C CA O N H and resid " + str(res1)
res1_sc_sele = "not name C CA O N H and resid " + str(res1)
res1_mc_atoms = universe.select_atoms(res1_mc_sele)
res1_sc_atoms = universe.select_atoms(res1_sc_sele)

res2_mc_sele = "name C CA O N H and resid " + str(res2)
res2_sc_sele = "not name C CA O N H and resid " + str(res2)
res2_mc_atoms = universe.select_atoms(res2_mc_sele)
res2_sc_atoms = universe.select_atoms(res2_sc_sele)

res1_name = universe.residues.resnames[res1-1] # 0-indexed
res2_name = universe.residues.resnames[res2-1] # 0-indexed
res1_name, res2_name

In [None]:
# Need to consider that if and elif can both be true, and need to test both! 


if (res1_name in SC_HBOND_RES_ATOMS_ACCEPTOR.keys()) and (res2_name in SC_HBOND_RES_ATOMS_DONOR.keys()):
    hb_potential_acceptor_atoms = SC_HBOND_RES_ATOMS_ACCEPTOR[res1_name]
    hb_potential_donor_h_pairs = SC_HBOND_RES_ATOMS_DONOR[res2_name]

    combinations = list(itertools.product(hb_potential_acceptor_atoms, hb_potential_donor_h_pairs))

    for acceptor, donor in combinations:
        acceptor_atom = res1_sc_atoms.select_atoms("name " + acceptor)
        donor_atom = res2_sc_atoms.select_atoms("name " + list(donor.keys())[0])

        # remove donor_acceptor that fail distance test. 
        d_a_dist = distances.distance_array(acceptor_atom.positions, donor_atom.positions)
        if d_a_dist > HB_DIST_CUTOFF:
            continue

        for hydrogen in list(donor.values())[0]:
            hydrogen_atom = res2_sc_atoms.select_atoms("name " + hydrogen)

            dh_vector = donor_atom.positions[0] - hydrogen_atom.positions[0]
            ha_vector = acceptor_atom.positions[0] - hydrogen_atom.positions[0] 
            dha_angle = angle_between_two_vectors(dh_vector, ha_vector) 

            delta_dha_ideal = min(
                np.abs(dha_angle - HB_ANGLE_IDEAL),
                np.abs(dha_angle - HB_ANGLE_IDEAL - 180)
            )
            if delta_dha_ideal < HB_ANGLE_TOLERANCE:
                print("H-bond found!")
                print(acceptor_atom.names, donor_atom.names, hydrogen_atom.names)

elif (res1_name in SC_HBOND_RES_ATOMS_DONOR.keys()) and (res2_name in SC_HBOND_RES_ATOMS_ACCEPTOR.keys()):
    hb_potential_donor_h_pairs = SC_HBOND_RES_ATOMS_DONOR[res1_name]
    hb_potential_acceptor_atoms = SC_HBOND_RES_ATOMS_ACCEPTOR[res2_name]
    # TODO, logic would be here, for follow up.
    # test_sc_hbond()

else: 
    print("no hbonds possible.")

### Hydrophobic

Requirements:
- 4 Å min distance.
- Between two "hydrophobic" residues 
- The atoms that contact must be non-polar (e.g. carbon).
- The atom should also be from the side-chain.

### Metals?
