In [None]:
import numpy as np
import MDAnalysis as mda
import pandas as pd
import matplotlib.pyplot as pp
import urllib.request
from MDAnalysis.analysis import distances
from IPython.display import Markdown, HTML
%matplotlib widget

# PDB Ids
Retrieve the PDB files from RCSB (already done; there is no need to repeat this unless the list of IDs changes).

In [None]:
# Set to True to (re-)download the PDB files into pdbs/
download = False

# PDB entries included in the study
pdb_ids = [
    '1mey', '4qf3', '6uei', '6uej', '2puy', '6fi1', '6fhq', '5yc3',
    '3t7l', '3u9g', '4q6f', '3iuf', '4bbq', '5yc4', '5y20',
]

if download:
    for pdb_id in pdb_ids:
        url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
        urllib.request.urlretrieve(url, f"pdbs/{pdb_id}.pdb")

### Total no. of PDBs studied

In [None]:
# Number of PDB entries in the study set (shown as the cell output)
len(pdb_ids)

In [None]:
# Load each structure from pdbs/ once and keep the Universe keyed by its PDB id.
unis = {
    pdb_id: mda.Universe(f"pdbs/{pdb_id}.pdb")
    for pdb_id in pdb_ids
}

In [None]:
def nearby_residues(uni, r=3):
    """Map every zinc atom to the residues surrounding it.

    Parameters
    ----------
    uni
        Object exposing ``select_atoms`` (an MDAnalysis Universe in this
        notebook).
    r
        Cutoff handed to the ``around`` selection keyword (default 3).

    Returns
    -------
    dict
        Keys are the atom indices of all atoms selected by ``name ZN``;
        each value is the ``.residues`` of the selection
        ``same residue as (around r index <key>)``, i.e. whole residues that
        have at least one atom within the cutoff of that zinc.
    """
    zinc_indices = uni.select_atoms('name ZN').indices

    return {
        zn_index: uni.select_atoms(
            f'same residue as (around {r} index {zn_index})'
        ).residues
        for zn_index in zinc_indices
    }

# Find pdb ids with list of Zincs that match 1ZNF

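By "match" here I mean that a zinc has HIS and CYS residues within the given radius. Two criteria are tracked: four HIS/CYS residues in total in any combination (column `N4HC`, used for the green/red colouring below), and exactly 2 HIS and 2 CYS (column `N2H2C`).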

In [None]:
# One dict per matching criterion, each mapping PDB id -> list of Zn atom indices:
#   matching_zns1 : HIS + CYS neighbours add up to exactly 4 (any mix)
#   matching_zns2 : exactly 2 HIS and 2 CYS neighbours
matching_zns1, matching_zns2 = {}, {}
matching_zns = (matching_zns1, matching_zns2)

# Column-wise accumulator for the summary DataFrame (one entry per PDB).
df_data = {
    'PDBid': [],
    'Zn_sites': [],
    'N4HC': [],
    'N2H2C': [],
}

for pid, uni in unis.items():
    zn_atoms = uni.select_atoms('name ZN')
    n_zn = zn_atoms.n_atoms
    display(Markdown(f"### {pid} : {n_zn} Zn sites "))

    df_data['PDBid'].append(pid.upper())
    df_data['Zn_sites'].append(n_zn)

    # Number of Zn sites in this PDB satisfying criterion 1 and criterion 2.
    site_counts = [0, 0]

    for ind, shell in nearby_residues(uni, r=3).items():
        resnames = shell.resnames
        his_count = np.count_nonzero(resnames == 'HIS')
        cys_count = np.count_nonzero(resnames == 'CYS')

        is_4hc = (his_count + cys_count) == 4
        is_2h2c = (his_count == 2) and (cys_count == 2)

        # Colour by the looser criterion; use is_2h2c for the exact residue match.
        col = 'green' if is_4hc else 'red'
        display(
            Markdown(f'  <span style="color:{col}"> {ind:5d} : {"--".join(resnames)} </span>')
        )

        for n, matched in enumerate((is_4hc, is_2h2c)):
            if not matched:
                continue
            site_counts[n] += 1
            matching_zns[n].setdefault(pid, []).append(ind)

    n_4hc, n_2h2c = site_counts
    df_data['N4HC'].append(n_4hc)
    df_data['N2H2C'].append(n_2h2c)

# Get minimum Zn-water distance for each matching binding site

In [None]:


label = 'Min H2O dist'

# Start every PDB at NaN so that entries without a matching Zn site, or
# without any HOH residues, stay visibly "missing" in the final table instead
# of receiving a made-up distance.
df_data[label] = np.full(len(df_data['PDBid']), np.nan)

df = pd.DataFrame(df_data)

# NOTE(review): both criteria write into the same column, so for a PDB that is
# present in both dicts the value from the second pass overwrites the first -
# confirm that this is intended.
for mz in matching_zns:

    for pid, inds in mz.items():
        display(Markdown(f"## {pid}"))

        uni = unis[pid]
        # The water selection depends only on the structure, not on the Zn
        # site, so it is computed once per PDB.
        waters = uni.select_atoms('resname HOH')
        if not len(waters.positions):
            # No distance can be computed; leave the NaN placeholder in place.
            print("No waters")
            continue

        # Running minimum over all matching Zn sites of this PDB. Starting
        # from +inf (rather than an arbitrary cap) keeps large distances exact.
        smallest_d = np.inf
        for ind in inds:
            zn = uni.select_atoms(f'index {ind}')
            dists = distances.distance_array(zn.positions, waters.positions)
            mind = np.min(dists)

            if mind < smallest_d:
                smallest_d = mind

            # Highlight sites with a water closer than 4 (coordinate units,
            # presumably Angstrom - confirm).
            col = 'red' if mind < 4 else 'white'
            display(Markdown(f' <span style="color:{col}"> {ind} : {mind:4.2f} </span>'))

        # Single .loc assignment: chained indexing (df[label][mask] = ...) may
        # write to a temporary copy and is a no-op under pandas Copy-on-Write.
        df.loc[df.PDBid == pid.upper(), label] = smallest_d
        print(smallest_d)

In [None]:
# Show the per-PDB summary table, including the minimum Zn-water distance column
df

In [None]:

# Print the summary table as LaTeX source: centred columns separated by
# vertical rules, floats with two decimals, and no index column.
print(df.to_latex(index=False, column_format='|c|c|c|c|c|', float_format="%4.2f"))