In [1]:
import atomium
import numpy as np
import matplotlib as plt
%matplotlib inline 

In [8]:
# Fetch the two structures by PDB code. The ".pdb" suffix on the second code
# presumably requests the legacy PDB-format file instead of the default .cif
# (confirm against the atomium docs).
# NOTE(review): `pdb` is also the name of Python's stdlib debugger module; a
# later `import pdb` would rebind this variable.
pdb = atomium.fetch("1ADG")  # -> <1ADG.cif File>, an atomium.data.File
pdb2 = atomium.fetch('5XME.pdb')  # NOTE(review): pdb2 is not used by any cell shown here

In [10]:
# Display the title string stored on the fetched 1ADG file object.
pdb.title

'CRYSTALLOGRAPHIC STUDIES OF TWO ALCOHOL DEHYDROGENASE-BOUND ANALOGS OF THIAZOLE-4-CARBOXAMIDE ADENINE DINUCLEOTIDE (TAD), THE ACTIVE ANABOLITE OF THE ANTITUMOR AGENT TIAZOFURIN'

In [15]:
# The basic structures within a model are chains, residues, ligands, and atoms.
# Biological molecules are hierarchical: atoms build up into residues, residues
# into chains, and chains into assemblies.
pdb.model

<Model (1 chain, 3 ligands)>

In [11]:
# Print the centre of mass of each model held by the file, one per line.
for model in pdb.models:
    print(model.center_of_mass)

[ 4.89640762 15.08309274 24.40335099]


In [80]:
# Grab the residue set once so it can be counted here and reused later.
residues = pdb.model.residues()  # {<Residue ALA (A.69)>, <Residue GLN (A.96)>,...

print(pdb.model.chains())
print("=" * 50)
print(len(residues))

# Presumably prepares the model for faster distance/neighbour queries in the
# cells that follow -- confirm against the atomium documentation.
pdb.model.optimise_distances()


{<Chain A (374 residues)>}
374


In [41]:
# A ligand is a small molecule, usually associated with a polymer chain.
pdb.model.ligands()

{<Ligand SAD (A.378)>, <Ligand ZN (A.376)>, <Ligand ZN (A.377)>}

In [57]:
# These are the two zinc ligands (zinc(II) ions, Zn2+), looked up by their IDs.
# Class type: atomium.structures.Ligand
lig_a376 = pdb.model.ligand(id="A.376") 
lig_a377 = pdb.model.ligand(id="A.377") 

In [84]:
# Atoms near the first zinc ligand. The call does not specify what unit this
# cutoff is in -- presumably angstroms; TODO confirm in the atomium docs.
lig_a376.nearby_atoms(10.0)
# Sample of the returned set:
# {<Atom 1050 (CG)>,
# <Atom 1051 (CD1)>,
# <Atom 1052 (CD2)>,
# <Atom 1288 (CG)>,
# <Atom 1289 (CD1)>,...


{<Atom 1050 (CG)>,
 <Atom 1051 (CD1)>,
 <Atom 1052 (CD2)>,
 <Atom 1288 (CG)>,
 <Atom 1289 (CD1)>,
 <Atom 1290 (CD2)>,
 <Atom 1291 (CE1)>,
 <Atom 1292 (CE2)>,
 <Atom 1293 (CZ)>,
 <Atom 1299 (CB)>,
 <Atom 1300 (CG)>,
 <Atom 1301 (CD1)>,
 <Atom 1302 (CD2)>,
 <Atom 1314 (OG1)>,
 <Atom 1315 (CG2)>,
 <Atom 1317 (HG1)>,
 <Atom 1331 (OG1)>,
 <Atom 1334 (HG1)>,
 <Atom 1343 (CE1)>,
 <Atom 1345 (CZ)>,
 <Atom 1556 (C)>,
 <Atom 1557 (O)>,
 <Atom 1562 (CA)>,
 <Atom 1563 (C)>,
 <Atom 1564 (O)>,
 <Atom 1567 (CD1)>,
 <Atom 1570 (N)>,
 <Atom 1572 (C)>,
 <Atom 1573 (O)>,
 <Atom 1579 (N)>,
 <Atom 1580 (CA)>,
 <Atom 1581 (C)>,
 <Atom 1582 (O)>,
 <Atom 1583 (H)>,
 <Atom 1584 (N)>,
 <Atom 1585 (CA)>,
 <Atom 1586 (C)>,
 <Atom 1587 (O)>,
 <Atom 1588 (CB)>,
 <Atom 1589 (SG)>,
 <Atom 1590 (H)>,
 <Atom 1591 (N)>,
 <Atom 1592 (CA)>,
 <Atom 1593 (C)>,
 <Atom 1594 (O)>,
 <Atom 1595 (H)>,
 <Atom 1596 (N)>,
 <Atom 1597 (CA)>,
 <Atom 1598 (C)>,
 <Atom 1599 (O)>,
 <Atom 1607 (H)>,
 <Atom 1608 (N)>,
 <Atom 1609 (CA)>,
 <

In [120]:
# For the zinc ligand A.376, collect all the atoms that lie within a
# 10 angstrom sphere centred on the ligand's centre of mass.

radius = 10.0  # Å
center_loc_lig_a376 = tuple(lig_a376.center_of_mass)
atoms_in_lig_a376 = list(
    pdb.model.atoms_in_sphere(center_loc_lig_a376, radius)
)

In [171]:
# Measure each atom's distance from the ligand's centre of mass and sort the
# atoms from nearest to farthest.

# One record per atom. The index that enumerate() used to supply was never
# read, so the records are built with a plain comprehension instead.
dist_to_lig_a376 = [
    {
        "atom": atom,
        "distance": atom.distance_to(center_loc_lig_a376),
        "is_metal": atom.is_metal,
        "atom_id": atom.id,
        "near_hets": list(atom.nearby_hets(cutoff=3)),
    }
    for atom in atoms_in_lig_a376
]

sorted_dist_to_lig_a376 = sorted(dist_to_lig_a376, key=lambda d: d["distance"])

# Sanity check: the zinc atom itself (<Atom 3393 (ZN)>) should be at the top of
# the list with a distance of 0.0 (a small tolerance absorbs float rounding).
assert sorted_dist_to_lig_a376, "no atoms found inside the sphere"
assert sorted_dist_to_lig_a376[0]["distance"] < 1e-6, "nearest atom is not at the ligand's centre of mass"

sorted_dist_to_lig_a376

# [{'atom': <Atom 3393 (ZN)>,
#   'distance': 0.0,
#   'is_metal': True,
#   'atom_id': 3393,
#   'near_hets': [<Water HOH (A.388)>,<Residue CYS (A.174)>,<Residue HIS (A.67)>,<Residue CYS (A.46)>]
#  },
#  {'atom': <Atom 3473 (O)>,
#   'distance': 1.6169613477136673,
#   'is_metal': False,
#   'atom_id': 3473,
#   'near_hets': [<Ligand ZN (A.376)>,<Residue CYS (A.174)>,<Residue SER (A.48)>,<Water HOH (A.391)>,<Residue HIS (A.67)>]
#  },
#    ...

[{'atom': <Atom 3393 (ZN)>,
  'distance': 0.0,
  'is_metal': True,
  'atom_id': 3393,
  'near_hets': [<Water HOH (A.388)>,
   <Residue CYS (A.174)>,
   <Residue HIS (A.67)>,
   <Residue CYS (A.46)>]},
 {'atom': <Atom 3473 (O)>,
  'distance': 1.6169613477136673,
  'is_metal': False,
  'atom_id': 3473,
  'near_hets': [<Ligand ZN (A.376)>,
   <Residue CYS (A.174)>,
   <Residue SER (A.48)>,
   <Water HOH (A.391)>,
   <Residue HIS (A.67)>]},
 {'atom': <Atom 609 (NE2)>,
  'distance': 1.9514922495362346,
  'is_metal': False,
  'atom_id': 609,
  'near_hets': [<Ligand ZN (A.376)>, <Water HOH (A.388)>]},
 {'atom': <Atom 1589 (SG)>,
  'distance': 2.007570920291484,
  'is_metal': False,
  'atom_id': 1589,
  'near_hets': [<Ligand ZN (A.376)>, <Water HOH (A.388)>]},
 {'atom': <Atom 3474 (H1)>,
  'distance': 2.4257592625815123,
  'is_metal': False,
  'atom_id': 3474,
  'near_hets': [<Ligand ZN (A.376)>,
   <Residue SER (A.48)>,
   <Water HOH (A.391)>]},
 {'atom': <Atom 3475 (H2)>,
  'distance': 2.487

In [169]:
# Re-query the neighbours (cutoff of 3) of the second-closest atom, i.e. index 1
# of the sorted list, to compare against the "near_hets" value stored for it.
sorted_dist_to_lig_a376[1]["atom"].nearby_hets(cutoff=3)

{<Ligand ZN (A.376)>,
 <Residue CYS (A.174)>,
 <Residue HIS (A.67)>,
 <Residue SER (A.48)>,
 <Water HOH (A.391)>}

In [129]:
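# NOTE: unused draft, kept for reference only. As written, the loop variable
# `atom` is never used inside the comprehension, so both vectors equal
# loc1 - loc2 and the returned angle is always (approximately) 0.
# TODO: decide what the two vectors should be before enabling this.
# def angle(loc1, loc2):
#     vectors = [
#      [v1 - v2 for v1, v2 in zip(loc1, loc2)] for atom in (loc1, loc2)]
#     normalized = [np.linalg.norm(v) for v in vectors]
#     if 0 in normalized:
#         return 0
#     vectors = [v / n for v, n in zip(vectors, normalized)]
#     return np.arccos(np.clip(np.dot(vectors[0], vectors[1]), -1.0, 1.0))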

notes:
Proteins are the major ligands for zinc(II) ions (“zinc”). Zinc can be readily available from proteins or not available at all unless the protein is degraded. In a way, the coordination chemistry of zinc in proteins bridges the one in natural waters, where zinc is in an available form, and the one in minerals, where zinc is unavailable without chemical processing. Zinc is essential for virtually all cellular functions through its presence in an estimated 3000 human proteins, the zinc sites of which have been predicted by bioinformatics approaches from the signature motifs in their protein sequences [1]. If one considers additional functions of zinc in regulation, the number of zinc proteins in the human zinc proteome will be even larger [2]. 

The coordination chemistry of zinc in proteins and peptides involves N, O, and S donors of the side chains of histidine, glutamate/aspartate, and/or cysteine with any permutation of these ligands and with the number of protein ligands ranging from three to six. The ligands may not stem from a single protein but from up to four proteins. One property regularly cited for zinc is its flexibility in coordination due to the lack of ligand field stabilization. This allows for dynamic coordination environments of zinc ions, which is critical for example in its catalytic prowess in enzymes when adopting different coordination numbers in interactions with substrates. 

A critical chemical issue that is important for the functions of proteins is their affinity towards zinc. Using a limited number of ligand donors and geometries, proteins must control and adjust affinities for zinc according to physiological functions. These functions are determined by the structural properties of zinc binding sites such as the presence of a second coordination sphere that engages in hydrogen bonding to ligands, including water molecules when present, geometric strains (entasis), pockets with different dielectric constants (hydrophobic cores), all of which modulate the properties of the bound zinc. For example, in zinc fingers with a ββα fold and in zinc-containing LIM domains, hydrogen bonds and hydrophobic and electrostatic interactions around the bound zinc affect binding and exchange kinetics [3], [4]. Such stabilization through interactions in the second coordination sphere, though not always directly obvious from inspection of the protein structure, occurs in both intra- or intermolecular zinc binding sites and is responsible for stabilizing some sites to affinities for zinc as high as femtomolar in zinc-binding domains such as the zinc hook [5]. A lack or a limited number of stabilizing secondary interactions also can have the opposite effect, namely decreasing affinities of proteins for zinc as a way of controlling their function [6]. 

Zinc coordination in proteins has been reviewed and catalogued extensively and hence is not the subject of this article. An important subject that has not been reviewed in the literature, however, is the biological coordination chemistry of zinc ions when not bound to proteins – the focus of this article. This subject matter is important with regard to the way zinc is controlled in biology and the functions of zinc as a signalling ion as discussed in many articles in this special issue.

A residue is a single unit that makes up a polymer, such as an amino acid in a polypeptide or protein. (Starch, proteins and peptides, and DNA and RNA are all examples of biopolymers, in which the monomer units are, respectively, sugars, amino acids, and nucleotides.)

A ligand is either:
(1) A molecule, ion or atom bonded to the central metal atom of a coordination compound (a compound being a substance consisting of atoms or ions of two or more elements that are chemically bonded together, e.g. carbon dioxide, which consists of one carbon and two oxygen atoms).
(2) Any substance (e.g. hormone, drug, functional group, etc.) that binds specifically and reversibly to another chemical entity to form a larger complex.


