<a href="https://colab.research.google.com/github/phenix-project/Colabs/blob/main/alphafold2/AlphaFold_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### <center> <b> <font color='black'>  AlphaFold development suite</font></b> </center>

<p><font color='green'> Instructions</p>

<p>A. SETUP:  Run cells 1-3 to set up, or choose <b><i>Runtime/Run all</i></b> to run everything (about 5 min.)</p>

<p>B. DEVELOPMENT CYCLE:  </p>
<ul>
<li> 1a. Either edit files in the alphafold directory, or </li>
<li> 1b. Edit files in GitHub and re-run cell 2 ("Load current alphafold_working from phenix-project github")</li>
<li> 2. Run cell B below to run AlphaFold (2 min.)</li>
</ul>
</font>



In [None]:
#@markdown 1. Set up imports and load dependencies...this is the slow step
import os, sys
import os.path
import re
import hashlib
from pathlib import Path
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO
from google.colab import files
import shutil
from string import ascii_uppercase

# One-time environment setup for the notebook: Python packages, AlphaFold
# model parameters, system tools, a conda installation with the template
# search programs, and OpenMM.
#
# Many commands below end in `2>&1 1>/dev/null`.  The shell applies
# redirections left to right, so stderr is first pointed at the notebook
# output and stdout is then discarded: only error messages are shown.

! echo "Installing biopython ..."
!  pip -q install biopython dm-haiku ml-collections py3Dmol

# Start from an empty params directory and unpack the parameter archive
# straight from the download stream (nothing is kept as a .tar on disk).
# curl flags: -f fail on HTTP errors, -s silent, -S still show errors,
# -L follow redirects.
! echo "Downloading model parameters..."
!    rm -rf params
!    mkdir params
!    curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar  | tar x -C params

!echo "Downloading jq curl zlib1g gawk..."
!    apt-get -qq -y update 2>&1 1>/dev/null
!    apt-get -qq -y install jq curl zlib1g gawk 2>&1 1>/dev/null

# wget -nc skips the download when the installer file is already present.
# The installer is run with -b (batch, no prompts), -f (do not fail if the
# prefix already exists) and -p /usr/local (install prefix), then removed.
! echo "Setting up conda..."
!    wget -qnc https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
!    bash Miniconda3-latest-Linux-x86_64.sh -bfp /usr/local 2>&1 1>/dev/null
!    rm Miniconda3-latest-Linux-x86_64.sh

! echo "Setting up template search methods..."

# python=3.7 is pinned here; the sys.path entries added further down use the
# matching python3.7 directories.
! conda install -y -q -c conda-forge -c bioconda kalign3=3.2.2 hhsuite=3.3.0 python=3.7 2>&1 1>/dev/null

! echo "Installing openmm..."
! conda install -qy -c omnia openmm 2>&1 1>/dev/null

! echo "Installing Mock..."
!  pip install mock
 
# Make the packages installed under /usr/local importable from this kernel.
# NOTE(review): these appends are repeated every time the cell is re-run, so
# sys.path accumulates duplicate entries.
! echo "Setting paths to site-packages and dist-packages..."
sys.path.append("/usr/local/lib/python3.7/site-packages")
sys.path.append("/usr/local/lib/python3.7/dist-packages")
sys.path.append("/usr/local/lib/python3.7/site-packages/simtk/")

target_all_atom_positions = None # Placeholder; cell 3b is described as loading a target into this name

! echo "Done with loading dependencies"

In [None]:
#@markdown 2. Load current alphafold_working from phenix-project github.  
#@markdown  This is quick...
#@markdown cycle back here after making a change in github alphafold_working
! echo "installing alphafold from https://github.com/phenix-project/af_development.git in /contents/alphafold"

! rm -rf af_development
! rm -rf alphafold
! rm -f colabfold.py


! git clone https://github.com/phenix-project/af_development.git --quiet
! (cd af_development; git checkout --quiet)
! mv af_development/alphafold alphafold
! mv af_development/colabfold.py .
! mv alphafold/run_alphafold.py .
! mv alphafold/run_alphafold_test.py .



!    # remove "END" from PDBs, otherwise biopython complains
!    sed -i "s/pdb_lines.append('END')//" /content/alphafold/common/protein.py
!    sed -i "s/pdb_lines.append('ENDMDL')//" /content/alphafold/common/protein.py


! echo "Ready with alphafold in /content/alphafold"
! ls -ltr /content/alphafold

! echo "clearing out /tmp/absl_testing/"
! rm -rf /tmp/absl_testing/
! mkdir /tmp/absl_testing/

! echo " Clearing python caches ..."
# Drop every cached module and every notebook-level name whose name mentions
# AlphaFold, Protein or ColabFold, so that the next import reloads the freshly
# cloned sources instead of reusing a stale copy.  Both name lists are
# snapshotted up front because entries are deleted while looping.
for name in [*sys.modules, *globals()]:
  for marker in ('alphafold', 'protein', 'Alphafold', 'Protein', 'colabfold'):
    if marker not in name:
      continue
    if name in sys.modules:
      del sys.modules[name]
    if name in globals():
      del globals()[name]
      assert name not in globals()
      # The name is gone; no need to try the remaining markers.
      break


# Expose the conda and system package directories to child processes through
# PYTHONPATH.  The variable may be unset, so read it with a default instead of
# indexing os.environ (which would raise KeyError).  This cell is re-run on
# every development cycle, so each directory is added only when it is not
# already one of the ':'-separated entries; the existing value is kept as is.
_current_pythonpath = os.environ.get('PYTHONPATH', '')
_present_entries = _current_pythonpath.split(':')
_missing_entries = [
    entry for entry in (
        "/opt/conda/bin",
        "/opt/conda/lib/python3.7/site-packages",
        "/usr/local/lib/python3.7/dist-packages",
    )
    if entry not in _present_entries
]
os.environ['PYTHONPATH'] = ':'.join(
    ([_current_pythonpath] if _current_pythonpath else []) + _missing_entries)
# Print the lines of run_alphafold_test.py that contain "Version", leaving
# out any that also contain "Apache".
! echo "`grep Version run_alphafold_test.py|grep -v Apache`"
! echo "Done loading current version."

In [None]:
#@markdown 3a. Run ProteinTest (optional, local code)

# Directory holding the test PDB files; ProteinTest joins it onto
# absltest.get_default_test_srcdir().
TEST_DATA_DIR = 'alphafold/common/testdata/'

from absl.testing import parameterized
from absl.testing import absltest
from alphafold.common import protein
import numpy as np
class ProteinTest(parameterized.TestCase):
  """Checks of alphafold.common.protein against PDB files in TEST_DATA_DIR.

  The tests parse PDB text with protein.from_pdb_string and verify the array
  shapes of the result, the round trip through protein.to_pdb, and the mask
  returned by protein.ideal_atom_mask.
  """

  def _read_test_pdb(self, pdb_file='2rbg.pdb'):
    """Return the text of `pdb_file` stored under TEST_DATA_DIR."""
    pdb_path = os.path.join(absltest.get_default_test_srcdir(), TEST_DATA_DIR,
                            pdb_file)
    with open(pdb_path) as f:
      return f.read()

  def _check_shapes(self, prot, num_res):
    """Check that the processed shapes are correct."""
    # Imported here because the cell-level imports do not provide it.
    from alphafold.common import residue_constants

    num_atoms = residue_constants.atom_type_num
    self.assertEqual((num_res, num_atoms, 3), prot.atom_positions.shape)
    self.assertEqual((num_res,), prot.aatype.shape)
    self.assertEqual((num_res, num_atoms), prot.atom_mask.shape)
    self.assertEqual((num_res,), prot.residue_index.shape)
    self.assertEqual((num_res, num_atoms), prot.b_factors.shape)
    print("Finished _check_shapes")

  @parameterized.parameters(('2rbg.pdb', 'A', 282),
                            ('2rbg.pdb', 'B', 282))
  def test_from_pdb_str(self, pdb_file, chain_id, num_res):
    """Parse one chain and check array shapes and the aatype value range."""
    # Imported here because the cell-level imports do not provide it.
    from alphafold.common import residue_constants

    pdb_string = self._read_test_pdb(pdb_file)
    prot = protein.from_pdb_string(pdb_string, chain_id)
    self._check_shapes(prot, num_res)
    print(f"Total residues: {prot.aatype.shape[0]}")
    self.assertGreaterEqual(prot.aatype.min(), 0)
    # Allow equal since unknown restypes have index equal to restype_num.
    self.assertLessEqual(prot.aatype.max(), residue_constants.restype_num)
    print("Finished test_from_pdb_str")

  def test_to_pdb(self):
    """Write chain A back to PDB text, re-parse it and compare all fields."""
    pdb_string = self._read_test_pdb()
    prot = protein.from_pdb_string(pdb_string, chain_id='A')
    pdb_string_reconstr = protein.to_pdb(prot)
    prot_reconstr = protein.from_pdb_string(pdb_string_reconstr)
    print(f"Total residues: {prot.aatype.shape[0]}")

    # Integer-valued fields must match exactly; float fields only up to the
    # precision that survives the text round trip.
    np.testing.assert_array_equal(prot_reconstr.aatype, prot.aatype)
    np.testing.assert_array_almost_equal(
        prot_reconstr.atom_positions, prot.atom_positions)
    np.testing.assert_array_almost_equal(
        prot_reconstr.atom_mask, prot.atom_mask)
    np.testing.assert_array_equal(
        prot_reconstr.residue_index, prot.residue_index)
    np.testing.assert_array_almost_equal(
        prot_reconstr.b_factors, prot.b_factors)
    print("Finished test_to_pdb")

  def test_ideal_atom_mask(self):
    """Compare each residue's atom mask in chain A with the ideal mask."""
    pdb_string = self._read_test_pdb()
    prot = protein.from_pdb_string(pdb_string, chain_id='A')
    print(f"Total residues: {prot.aatype.shape[0]}")

    ideal_mask = protein.ideal_atom_mask(prot)
    # Residue numbers whose mask is expected to differ from the ideal one;
    # every other residue must match it exactly.
    non_ideal_residues = {102, *range(127, 285)}
    for i, (res, atom_mask) in enumerate(
        zip(prot.residue_index, prot.atom_mask)):
      matches_ideal = np.all(atom_mask == ideal_mask[i])
      if res in non_ideal_residues:
        self.assertFalse(matches_ideal, msg=f'{res}')
      else:
        self.assertTrue(matches_ideal, msg=f'{res}')
    print("Finished test_ideal_atom_mask")
    
# Run two of the tests by calling them directly on an instance instead of
# going through a test runner; test_from_pdb_str is not invoked here.
t = ProteinTest()

t.test_to_pdb()
t.test_ideal_atom_mask()



In [None]:
#@markdown 3b. load target as <b><i>target_all_atom_positions</i></b> (optional)
text = """ATOM      1  N   PRO A   1     -11.409  -6.141   4.110  1.00  0.00           N  
ATOM      2  CA  PRO A   1     -10.216  -5.307   3.945  1.00  0.00           C  
ATOM      3  C   PRO A   1      -9.088  -6.028   3.211  1.00  0.00           C  
ATOM      4  CB  PRO A   1      -9.810  -4.984   5.386  1.00  0.00           C  
ATOM      5  O   PRO A   1      -8.908  -7.237   3.383  1.00  0.00           O  
ATOM      6  CG  PRO A   1     -11.029  -5.276   6.200  1.00  0.00           C  
ATOM      7  CD  PRO A   1     -11.803  -6.371   5.524  1.00  0.00           C  
ATOM      8  N   ILE A   2      -8.624  -5.196   2.203  1.00  0.00           N  
ATOM      9  CA  ILE A   2      -7.508  -5.766   1.457  1.00  0.00           C  
ATOM     10  C   ILE A   2      -6.253  -4.925   1.684  1.00  0.00           C  
ATOM     11  CB  ILE A   2      -7.825  -5.858  -0.053  1.00  0.00           C  
ATOM     12  O   ILE A   2      -6.294  -3.697   1.580  1.00  0.00           O  
ATOM     13  CG1 ILE A   2      -9.048  -6.751  -0.288  1.00  0.00           C  
ATOM     14  CG2 ILE A   2      -6.611  -6.375  -0.829  1.00  0.00           C  
ATOM     15  CD1 ILE A   2      -9.686  -6.580  -1.660  1.00  0.00           C  
ATOM     16  N   ALA A   3      -5.098  -5.598   2.117  1.00  0.00           N  
ATOM     17  CA  ALA A   3      -3.787  -4.962   2.224  1.00  0.00           C  
ATOM     18  C   ALA A   3      -2.829  -5.500   1.165  1.00  0.00           C  
ATOM     19  CB  ALA A   3      -3.205  -5.173   3.620  1.00  0.00           C  
ATOM     20  O   ALA A   3      -2.724  -6.714   0.972  1.00  0.00           O  
ATOM     21  N   GLN A   4      -2.268  -4.537   0.410  1.00  0.00           N  
ATOM     22  CA  GLN A   4      -1.195  -4.866  -0.522  1.00  0.00           C  
ATOM     23  C   GLN A   4       0.144  -4.323  -0.033  1.00  0.00           C  
ATOM     24  CB  GLN A   4      -1.507  -4.318  -1.916  1.00  0.00           C  
ATOM     25  O   GLN A   4       0.270  -3.130   0.249  1.00  0.00           O  
ATOM     26  CG  GLN A   4      -0.531  -4.778  -2.990  1.00  0.00           C  
ATOM     27  CD  GLN A   4      -0.914  -4.295  -4.376  1.00  0.00           C  
ATOM     28  NE2 GLN A   4       0.017  -4.399  -5.319  1.00  0.00           N  
ATOM     29  OE1 GLN A   4      -2.038  -3.834  -4.599  1.00  0.00           O  
ATOM     30  N   ILE A   5       1.091  -5.243   0.107  1.00  0.00           N  
ATOM     31  CA  ILE A   5       2.419  -4.888   0.594  1.00  0.00           C  
ATOM     32  C   ILE A   5       3.460  -5.179  -0.485  1.00  0.00           C  
ATOM     33  CB  ILE A   5       2.767  -5.648   1.893  1.00  0.00           C  
ATOM     34  O   ILE A   5       3.525  -6.294  -1.009  1.00  0.00           O  
ATOM     35  CG1 ILE A   5       1.727  -5.352   2.980  1.00  0.00           C  
ATOM     36  CG2 ILE A   5       4.176  -5.286   2.370  1.00  0.00           C  
ATOM     37  CD1 ILE A   5       1.491  -6.507   3.943  1.00  0.00           C  
ATOM     38  N   HIS A   6       4.161  -4.107  -0.848  1.00  0.00           N  
ATOM     39  CA  HIS A   6       5.333  -4.226  -1.707  1.00  0.00           C  
ATOM     40  C   HIS A   6       6.621  -4.179  -0.891  1.00  0.00           C  
ATOM     41  CB  HIS A   6       5.339  -3.118  -2.762  1.00  0.00           C  
ATOM     42  O   HIS A   6       6.845  -3.234  -0.132  1.00  0.00           O  
ATOM     43  CG  HIS A   6       4.114  -3.098  -3.618  1.00  0.00           C  
ATOM     44  CD2 HIS A   6       2.920  -2.483  -3.448  1.00  0.00           C  
ATOM     45  ND1 HIS A   6       4.031  -3.777  -4.815  1.00  0.00           N  
ATOM     46  CE1 HIS A   6       2.836  -3.578  -5.345  1.00  0.00           C  
ATOM     47  NE2 HIS A   6       2.142  -2.797  -4.536  1.00  0.00           N  
ATOM     48  N   ILE A   7       7.376  -5.238  -1.103  1.00  0.00           N  
ATOM     49  CA  ILE A   7       8.596  -5.333  -0.307  1.00  0.00           C  
ATOM     50  C   ILE A   7       9.727  -5.897  -1.163  1.00  0.00           C  
ATOM     51  CB  ILE A   7       8.384  -6.210   0.947  1.00  0.00           C  
ATOM     52  O   ILE A   7       9.479  -6.577  -2.162  1.00  0.00           O  
ATOM     53  CG1 ILE A   7       8.210  -7.680   0.548  1.00  0.00           C  
ATOM     54  CG2 ILE A   7       7.181  -5.716   1.756  1.00  0.00           C  
ATOM     55  CD1 ILE A   7       8.197  -8.644   1.726  1.00  0.00           C  
ATOM     56  N   LEU A   8      11.030  -5.490  -0.756  1.00  0.00           N  
ATOM     57  CA  LEU A   8      12.171  -6.057  -1.465  1.00  0.00           C  
ATOM     58  C   LEU A   8      12.239  -7.568  -1.266  1.00  0.00           C  
ATOM     59  CB  LEU A   8      13.475  -5.410  -0.989  1.00  0.00           C  
ATOM     60  O   LEU A   8      11.906  -8.072  -0.191  1.00  0.00           O  
ATOM     61  CG  LEU A   8      13.787  -4.022  -1.552  1.00  0.00           C  
ATOM     62  CD1 LEU A   8      14.831  -3.323  -0.688  1.00  0.00           C  
ATOM     63  CD2 LEU A   8      14.262  -4.125  -2.997  1.00  0.00           C  
ATOM     64  N   GLU A   9      12.811  -8.320  -2.263  1.00  0.00           N  
ATOM     65  CA  GLU A   9      13.011  -9.761  -2.139  1.00  0.00           C  
ATOM     66  C   GLU A   9      14.033 -10.085  -1.053  1.00  0.00           C  
ATOM     67  CB  GLU A   9      13.456 -10.359  -3.476  1.00  0.00           C  
ATOM     68  O   GLU A   9      14.870  -9.247  -0.712  1.00  0.00           O  
ATOM     69  CG  GLU A   9      12.400 -10.278  -4.568  1.00  0.00           C  
ATOM     70  CD  GLU A   9      12.889 -10.788  -5.915  1.00  0.00           C  
ATOM     71  OE1 GLU A   9      12.054 -10.999  -6.823  1.00  0.00           O  
ATOM     72  OE2 GLU A   9      14.117 -10.978  -6.062  1.00  0.00           O  
ATOM     73  N   GLY A  10      13.958 -11.362  -0.475  1.00  0.00           N  
ATOM     74  CA  GLY A  10      15.038 -11.766   0.412  1.00  0.00           C  
ATOM     75  C   GLY A  10      14.560 -12.144   1.801  1.00  0.00           C  
ATOM     76  O   GLY A  10      15.350 -12.593   2.634  1.00  0.00           O  
ATOM     77  N   ARG A  11      13.243 -11.969   2.022  1.00  0.00           N  
ATOM     78  CA  ARG A  11      12.689 -12.380   3.308  1.00  0.00           C  
ATOM     79  C   ARG A  11      12.360 -13.870   3.311  1.00  0.00           C  
ATOM     80  CB  ARG A  11      11.436 -11.566   3.639  1.00  0.00           C  
ATOM     81  O   ARG A  11      11.994 -14.432   2.276  1.00  0.00           O  
ATOM     82  CG  ARG A  11      11.660 -10.063   3.638  1.00  0.00           C  
ATOM     83  CD  ARG A  11      12.717  -9.649   4.653  1.00  0.00           C  
ATOM     84  NE  ARG A  11      12.844  -8.197   4.737  1.00  0.00           N  
ATOM     85  NH1 ARG A  11      13.821  -8.197   6.830  1.00  0.00           N  
ATOM     86  NH2 ARG A  11      13.432  -6.217   5.742  1.00  0.00           N  
ATOM     87  CZ  ARG A  11      13.365  -7.540   5.769  1.00  0.00           C  
ATOM     88  N   SER A  12      12.537 -14.507   4.482  1.00  0.00           N  
ATOM     89  CA  SER A  12      12.218 -15.926   4.601  1.00  0.00           C  
ATOM     90  C   SER A  12      10.712 -16.161   4.553  1.00  0.00           C  
ATOM     91  CB  SER A  12      12.791 -16.498   5.899  1.00  0.00           C  
ATOM     92  O   SER A  12       9.928 -15.241   4.795  1.00  0.00           O  
ATOM     93  OG  SER A  12      12.095 -15.986   7.023  1.00  0.00           O  
ATOM     94  N   ASP A  13      10.308 -17.403   4.283  1.00  0.00           N  
ATOM     95  CA  ASP A  13       8.895 -17.769   4.308  1.00  0.00           C  
ATOM     96  C   ASP A  13       8.280 -17.492   5.678  1.00  0.00           C  
ATOM     97  CB  ASP A  13       8.715 -19.243   3.939  1.00  0.00           C  
ATOM     98  O   ASP A  13       7.135 -17.044   5.771  1.00  0.00           O  
ATOM     99  CG  ASP A  13       8.907 -19.511   2.456  1.00  0.00           C  
ATOM    100  OD1 ASP A  13       9.036 -18.545   1.673  1.00  0.00           O  
ATOM    101  OD2 ASP A  13       8.927 -20.698   2.067  1.00  0.00           O  
ATOM    102  N   GLU A  14       9.064 -17.739   6.733  1.00  0.00           N  
ATOM    103  CA  GLU A  14       8.590 -17.507   8.094  1.00  0.00           C  
ATOM    104  C   GLU A  14       8.303 -16.028   8.335  1.00  0.00           C  
ATOM    105  CB  GLU A  14       9.612 -18.017   9.113  1.00  0.00           C  
ATOM    106  O   GLU A  14       7.303 -15.678   8.966  1.00  0.00           O  
ATOM    107  CG  GLU A  14       9.661 -19.534   9.229  1.00  0.00           C  
ATOM    108  CD  GLU A  14      10.769 -20.030  10.144  1.00  0.00           C  
ATOM    109  OE1 GLU A  14      10.869 -21.259  10.365  1.00  0.00           O  
ATOM    110  OE2 GLU A  14      11.543 -19.184  10.646  1.00  0.00           O  
ATOM    111  N   GLN A  15       9.145 -15.174   7.850  1.00  0.00           N  
ATOM    112  CA  GLN A  15       8.958 -13.734   7.998  1.00  0.00           C  
ATOM    113  C   GLN A  15       7.722 -13.259   7.240  1.00  0.00           C  
ATOM    114  CB  GLN A  15      10.195 -12.978   7.512  1.00  0.00           C  
ATOM    115  O   GLN A  15       6.952 -12.441   7.747  1.00  0.00           O  
ATOM    116  CG  GLN A  15      11.393 -13.093   8.445  1.00  0.00           C  
ATOM    117  CD  GLN A  15      12.655 -12.490   7.858  1.00  0.00           C  
ATOM    118  NE2 GLN A  15      13.251 -11.545   8.577  1.00  0.00           N  
ATOM    119  OE1 GLN A  15      13.091 -12.871   6.767  1.00  0.00           O  
ATOM    120  N   LYS A  16       7.491 -13.722   6.040  1.00  0.00           N  
ATOM    121  CA  LYS A  16       6.313 -13.361   5.257  1.00  0.00           C  
ATOM    122  C   LYS A  16       5.032 -13.826   5.945  1.00  0.00           C  
ATOM    123  CB  LYS A  16       6.399 -13.957   3.851  1.00  0.00           C  
ATOM    124  O   LYS A  16       4.041 -13.094   5.985  1.00  0.00           O  
ATOM    125  CG  LYS A  16       7.478 -13.333   2.979  1.00  0.00           C  
ATOM    126  CD  LYS A  16       7.598 -14.050   1.640  1.00  0.00           C  
ATOM    127  CE  LYS A  16       8.850 -13.621   0.886  1.00  0.00           C  
ATOM    128  NZ  LYS A  16       9.112 -14.497  -0.295  1.00  0.00           N  
ATOM    129  N   GLU A  17       5.119 -15.055   6.565  1.00  0.00           N  
ATOM    130  CA  GLU A  17       3.968 -15.564   7.305  1.00  0.00           C  
ATOM    131  C   GLU A  17       3.646 -14.677   8.504  1.00  0.00           C  
ATOM    132  CB  GLU A  17       4.218 -17.002   7.766  1.00  0.00           C  
ATOM    133  O   GLU A  17       2.480 -14.372   8.762  1.00  0.00           O  
ATOM    134  CG  GLU A  17       4.196 -18.023   6.637  1.00  0.00           C  
ATOM    135  CD  GLU A  17       4.512 -19.436   7.100  1.00  0.00           C  
ATOM    136  OE1 GLU A  17       4.548 -20.358   6.254  1.00  0.00           O  
ATOM    137  OE2 GLU A  17       4.724 -19.623   8.319  1.00  0.00           O  
ATOM    138  N   THR A  18       4.715 -14.348   9.171  1.00  0.00           N  
ATOM    139  CA  THR A  18       4.547 -13.489  10.338  1.00  0.00           C  
ATOM    140  C   THR A  18       3.968 -12.136   9.936  1.00  0.00           C  
ATOM    141  CB  THR A  18       5.884 -13.280  11.074  1.00  0.00           C  
ATOM    142  O   THR A  18       3.061 -11.623  10.595  1.00  0.00           O  
ATOM    143  CG2 THR A  18       5.726 -12.305  12.236  1.00  0.00           C  
ATOM    144  OG1 THR A  18       6.347 -14.538  11.579  1.00  0.00           O  
ATOM    145  N   LEU A  19       4.458 -11.542   8.841  1.00  0.00           N  
ATOM    146  CA  LEU A  19       3.947 -10.279   8.318  1.00  0.00           C  
ATOM    147  C   LEU A  19       2.454 -10.378   8.024  1.00  0.00           C  
ATOM    148  CB  LEU A  19       4.703  -9.879   7.048  1.00  0.00           C  
ATOM    149  O   LEU A  19       1.674  -9.525   8.455  1.00  0.00           O  
ATOM    150  CG  LEU A  19       4.181  -8.647   6.309  1.00  0.00           C  
ATOM    151  CD1 LEU A  19       4.241  -7.421   7.213  1.00  0.00           C  
ATOM    152  CD2 LEU A  19       4.978  -8.413   5.030  1.00  0.00           C  
ATOM    153  N   ILE A  20       1.998 -11.381   7.346  1.00  0.00           N  
ATOM    154  CA  ILE A  20       0.598 -11.568   6.979  1.00  0.00           C  
ATOM    155  C   ILE A  20      -0.258 -11.660   8.240  1.00  0.00           C  
ATOM    156  CB  ILE A  20       0.406 -12.830   6.108  1.00  0.00           C  
ATOM    157  O   ILE A  20      -1.302 -11.010   8.337  1.00  0.00           O  
ATOM    158  CG1 ILE A  20       1.013 -12.616   4.717  1.00  0.00           C  
ATOM    159  CG2 ILE A  20      -1.077 -13.197   6.009  1.00  0.00           C  
ATOM    160  CD1 ILE A  20       1.208 -13.900   3.922  1.00  0.00           C  
ATOM    161  N   ARG A  21       0.213 -12.392   9.252  1.00  0.00           N  
ATOM    162  CA  ARG A  21      -0.523 -12.561  10.501  1.00  0.00           C  
ATOM    163  C   ARG A  21      -0.692 -11.228  11.221  1.00  0.00           C  
ATOM    164  CB  ARG A  21       0.187 -13.563  11.413  1.00  0.00           C  
ATOM    165  O   ARG A  21      -1.804 -10.864  11.612  1.00  0.00           O  
ATOM    166  CG  ARG A  21      -0.487 -13.754  12.762  1.00  0.00           C  
ATOM    167  CD  ARG A  21       0.332 -14.650  13.681  1.00  0.00           C  
ATOM    168  NE  ARG A  21       1.698 -14.159  13.835  1.00  0.00           N  
ATOM    169  NH1 ARG A  21       1.554 -13.861  16.121  1.00  0.00           N  
ATOM    170  NH2 ARG A  21       3.500 -13.363  15.016  1.00  0.00           N  
ATOM    171  CZ  ARG A  21       2.247 -13.795  14.990  1.00  0.00           C  
ATOM    172  N   GLU A  22       0.339 -10.575  11.300  1.00  0.00           N  
ATOM    173  CA  GLU A  22       0.314  -9.334  12.068  1.00  0.00           C  
ATOM    174  C   GLU A  22      -0.524  -8.268  11.368  1.00  0.00           C  
ATOM    175  CB  GLU A  22       1.736  -8.817  12.299  1.00  0.00           C  
ATOM    176  O   GLU A  22      -1.261  -7.524  12.018  1.00  0.00           O  
ATOM    177  CG  GLU A  22       2.574  -9.706  13.206  1.00  0.00           C  
ATOM    178  CD  GLU A  22       2.029  -9.802  14.622  1.00  0.00           C  
ATOM    179  OE1 GLU A  22       2.154 -10.879  15.249  1.00  0.00           O  
ATOM    180  OE2 GLU A  22       1.471  -8.793  15.108  1.00  0.00           O  
ATOM    181  N   VAL A  23      -0.445  -8.088  10.042  1.00  0.00           N  
ATOM    182  CA  VAL A  23      -1.240  -7.137   9.271  1.00  0.00           C  
ATOM    183  C   VAL A  23      -2.724  -7.465   9.418  1.00  0.00           C  
ATOM    184  CB  VAL A  23      -0.839  -7.141   7.779  1.00  0.00           C  
ATOM    185  O   VAL A  23      -3.542  -6.575   9.662  1.00  0.00           O  
ATOM    186  CG1 VAL A  23      -1.863  -6.376   6.942  1.00  0.00           C  
ATOM    187  CG2 VAL A  23       0.555  -6.542   7.600  1.00  0.00           C  
ATOM    188  N   SER A  24      -3.051  -8.736   9.277  1.00  0.00           N  
ATOM    189  CA  SER A  24      -4.438  -9.165   9.419  1.00  0.00           C  
ATOM    190  C   SER A  24      -4.996  -8.789  10.787  1.00  0.00           C  
ATOM    191  CB  SER A  24      -4.557 -10.675   9.208  1.00  0.00           C  
ATOM    192  O   SER A  24      -6.109  -8.270  10.889  1.00  0.00           O  
ATOM    193  OG  SER A  24      -4.190 -11.029   7.886  1.00  0.00           O  
ATOM    194  N   GLU A  25      -4.246  -9.020  11.901  1.00  0.00           N  
ATOM    195  CA  GLU A  25      -4.669  -8.703  13.262  1.00  0.00           C  
ATOM    196  C   GLU A  25      -4.861  -7.200  13.444  1.00  0.00           C  
ATOM    197  CB  GLU A  25      -3.653  -9.231  14.278  1.00  0.00           C  
ATOM    198  O   GLU A  25      -5.826  -6.764  14.076  1.00  0.00           O  
ATOM    199  CG  GLU A  25      -3.688 -10.742  14.454  1.00  0.00           C  
ATOM    200  CD  GLU A  25      -2.557 -11.271  15.322  1.00  0.00           C  
ATOM    201  OE1 GLU A  25      -2.527 -12.490  15.604  1.00  0.00           O  
ATOM    202  OE2 GLU A  25      -1.694 -10.459  15.724  1.00  0.00           O  
ATOM    203  N   ALA A  26      -3.912  -6.464  12.862  1.00  0.00           N  
ATOM    204  CA  ALA A  26      -4.000  -5.010  12.969  1.00  0.00           C  
ATOM    205  C   ALA A  26      -5.280  -4.489  12.321  1.00  0.00           C  
ATOM    206  CB  ALA A  26      -2.778  -4.354  12.329  1.00  0.00           C  
ATOM    207  O   ALA A  26      -5.981  -3.656  12.900  1.00  0.00           O  
ATOM    208  N   ILE A  27      -5.602  -4.938  11.163  1.00  0.00           N  
ATOM    209  CA  ILE A  27      -6.771  -4.497  10.410  1.00  0.00           C  
ATOM    210  C   ILE A  27      -8.044  -4.921  11.138  1.00  0.00           C  
ATOM    211  CB  ILE A  27      -6.762  -5.060   8.972  1.00  0.00           C  
ATOM    212  O   ILE A  27      -8.971  -4.124  11.298  1.00  0.00           O  
ATOM    213  CG1 ILE A  27      -5.665  -4.386   8.140  1.00  0.00           C  
ATOM    214  CG2 ILE A  27      -8.134  -4.884   8.314  1.00  0.00           C  
ATOM    215  CD1 ILE A  27      -5.449  -5.018   6.772  1.00  0.00           C  
ATOM    216  N   SER A  28      -8.017  -6.210  11.569  1.00  0.00           N  
ATOM    217  CA  SER A  28      -9.183  -6.706  12.293  1.00  0.00           C  
ATOM    218  C   SER A  28      -9.457  -5.870  13.538  1.00  0.00           C  
ATOM    219  CB  SER A  28      -8.987  -8.171  12.685  1.00  0.00           C  
ATOM    220  O   SER A  28     -10.594  -5.458  13.778  1.00  0.00           O  
ATOM    221  OG  SER A  28     -10.055  -8.621  13.502  1.00  0.00           O  
ATOM    222  N   ARG A  29      -8.382  -5.567  14.321  1.00  0.00           N  
ATOM    223  CA  ARG A  29      -8.520  -4.829  15.572  1.00  0.00           C  
ATOM    224  C   ARG A  29      -8.911  -3.379  15.311  1.00  0.00           C  
ATOM    225  CB  ARG A  29      -7.220  -4.885  16.376  1.00  0.00           C  
ATOM    226  O   ARG A  29      -9.762  -2.825  16.010  1.00  0.00           O  
ATOM    227  CG  ARG A  29      -6.982  -6.217  17.070  1.00  0.00           C  
ATOM    228  CD  ARG A  29      -5.887  -6.116  18.124  1.00  0.00           C  
ATOM    229  NE  ARG A  29      -4.644  -5.597  17.561  1.00  0.00           N  
ATOM    230  NH1 ARG A  29      -3.741  -7.674  17.106  1.00  0.00           N  
ATOM    231  NH2 ARG A  29      -2.565  -5.773  16.601  1.00  0.00           N  
ATOM    232  CZ  ARG A  29      -3.653  -6.349  17.090  1.00  0.00           C  
ATOM    233  N   SER A  30      -8.421  -2.719  14.373  1.00  0.00           N  
ATOM    234  CA  SER A  30      -8.588  -1.292  14.119  1.00  0.00           C  
ATOM    235  C   SER A  30      -9.931  -1.002  13.456  1.00  0.00           C  
ATOM    236  CB  SER A  30      -7.452  -0.767  13.240  1.00  0.00           C  
ATOM    237  O   SER A  30     -10.555   0.026  13.728  1.00  0.00           O  
ATOM    238  OG  SER A  30      -6.195  -0.995  13.853  1.00  0.00           O  
ATOM    239  N   LEU A  31     -10.315  -1.956  12.656  1.00  0.00           N  
ATOM    240  CA  LEU A  31     -11.491  -1.658  11.846  1.00  0.00           C  
ATOM    241  C   LEU A  31     -12.677  -2.516  12.273  1.00  0.00           C  
ATOM    242  CB  LEU A  31     -11.191  -1.884  10.362  1.00  0.00           C  
ATOM    243  O   LEU A  31     -13.766  -2.403  11.705  1.00  0.00           O  
ATOM    244  CG  LEU A  31     -10.039  -1.069   9.772  1.00  0.00           C  
ATOM    245  CD1 LEU A  31      -9.845  -1.415   8.299  1.00  0.00           C  
ATOM    246  CD2 LEU A  31     -10.295   0.424   9.947  1.00  0.00           C  
ATOM    247  N   ASP A  32     -12.480  -3.295  13.366  1.00  0.00           N  
ATOM    248  CA  ASP A  32     -13.523  -4.186  13.865  1.00  0.00           C  
ATOM    249  C   ASP A  32     -14.075  -5.066  12.746  1.00  0.00           C  
ATOM    250  CB  ASP A  32     -14.654  -3.382  14.509  1.00  0.00           C  
ATOM    251  O   ASP A  32     -15.287  -5.273  12.653  1.00  0.00           O  
ATOM    252  CG  ASP A  32     -14.242  -2.717  15.811  1.00  0.00           C  
ATOM    253  OD1 ASP A  32     -13.464  -3.318  16.584  1.00  0.00           O  
ATOM    254  OD2 ASP A  32     -14.702  -1.584  16.069  1.00  0.00           O  
ATOM    255  N   ALA A  33     -13.247  -5.501  11.753  1.00  0.00           N  
ATOM    256  CA  ALA A  33     -13.587  -6.449  10.695  1.00  0.00           C  
ATOM    257  C   ALA A  33     -13.238  -7.877  11.105  1.00  0.00           C  
ATOM    258  CB  ALA A  33     -12.869  -6.080   9.399  1.00  0.00           C  
ATOM    259  O   ALA A  33     -12.279  -8.099  11.847  1.00  0.00           O  
ATOM    260  N   PRO A  34     -14.211  -8.860  10.694  1.00  0.00           N  
ATOM    261  CA  PRO A  34     -13.843 -10.252  10.960  1.00  0.00           C  
ATOM    262  C   PRO A  34     -12.511 -10.642  10.324  1.00  0.00           C  
ATOM    263  CB  PRO A  34     -14.997 -11.046  10.341  1.00  0.00           C  
ATOM    264  O   PRO A  34     -12.225 -10.248   9.191  1.00  0.00           O  
ATOM    265  CG  PRO A  34     -16.115 -10.062  10.219  1.00  0.00           C  
ATOM    266  CD  PRO A  34     -15.528  -8.691  10.047  1.00  0.00           C  
ATOM    267  N   LEU A  35     -11.697 -11.341  11.117  1.00  0.00           N  
ATOM    268  CA  LEU A  35     -10.372 -11.752  10.664  1.00  0.00           C  
ATOM    269  C   LEU A  35     -10.458 -12.500   9.338  1.00  0.00           C  
ATOM    270  CB  LEU A  35      -9.695 -12.632  11.718  1.00  0.00           C  
ATOM    271  O   LEU A  35      -9.622 -12.302   8.453  1.00  0.00           O  
ATOM    272  CG  LEU A  35      -8.199 -12.888  11.527  1.00  0.00           C  
ATOM    273  CD1 LEU A  35      -7.425 -11.575  11.584  1.00  0.00           C  
ATOM    274  CD2 LEU A  35      -7.685 -13.864  12.580  1.00  0.00           C  
ATOM    275  N   THR A  36     -11.484 -13.236   9.142  1.00  0.00           N  
ATOM    276  CA  THR A  36     -11.650 -14.092   7.972  1.00  0.00           C  
ATOM    277  C   THR A  36     -11.907 -13.256   6.722  1.00  0.00           C  
ATOM    278  CB  THR A  36     -12.805 -15.091   8.171  1.00  0.00           C  
ATOM    279  O   THR A  36     -11.801 -13.758   5.601  1.00  0.00           O  
ATOM    280  CG2 THR A  36     -12.556 -15.987   9.380  1.00  0.00           C  
ATOM    281  OG1 THR A  36     -14.026 -14.368   8.370  1.00  0.00           O  
ATOM    282  N   SER A  37     -12.284 -12.016   6.952  1.00  0.00           N  
ATOM    283  CA  SER A  37     -12.609 -11.134   5.836  1.00  0.00           C  
ATOM    284  C   SER A  37     -11.398 -10.309   5.413  1.00  0.00           C  
ATOM    285  CB  SER A  37     -13.765 -10.204   6.207  1.00  0.00           C  
ATOM    286  O   SER A  37     -11.464  -9.552   4.443  1.00  0.00           O  
ATOM    287  OG  SER A  37     -13.365  -9.275   7.199  1.00  0.00           O  
ATOM    288  N   VAL A  38     -10.164 -10.443   6.033  1.00  0.00           N  
ATOM    289  CA  VAL A  38      -8.942  -9.703   5.738  1.00  0.00           C  
ATOM    290  C   VAL A  38      -8.109 -10.469   4.713  1.00  0.00           C  
ATOM    291  CB  VAL A  38      -8.111  -9.447   7.015  1.00  0.00           C  
ATOM    292  O   VAL A  38      -7.862 -11.667   4.875  1.00  0.00           O  
ATOM    293  CG1 VAL A  38      -6.809  -8.723   6.677  1.00  0.00           C  
ATOM    294  CG2 VAL A  38      -8.924  -8.644   8.028  1.00  0.00           C  
ATOM    295  N   ARG A  39      -7.785  -9.817   3.628  1.00  0.00           N  
ATOM    296  CA  ARG A  39      -6.877 -10.344   2.615  1.00  0.00           C  
ATOM    297  C   ARG A  39      -5.572  -9.555   2.584  1.00  0.00           C  
ATOM    298  CB  ARG A  39      -7.537 -10.318   1.234  1.00  0.00           C  
ATOM    299  O   ARG A  39      -5.585  -8.322   2.604  1.00  0.00           O  
ATOM    300  CG  ARG A  39      -8.783 -11.183   1.132  1.00  0.00           C  
ATOM    301  CD  ARG A  39     -10.055 -10.358   1.274  1.00  0.00           C  
ATOM    302  NE  ARG A  39     -11.250 -11.196   1.215  1.00  0.00           N  
ATOM    303  NH1 ARG A  39     -12.712  -9.512   1.819  1.00  0.00           N  
ATOM    304  NH2 ARG A  39     -13.500 -11.622   1.392  1.00  0.00           N  
ATOM    305  CZ  ARG A  39     -12.484 -10.775   1.475  1.00  0.00           C  
ATOM    306  N   VAL A  40      -4.366 -10.278   2.533  1.00  0.00           N  
ATOM    307  CA  VAL A  40      -3.042  -9.672   2.445  1.00  0.00           C  
ATOM    308  C   VAL A  40      -2.305 -10.217   1.224  1.00  0.00           C  
ATOM    309  CB  VAL A  40      -2.214  -9.926   3.725  1.00  0.00           C  
ATOM    310  O   VAL A  40      -2.172 -11.433   1.063  1.00  0.00           O  
ATOM    311  CG1 VAL A  40      -0.837  -9.276   3.615  1.00  0.00           C  
ATOM    312  CG2 VAL A  40      -2.959  -9.405   4.953  1.00  0.00           C  
ATOM    313  N   ILE A  41      -1.860  -9.289   0.377  1.00  0.00           N  
ATOM    314  CA  ILE A  41      -1.082  -9.644  -0.805  1.00  0.00           C  
ATOM    315  C   ILE A  41       0.336  -9.095  -0.674  1.00  0.00           C  
ATOM    316  CB  ILE A  41      -1.745  -9.115  -2.097  1.00  0.00           C  
ATOM    317  O   ILE A  41       0.527  -7.895  -0.464  1.00  0.00           O  
ATOM    318  CG1 ILE A  41      -3.202  -9.583  -2.180  1.00  0.00           C  
ATOM    319  CG2 ILE A  41      -0.954  -9.560  -3.331  1.00  0.00           C  
ATOM    320  CD1 ILE A  41      -4.037  -8.828  -3.205  1.00  0.00           C  
ATOM    321  N   ILE A  42       1.307 -10.038  -0.774  1.00  0.00           N  
ATOM    322  CA  ILE A  42       2.708  -9.632  -0.749  1.00  0.00           C  
ATOM    323  C   ILE A  42       3.282  -9.671  -2.164  1.00  0.00           C  
ATOM    324  CB  ILE A  42       3.540 -10.531   0.192  1.00  0.00           C  
ATOM    325  O   ILE A  42       3.216 -10.702  -2.838  1.00  0.00           O  
ATOM    326  CG1 ILE A  42       3.018 -10.428   1.630  1.00  0.00           C  
ATOM    327  CG2 ILE A  42       5.025 -10.160   0.121  1.00  0.00           C  
ATOM    328  CD1 ILE A  42       3.658 -11.418   2.593  1.00  0.00           C  
ATOM    329  N   THR A  43       3.827  -8.465  -2.589  1.00  0.00           N  
ATOM    330  CA  THR A  43       4.564  -8.366  -3.844  1.00  0.00           C  
ATOM    331  C   THR A  43       6.037  -8.064  -3.583  1.00  0.00           C  
ATOM    332  CB  THR A  43       3.968  -7.279  -4.757  1.00  0.00           C  
ATOM    333  O   THR A  43       6.372  -7.015  -3.030  1.00  0.00           O  
ATOM    334  CG2 THR A  43       4.743  -7.175  -6.066  1.00  0.00           C  
ATOM    335  OG1 THR A  43       2.603  -7.602  -5.048  1.00  0.00           O  
ATOM    336  N   GLU A  44       6.866  -9.050  -3.960  1.00  0.00           N  
ATOM    337  CA  GLU A  44       8.306  -8.890  -3.784  1.00  0.00           C  
ATOM    338  C   GLU A  44       8.944  -8.243  -5.010  1.00  0.00           C  
ATOM    339  CB  GLU A  44       8.966 -10.242  -3.500  1.00  0.00           C  
ATOM    340  O   GLU A  44       8.702  -8.671  -6.141  1.00  0.00           O  
ATOM    341  CG  GLU A  44       8.492 -10.899  -2.211  1.00  0.00           C  
ATOM    342  CD  GLU A  44       9.274 -12.153  -1.854  1.00  0.00           C  
ATOM    343  OE1 GLU A  44      10.168 -12.082  -0.980  1.00  0.00           O  
ATOM    344  OE2 GLU A  44       8.991 -13.214  -2.454  1.00  0.00           O  
ATOM    345  N   MET A  45       9.772  -7.155  -4.671  1.00  0.00           N  
ATOM    346  CA  MET A  45      10.441  -6.409  -5.734  1.00  0.00           C  
ATOM    347  C   MET A  45      11.934  -6.717  -5.758  1.00  0.00           C  
ATOM    348  CB  MET A  45      10.219  -4.906  -5.557  1.00  0.00           C  
ATOM    349  O   MET A  45      12.582  -6.752  -4.711  1.00  0.00           O  
ATOM    350  CG  MET A  45       8.755  -4.510  -5.454  1.00  0.00           C  
ATOM    351  SD  MET A  45       7.822  -4.861  -6.995  1.00  0.00           S  
ATOM    352  CE  MET A  45       8.732  -3.815  -8.166  1.00  0.00           C  
ATOM    353  N   ALA A  46      13.193  -6.145  -6.313  1.00  0.00           N  
ATOM    354  CA  ALA A  46      13.815  -7.208  -7.098  1.00  0.00           C  
ATOM    355  C   ALA A  46      15.256  -6.855  -7.454  1.00  0.00           C  
ATOM    356  CB  ALA A  46      13.007  -7.476  -8.365  1.00  0.00           C  
ATOM    357  O   ALA A  46      15.576  -5.686  -7.688  1.00  0.00           O  
ATOM    358  N   LYS A  47      16.140  -7.889  -7.544  1.00  0.00           N  
ATOM    359  CA  LYS A  47      17.554  -7.684  -7.842  1.00  0.00           C  
ATOM    360  C   LYS A  47      17.738  -6.994  -9.190  1.00  0.00           C  
ATOM    361  CB  LYS A  47      18.304  -9.019  -7.830  1.00  0.00           C  
ATOM    362  O   LYS A  47      18.695  -6.239  -9.380  1.00  0.00           O  
ATOM    363  CG  LYS A  47      18.441  -9.640  -6.448  1.00  0.00           C  
ATOM    364  CD  LYS A  47      19.276 -10.913  -6.488  1.00  0.00           C  
ATOM    365  CE  LYS A  47      19.559 -11.441  -5.088  1.00  0.00           C  
ATOM    366  NZ  LYS A  47      20.466 -12.627  -5.116  1.00  0.00           N  
ATOM    367  N   GLY A  48      16.855  -7.083 -10.084  1.00  0.00           N  
ATOM    368  CA  GLY A  48      16.916  -6.450 -11.391  1.00  0.00           C  
ATOM    369  C   GLY A  48      16.592  -4.968 -11.352  1.00  0.00           C  
ATOM    370  O   GLY A  48      16.800  -4.255 -12.336  1.00  0.00           O  
ATOM    371  N   HIS A  49      16.041  -4.501 -10.314  1.00  0.00           N  
ATOM    372  CA  HIS A  49      15.590  -3.119 -10.194  1.00  0.00           C  
ATOM    373  C   HIS A  49      16.466  -2.337  -9.221  1.00  0.00           C  
ATOM    374  CB  HIS A  49      14.129  -3.069  -9.742  1.00  0.00           C  
ATOM    375  O   HIS A  49      16.134  -1.207  -8.852  1.00  0.00           O  
ATOM    376  CG  HIS A  49      13.193  -3.797 -10.654  1.00  0.00           C  
ATOM    377  CD2 HIS A  49      13.293  -4.104 -11.969  1.00  0.00           C  
ATOM    378  ND1 HIS A  49      11.983  -4.299 -10.230  1.00  0.00           N  
ATOM    379  CE1 HIS A  49      11.375  -4.887 -11.248  1.00  0.00           C  
ATOM    380  NE2 HIS A  49      12.150  -4.782 -12.315  1.00  0.00           N  
ATOM    381  N   PHE A  50      17.440  -3.058  -8.677  1.00  0.00           N  
ATOM    382  CA  PHE A  50      18.379  -2.423  -7.762  1.00  0.00           C  
ATOM    383  C   PHE A  50      19.778  -2.377  -8.367  1.00  0.00           C  
ATOM    384  CB  PHE A  50      18.411  -3.165  -6.422  1.00  0.00           C  
ATOM    385  O   PHE A  50      20.292  -3.396  -8.832  1.00  0.00           O  
ATOM    386  CG  PHE A  50      18.982  -2.352  -5.292  1.00  0.00           C  
ATOM    387  CD2 PHE A  50      20.337  -2.411  -4.990  1.00  0.00           C  
ATOM    388  CD1 PHE A  50      18.163  -1.530  -4.530  1.00  0.00           C  
ATOM    389  CE2 PHE A  50      20.868  -1.660  -3.944  1.00  0.00           C  
ATOM    390  CE1 PHE A  50      18.687  -0.775  -3.484  1.00  0.00           C  
ATOM    391  CZ  PHE A  50      20.039  -0.843  -3.191  1.00  0.00           C  
ATOM    392  N   GLY A  51      20.475  -1.100  -8.425  1.00  0.00           N  
ATOM    393  CA  GLY A  51      21.814  -0.921  -8.962  1.00  0.00           C  
ATOM    394  C   GLY A  51      22.847  -0.604  -7.897  1.00  0.00           C  
ATOM    395  O   GLY A  51      22.519  -0.025  -6.859  1.00  0.00           O  
ATOM    396  N   ILE A  52      23.988  -1.240  -7.995  1.00  0.00           N  
ATOM    397  CA  ILE A  52      25.144  -0.965  -7.149  1.00  0.00           C  
ATOM    398  C   ILE A  52      26.363  -0.672  -8.019  1.00  0.00           C  
ATOM    399  CB  ILE A  52      25.437  -2.142  -6.192  1.00  0.00           C  
ATOM    400  O   ILE A  52      26.789  -1.520  -8.808  1.00  0.00           O  
ATOM    401  CG1 ILE A  52      24.203  -2.460  -5.341  1.00  0.00           C  
ATOM    402  CG2 ILE A  52      26.647  -1.829  -5.306  1.00  0.00           C  
ATOM    403  CD1 ILE A  52      24.247  -3.829  -4.674  1.00  0.00           C  
ATOM    404  N   GLY A  53      26.822   0.546  -7.917  1.00  0.00           N  
ATOM    405  CA  GLY A  53      28.010   0.900  -8.677  1.00  0.00           C  
ATOM    406  C   GLY A  53      27.798   0.838 -10.178  1.00  0.00           C  
ATOM    407  O   GLY A  53      28.698   0.442 -10.921  1.00  0.00           O  
ATOM    408  N   GLY A  54      26.582   1.121 -10.680  1.00  0.00           N  
ATOM    409  CA  GLY A  54      26.275   1.210 -12.100  1.00  0.00           C  
ATOM    410  C   GLY A  54      25.851  -0.115 -12.703  1.00  0.00           C  
ATOM    411  O   GLY A  54      25.694  -0.227 -13.921  1.00  0.00           O  
ATOM    412  N   GLU A  55      25.752  -1.240 -11.919  1.00  0.00           N  
ATOM    413  CA  GLU A  55      25.349  -2.573 -12.360  1.00  0.00           C  
ATOM    414  C   GLU A  55      24.163  -3.086 -11.549  1.00  0.00           C  
ATOM    415  CB  GLU A  55      26.521  -3.552 -12.255  1.00  0.00           C  
ATOM    416  O   GLU A  55      24.083  -2.853 -10.341  1.00  0.00           O  
ATOM    417  CG  GLU A  55      27.742  -3.138 -13.063  1.00  0.00           C  
ATOM    418  CD  GLU A  55      27.560  -3.319 -14.562  1.00  0.00           C  
ATOM    419  OE1 GLU A  55      28.407  -2.826 -15.341  1.00  0.00           O  
ATOM    420  OE2 GLU A  55      26.559  -3.957 -14.960  1.00  0.00           O  
ATOM    421  N   LEU A  56      23.248  -3.749 -12.365  1.00  0.00           N  
ATOM    422  CA  LEU A  56      22.143  -4.379 -11.651  1.00  0.00           C  
ATOM    423  C   LEU A  56      22.659  -5.363 -10.608  1.00  0.00           C  
ATOM    424  CB  LEU A  56      21.211  -5.097 -12.631  1.00  0.00           C  
ATOM    425  O   LEU A  56      23.666  -6.040 -10.831  1.00  0.00           O  
ATOM    426  CG  LEU A  56      19.812  -5.434 -12.113  1.00  0.00           C  
ATOM    427  CD1 LEU A  56      18.849  -4.288 -12.405  1.00  0.00           C  
ATOM    428  CD2 LEU A  56      19.310  -6.733 -12.735  1.00  0.00           C  
ATOM    429  N   ALA A  57      21.932  -5.329  -9.460  1.00  0.00           N  
ATOM    430  CA  ALA A  57      22.301  -6.256  -8.393  1.00  0.00           C  
ATOM    431  C   ALA A  57      22.127  -7.704  -8.840  1.00  0.00           C  
ATOM    432  CB  ALA A  57      21.470  -5.984  -7.142  1.00  0.00           C  
ATOM    433  O   ALA A  57      22.780  -8.607  -8.312  1.00  0.00           O  
ATOM    434  N   SER A  58      21.352  -7.945  -9.811  1.00  0.00           N  
ATOM    435  CA  SER A  58      21.128  -9.283 -10.348  1.00  0.00           C  
ATOM    436  C   SER A  58      22.361  -9.797 -11.084  1.00  0.00           C  
ATOM    437  CB  SER A  58      19.924  -9.286 -11.292  1.00  0.00           C  
ATOM    438  O   SER A  58      22.508 -11.004 -11.289  1.00  0.00           O  
ATOM    439  OG  SER A  58      20.180  -8.493 -12.438  1.00  0.00           O  
ATOM    440  N   LYS A  59      23.255  -8.825 -11.617  1.00  0.00           N  
ATOM    441  CA  LYS A  59      24.437  -9.196 -12.389  1.00  0.00           C  
ATOM    442  C   LYS A  59      25.618  -9.500 -11.472  1.00  0.00           C  
ATOM    443  CB  LYS A  59      24.808  -8.085 -13.372  1.00  0.00           C  
ATOM    444  O   LYS A  59      26.409 -10.405 -11.750  1.00  0.00           O  
ATOM    445  CG  LYS A  59      23.701  -7.734 -14.356  1.00  0.00           C  
ATOM    446  CD  LYS A  59      24.150  -6.662 -15.342  1.00  0.00           C  
ATOM    447  CE  LYS A  59      23.065  -6.353 -16.365  1.00  0.00           C  
ATOM    448  NZ  LYS A  59      23.509  -5.319 -17.347  1.00  0.00           N
"""
# Parse the PDB text assigned to `text` above and keep its atom coordinates
# as the optional target structure.
from alphafold.common import protein
prot = protein.from_pdb_string(text)
# Module-level name; the "Enter sequence and jobname" cell reads it.
target_all_atom_positions = prot.atom_positions
print("Target all_atom_positions:", type(target_all_atom_positions))


In [None]:
#@markdown 3c. test lddt (optional, code in repository)


# Instantiate the repository's AllAtomTest case directly and call one of its
# test methods by hand (no test runner is involved).
from alphafold.model.all_atom_test import AllAtomTest
aat = AllAtomTest()

# NOTE(review): the cell title says "lddt" but the method called is the
# frame-aligned point error check -- confirm which test is intended.
aat.test_frame_aligned_point_error_perfect_on_global_transform_rot_174_trans_1()


In [None]:
#@markdown 3d. Run run_alphafold_test (optional, code in repository)
# Import the test case from the run_alphafold_test module and run its
# end-to-end test by calling the method directly on an instance.
from run_alphafold_test import RunAlphafoldTest
r= RunAlphafoldTest()
r.test_end_to_end()

In [None]:
#@markdown 3e. RunAlphafoldTest (optional, local code)
import os

from absl.testing import absltest
from absl.testing import parameterized
from run_alphafold import predict_structure
from unittest import mock
import numpy as np
import run_alphafold
# Internal import (7716).


class RunAlphafoldTest(parameterized.TestCase):
  """Notebook-local end-to-end check of run_alphafold.predict_structure.

  The data pipeline, model runner and Amber relaxer are all mock objects, so
  the test only exercises what predict_structure does with their return
  values (the files it writes and the B-factor column of the PDB output).
  """

  def test_end_to_end(self):
    """Run predict_structure with mocked components and check its outputs."""

    data_pipeline_mock = mock.Mock()
    model_runner_mock = mock.Mock()
    amber_relaxer_mock = mock.Mock()

    # Canned return values for the mocked components.
    data_pipeline_mock.process.return_value = {}
    model_runner_mock.process_features.return_value = {
        'aatype': np.zeros((12, 10), dtype=np.int32),
        'residue_index': np.tile(np.arange(10, dtype=np.int32)[None], (12, 1)),
    }
    model_runner_mock.predict.return_value = {
        'structure_module': {
            'final_atom_positions': np.zeros((10, 37, 3)),
            'final_atom_mask': np.ones((10, 37)),
        },
        'predicted_lddt': {
            'logits': np.ones((10, 50)),
        },
        # Constant pLDDT of 42; the B-factor assertion at the end looks for it.
        'plddt': np.ones(10) * 42,
        'ptm': np.array(0.),
        'aligned_confidence_probs': np.zeros((10, 10, 50)),
        'predicted_aligned_error': np.zeros((10, 10)),
        'max_predicted_aligned_error': np.array(0.),
    }
    amber_relaxer_mock.process.return_value = ('RELAXED', None, None)

    # The input FASTA is written into the same temp directory that is used
    # as the output base directory below.
    fasta_path = os.path.join(absltest.get_default_test_tmpdir(),
                              'target.fasta')
    with open(fasta_path, 'wt') as f:
      f.write('>A\nAAAAAAAAAAAAA')
    fasta_name = 'test'

    out_dir = absltest.get_default_test_tmpdir()

    run_alphafold.predict_structure(
        fasta_path=fasta_path,
        fasta_name=fasta_name,
        output_dir_base=out_dir,
        data_pipeline=data_pipeline_mock,
        model_runners={'model1': model_runner_mock},
        amber_relaxer=amber_relaxer_mock,
        benchmark=False,
        random_seed=0)

    # The base directory must hold the input FASTA and a per-target
    # sub-directory named after fasta_name.
    base_output_files = os.listdir(out_dir)
    self.assertIn('target.fasta', base_output_files)
    self.assertIn('test', base_output_files)


    # The per-target directory must contain exactly these entries.
    target_output_files = os.listdir(os.path.join(out_dir, 'test'))
    self.assertCountEqual(
        ['features.pkl', 'msas', 'ranked_0.pdb', 'ranking_debug.json',
         'relaxed_model1.pdb', 'result_model1.pkl', 'timings.json',
         'unrelaxed_model1.pdb'], target_output_files)

    # Check that pLDDT is set in the B-factor column.
    with open(os.path.join(out_dir, 'test', 'unrelaxed_model1.pdb')) as f:
      for line in f:
        if line.startswith('ATOM'):
          self.assertEqual(line[61:66], '42.00')
    print("End-to-end completed successfully")

# Run the locally defined test right away by calling the method on an
# instance directly (no test runner is involved).
r= RunAlphafoldTest()
r.test_end_to_end()


In [None]:
#@markdown Upload files to /content/alphafold/model/ .
#@markdown Check box to select and upload files.
upload_files = False #@param {type:"boolean"}
if upload_files:
  # files.upload() opens the Colab upload widget; each value of the returned
  # mapping is the raw content of one uploaded file.
  uploaded = files.upload()
  for filename, contents in uploaded.items():
    filepath = Path("/content/alphafold/model", filename)
    # Store the bytes exactly as received.  Decoding to UTF-8 and writing in
    # text mode would raise on any non-UTF-8 upload and adds nothing for
    # valid UTF-8 input.
    filepath.write_bytes(contents)
    print("Uploaded to %s" %(filepath))

In [None]:
#@markdown Optional code goes here. Type in anything, check box, and run cell
run_optional_code = False #@param {type:"boolean"}

# NOTE(review): nothing visible in this cell reads run_optional_code, so the
# lines below run whenever the cell is executed, whatever the checkbox says
# -- confirm whether the flag was meant to gate them.
! # use ! at start of line to indicate bash command
#   use any python command as-is
#   comment out commands you don't want to run
# The two examples below are commented out inside the shell command itself,
# so as written they do nothing.
! # mv /content/folding.py /content/alphafold/model/
! # mv /content/modules.py /content/alphafold/model/


In [None]:
# USER INPUT SECTION

# IMPORTS, STANDARD PARAMETERS AND METHODS

import os, sys
import os.path
import re
import hashlib
from pathlib import Path
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO
from google.colab import files
import shutil
from string import ascii_uppercase

# Local methods

def add_hash(x,y):
  """Return x, an underscore, and the first five hex digits of SHA-1(y).

  Args:
    x: String prefix.
    y: String to hash; encoded with the default ``str.encode()`` settings.
  """
  digest = hashlib.sha1(y.encode()).hexdigest()
  return "_".join((x, digest[:5]))

def clear_directories(all_dirs):
  """Leave every directory in all_dirs existing and empty.

  Directories are handled in the given order: an existing one is removed
  with its whole content, then it is created again (including any missing
  parents).

  Args:
    all_dirs: Iterable of pathlib.Path-like objects.
  """
  for directory in all_dirs:
    already_there = directory.exists()
    if already_there:
      shutil.rmtree(directory)
    directory.mkdir(parents=True)


def clean_query(query_sequence):
  """Normalise a sequence string to upper-case ASCII letters only.

  Whitespace is stripped first, then every character outside a-z / A-Z is
  dropped, and the remainder is upper-cased.
  """
  without_whitespace = "".join(query_sequence.split())
  letters_only = re.sub(r'[^a-zA-Z]','', without_whitespace)
  return letters_only.upper()

def clean_jobname(jobname, sequence=None):
  """Sanitise a job name and, if it has no underscore, append a sequence hash.

  Whitespace and every non-word character are removed.  A cleaned name that
  contains no "_" gets "_<hash>" appended through add_hash().

  Args:
    jobname: Raw job name.
    sequence: Sequence the hash is derived from.  When omitted (the original
      call style) the notebook-level ``query_sequence`` global is used, so
      existing callers behave exactly as before.

  Returns:
    The cleaned job name.
  """
  jobname = "".join(jobname.split())
  jobname = re.sub(r'\W+', '', jobname)
  if "_" not in jobname:
    # Only fall back to the notebook global when no sequence was supplied;
    # this lets callers hash the sequence that actually belongs to the job.
    hash_source = query_sequence if sequence is None else sequence
    jobname = add_hash(jobname, hash_source)
  return jobname

def save_sequence(jobname, query_sequence):
  """Write query_sequence to "<jobname>.fasta" and report it on stdout.

  The file holds a ">1" header line followed by the sequence, with no
  trailing newline.
  """
  filename = f"{jobname}.fasta"
  fasta_text = ">1\n%s" % query_sequence
  with open(filename, "w") as text_file:
    text_file.write(fasta_text)
  print("Saved sequence in %s: %s" %(filename, query_sequence))

def upload_templates(cif_dir):
  """Ask the user to upload template files and store them under cif_dir.

  Uploaded files whose name ends in ".pdb" are ignored; every other file is
  decoded as UTF-8 and written to cif_dir.  Python-level stdout is captured
  while the upload and the writes run.

  Args:
    cif_dir: Directory the accepted files are written to.

  Returns:
    List of pathlib.Path objects for the files that were written (empty if
    nothing was accepted, in which case a warning is printed).
  """
  manual_templates_uploaded = []
  with redirect_stdout(StringIO()):
    uploaded = files.upload()
    for filename, contents in uploaded.items():
      is_pdb = str(filename).endswith(".pdb")
      if not is_pdb:
        filepath = Path(cif_dir, filename)
        with filepath.open("w") as fh:
          fh.write(contents.decode("UTF-8"))
          manual_templates_uploaded.append(filepath)
  print("Templates uploaded: %s" %(manual_templates_uploaded))
  if len(manual_templates_uploaded) == 0:
    print("\n*** WARNING: no templates uploaded...Please use only .cif files ***\n")
  return manual_templates_uploaded

def get_jobnames_sequences_from_file(
    upload_manual_templates = None, cif_dir = None):
  """Read job names and sequences from uploaded text files.

  Each non-empty line must hold a job name followed by the sequence; any
  further whitespace-separated tokens are joined onto the sequence.  Lines
  with fewer than two tokens are skipped, and a job name that was already
  seen is ignored.

  Args:
    upload_manual_templates: When truthy, the user is asked to upload
      templates for every new job via upload_templates().
    cif_dir: Directory handed to upload_templates().

  Returns:
    Tuple ``(jobnames, query_sequences, cif_filename_dict)`` where the two
    lists are index-aligned and the dict maps a job name to the list of
    template paths uploaded for it.
  """
  from google.colab import files
  print("Upload file with one jobname, a space and one sequence on each line")

  uploaded = files.upload()
  query_sequences = []
  jobnames = []
  cif_filename_dict = {}
  for contents in uploaded.values():
    # Parse each file on its own.  Collecting every file in one shared buffer
    # would re-read all earlier files for each additional upload.
    for line in contents.decode("UTF-8").splitlines():
      spl = line.split()
      if len(spl) < 2:
        continue  # empty line
      # NOTE(review): clean_jobname(jobname) derives its hash from the
      # notebook-level query_sequence, not from the sequence on this line.
      jobname = clean_jobname(spl[0])
      query_sequence = clean_query("".join(spl[1:]))

      if jobname in jobnames:
        continue  # already there
      query_sequences.append(query_sequence)
      jobnames.append(jobname)
      if upload_manual_templates:
        print("\nPlease upload CIF template for %s" %(jobname))
        sys.stdout.flush()
        cif_filename_dict[jobname] = upload_templates(cif_dir)
  return jobnames, query_sequences, cif_filename_dict

# Set working directory
os.chdir("/content/")

# Locations for manually uploaded templates.  These two lines only define
# the paths; the directories are emptied further down in this cell through
# clear_directories().
parent_dir = Path("/content/manual_templates")
cif_dir = Path(parent_dir,"mmcif")

# GET INPUTS

#@title A. Enter sequence and jobname. Load with <i><b>Run</b></i> button to left.
#@markdown <b><i><font color=green>Protein sequence and job name</font></i></b>

# Form fields: the raw values are normalised later in this cell with
# clean_query() / clean_jobname().
query_sequence = 'asdfhiertpymna' #@param {type:"string"}
jobname = 'test' #@param {type:"string"}

templates_to_use = "None"  

# Template options are hard-wired off in this notebook (not form fields).
upload_manual_templates = False 
include_templates_from_pdb = False

disable_jit = True #@param {type:"boolean"}
number_of_ensembles = 1 #@param {type:"integer"}
af_iterations = 0 #@param {type:"integer"}
include_target_all_atom_positions_loaded_above = True #@param {type:"boolean"}
# target_all_atom_positions must already exist as a notebook global (it is
# assigned by the cell that parses the PDB text); otherwise the next line
# raises NameError.  Un-ticking the box discards the loaded target.
if target_all_atom_positions is not None:
  if include_target_all_atom_positions_loaded_above:
    print("Including target_all_atom_positions as target")
  else:
    target_all_atom_positions = None
if upload_manual_templates:
  print("Templates will be uploaded")
if include_templates_from_pdb:
  print("Templates from the PDB will be included")

upload_file_with_jobname_space_sequence_lines = False 
maximum_templates_from_pdb = 20 
clear_saved_sequences_and_jobnames = True 

# Initialize query_sequences so we can loop through input
# Start from empty job lists when asked to, or when no non-empty
# query_sequences list exists yet from an earlier run of this cell.
if clear_saved_sequences_and_jobnames or (
     not locals().get('query_sequences', None)):
  query_sequences = []
  jobnames = []
  cif_filename_dict = {}
  # parent_dir is listed first, so it is recreated before cif_dir inside it.
  clear_directories([parent_dir,cif_dir])
# At cell (module) level locals() is the global namespace, so this removes
# the variable itself.
del locals()['clear_saved_sequences_and_jobnames'] # so it updates

if upload_file_with_jobname_space_sequence_lines:
  del locals()['upload_file_with_jobname_space_sequence_lines'] # so it updates
  # Replaces the job lists with whatever the uploaded file(s) contain.
  jobnames, query_sequences, cif_filename_dict = \
    get_jobnames_sequences_from_file(
        upload_manual_templates = upload_manual_templates,
        cif_dir = cif_dir)
else: # usual
  # Single job taken from the two form fields above.
  jobname = clean_jobname(jobname)
  query_sequence = clean_query(query_sequence)
  if query_sequence and not jobname:
    print("Please enter a job name and rerun")
    raise AssertionError("Please enter a job name and rerun")

  if jobname and not query_sequence:
    print("Please enter a query_sequence and rerun")
    raise AssertionError("Please enter a query_sequence rerun")
  
  # Add sequence and jobname if new
  # (skipped when either the sequence or the job name is already listed)
  if (jobname and query_sequence) and (
       not query_sequence in query_sequences) and (
       not jobname in jobnames):
      query_sequences.append(query_sequence)
      jobnames.append(jobname)
      if upload_manual_templates:
        print("\nPlease upload template for %s" %(jobname))
        sys.stdout.flush()
        cif_filename_dict[jobname] = upload_templates(cif_dir)

# Save sequence
# jobnames and query_sequences are index-aligned lists.
for i in range(len(query_sequences)):
  # save the sequence as a file with name jobname.fasta
  save_sequence(jobnames[i], query_sequences[i])
  
print("\nCurrent jobs, sequences, and templates:")

# One summary line per job: name, sequence, and the base names of any
# templates recorded for it.
for qs,jn in zip(query_sequences, jobnames):
  template_list = []
  for t in cif_filename_dict.get(jn,[]):
    template_list.append(os.path.split(str(t))[-1])
  print(jn, qs, template_list)

sys.stdout.flush()  # seems to overwrite otherwise


# Stop the cell here if there is nothing to run.
if not query_sequences:
  print("Please supply a query sequence and run again")
  raise AssertionError("Need a query sequence")

# STANDARD PARAMETERS AND METHODS

#standard values of parameters
# These are plain assignments (no form fields), i.e. fixed for this notebook.
msa_mode = "MMseqs2 (UniRef+Environmental)" 
num_models = 1 
# homooligomer is read as a global by the plotting helpers defined in the
# next cell.
homooligomer = 1
use_msa = True
use_env = True
use_custom_msa = False
# use_amber is read as a global by show_pdb() to pick the relaxed or
# unrelaxed PDB file name.
use_amber = False 
use_templates = True


In [None]:
#@title B. Create AlphaFold models with the <b><i>Run</i></b> button to the left

! echo "Clearing and re-importing python modules and tmp directories..."
! rm -rf /tmp/absl_testing/
! mkdir /tmp/absl_testing/

# Forget previously loaded modules and notebook globals whose name contains
# one of the substrings below, so the imports later in this cell load fresh
# copies.  The candidate names are snapshotted into a list first because
# both sys.modules and globals() are modified inside the loop.
for x in list(sys.modules.keys(  )) + list(globals()):
  for key in ['alphafold','protein', 'Alphafold', 'Protein','haiku',"layer_norm","base","colabfold","control_flow","check_tree_and_avals"]:
    if x.find(key)>-1:
      if x in list(sys.modules.keys()):
        
        del(sys.modules[x])
      if x in list(globals().keys()):
      
        del globals()[x]
        assert not x in list(globals().keys())
        # The name is gone from globals; stop testing further substrings.
        break
    

# Append the conda and system package directories to PYTHONPATH once.
# os.environ.get() is used because PYTHONPATH may be unset, in which case a
# plain os.environ['PYTHONPATH'] lookup raises KeyError.  When the variable
# is unset or empty the new value starts without a leading ":" so no empty
# path entry is created.
if "/opt/conda/bin" not in os.environ.get('PYTHONPATH', '').split(":"):
  os.environ['PYTHONPATH'] = ":".join(
      [p for p in [os.environ.get('PYTHONPATH', '')] if p] + [
          "/opt/conda/bin",
          "/opt/conda/lib/python3.7/site-packages",
          "/usr/local/lib/python3.7/dist-packages"])
! echo "VERSION:  `grep Version run_alphafold_test.py|grep -v Apache`"


from contextlib import redirect_stderr, redirect_stdout
from dataclasses import dataclass, replace
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO


print("Setting up methods...", end = "")
# Always True here; the flag only wraps the block of imports below.
import_alphafold_items = True
# setup the model
if import_alphafold_items:

  # hiding warning messages
  import warnings
  from absl import logging
  import os
  import tensorflow as tf
  # Silence Python warnings, absl logging and TensorFlow logging.
  warnings.filterwarnings('ignore')
  logging.set_verbosity("error")
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
  tf.get_logger().setLevel('ERROR')

  # AlphaFold / ColabFold modules used by the helper functions defined below.
  import sys
  import numpy as np
  import pickle
  from alphafold.common import protein
  from alphafold.data import pipeline
  from alphafold.data import templates
  from alphafold.model import data
  from alphafold.model import config
  from alphafold.model import model
  from alphafold.data.tools import hhsearch
  import colabfold as cf

  # plotting libraries
  import py3Dmol
  import matplotlib.pyplot as plt
  import ipywidgets
  from ipywidgets import interact, fixed, GridspecLayout, Output



from alphafold.data import mmcif_parsing
from alphafold.data.templates import (_get_pdb_id_and_chain,
                                      _process_single_hit,
                                      _assess_hhsearch_hit,
                                      _build_query_to_hit_index_mapping,
                                      _extract_template_features,
                                      SingleHitResult,
                                      TEMPLATE_FEATURES)

def mk_mock_template(query_sequence):
  """Build an all-blank template feature dict for query_sequence.

  AlphaFold's model requires a template input, so this supplies a single
  placeholder template: an all-gap sequence, zero coordinates and masks,
  and a confidence of -1 at every position.

  Args:
    query_sequence: Sequence whose length sets the template length.

  Returns:
    Dict of template features, each with a leading template axis of size 1
    (the three metadata entries are one-element lists of ``b'none'``).
  """
  rc = templates.residue_constants
  n_res = len(query_sequence)
  gap_sequence = "-" * n_res

  confidence = np.full(n_res, -1)
  positions = np.zeros((n_res, rc.atom_type_num, 3))
  masks = np.zeros((n_res, rc.atom_type_num))
  aatype = rc.sequence_to_onehot(gap_sequence, rc.HHBLITS_AA_TO_ID)

  return {
      'template_all_atom_positions': positions[None],
      'template_all_atom_masks': masks[None],
      'template_sequence': ['none'.encode()],
      'template_aatype': np.array(aatype)[None],
      'template_confidence_scores': confidence[None],
      'template_domain_names': ['none'.encode()],
      'template_release_date': ['none'.encode()],
  }

def mk_template(a3m_lines, template_paths):
  """Search the pdb70 database with hhsearch and featurize the hits.

  Args:
    a3m_lines: Query passed to ``HHSearch.query``.
    template_paths: Directory used both as the mmCIF directory and as the
      parent of the "pdb70" database.

  Returns:
    The ``features`` attribute of the featurizer result.

  Note:
    The query sequence handed to the featurizer is the notebook-level
    ``query_sequence`` global, not an argument of this function.
  """
  featurizer = templates.TemplateHitFeaturizer(
      mmcif_dir=template_paths,
      max_template_date="2100-01-01",
      max_hits=20,
      kalign_binary_path="kalign",
      release_dates_path=None,
      obsolete_pdbs_path=None)

  pdb70_database = f"{template_paths}/pdb70"
  runner = hhsearch.HHSearch(binary_path="hhsearch", databases=[pdb70_database])

  hits = pipeline.parsers.parse_hhr(runner.query(a3m_lines))
  result = featurizer.get_templates(
      query_sequence=query_sequence,
      query_pdb_code=None,
      query_release_date=None,
      hits=hits)
  return result.features

def set_bfactor(pdb_filename, bfac, idx_res, chains):
  """Rewrite a PDB file in place with new chain IDs and B-factors.

  Only lines starting with "ATOM  " are written back; every other line of
  the input file is dropped.  For each ATOM line the residue number (file
  columns 23-26) minus one is looked up in ``idx_res``; the position found
  selects the chain ID (column 22) and the B-factor (columns 61-66, written
  with format ``6.2f``).

  Args:
    pdb_filename: Path of the PDB file to rewrite.
    bfac: Per-residue values, indexed by position in ``idx_res``.
    idx_res: NumPy array of zero-based residue indices.
    chains: Per-residue chain IDs, indexed like ``bfac``.

  Raises:
    IndexError: If a residue number in the file is not present in idx_res.
  """
  # Read everything first and close the handle before reopening the same
  # path for writing.
  with open(pdb_filename, "r") as pdb_in:
    original_lines = pdb_in.readlines()

  # The context manager closes the output even if a line fails to parse.
  with open(pdb_filename, "w") as pdb_out:
    for line in original_lines:
      if line[0:6] == "ATOM  ":
        seq_id = int(line[22:26].strip()) - 1
        seq_id = np.where(idx_res == seq_id)[0][0]
        pdb_out.write(f"{line[:21]}{chains[seq_id]}{line[22:60]}{bfac[seq_id]:6.2f}{line[66:]}")

def predict_structure(prefix, feature_dict, Ls, model_params, 
  use_model,
  model_runner_1,
  model_runner_3,
  do_relax=False, random_seed=0):  
  """Predicts structure using AlphaFold for the given sequence.

  Runs every model in ``model_params`` whose name is in ``use_model``, ranks
  the results by mean pLDDT (highest first) and writes one
  ``{prefix}_unrelaxed_model_{rank}.pdb`` file per result.

  Args:
    prefix: Path prefix of the PDB files that are written.
    feature_dict: Feature mapping; its 'residue_index' entry is offset to
      mark chain breaks (see the comments in the body).
    Ls: Number of residues in each chain.
    model_params: Mapping of model name -> parameters.
    use_model: Container of the model names to run.
    model_runner_1: Runner used when the model name contains "1" or "2".
    model_runner_3: Runner used when the model name contains "3", "4" or "5".
    do_relax: Not read anywhere in this function.
    random_seed: Passed on to ``model_runner.process_features``.

  Returns:
    Dict keyed "model_1", "model_2", ... in rank order; each value is
    ``{"plddt": ..., "pae": ...}`` taken from that model's prediction result.
  """

  # Minkyung's code
  # add big enough number to residue index to indicate chain breaks
  # NOTE(review): idx_res is the same object that feature_dict holds, so the
  # += below also changes the caller's data (assumes a NumPy array -- TODO
  # confirm); calling this twice on one feature_dict would add the offset
  # twice.
  idx_res = feature_dict['residue_index']
  L_prev = 0
  # Ls: number of residues in each chain
  for L_i in Ls[:-1]:
      idx_res[L_prev+L_i:] += 200
      L_prev += L_i  
  # One chain letter per residue: "A" for the first chain, "B" for the next...
  chains = list("".join([ascii_uppercase[n]*L for n,L in enumerate(Ls)]))
  feature_dict['residue_index'] = idx_res

  # Run the models.
  plddts,paes = [],[]
  unrelaxed_pdb_lines = []
  # relaxed_pdb_lines is never filled in this function.
  relaxed_pdb_lines = []

  for model_name, params in model_params.items():
    if model_name in use_model:
      print(f"running {model_name}")
      # swap params to avoid recompiling
      # note: models 1,2 have diff number of params compared to models 3,4,5
      # NOTE(review): the runner is picked by substring match on the digits.
      # A name containing none of 1-5 leaves model_runner unset (or stale from
      # the previous iteration); a name matching both groups ends up with
      # model_runner_3.
      if any(str(m) in model_name for m in [1,2]): model_runner = model_runner_1
      if any(str(m) in model_name for m in [3,4,5]): model_runner = model_runner_3
      model_runner.params = params
      
      processed_feature_dict = model_runner.process_features(feature_dict, random_seed=random_seed)
      prediction_result = model_runner.predict(processed_feature_dict)
      unrelaxed_protein = protein.from_prediction(processed_feature_dict,prediction_result)
      unrelaxed_pdb_lines.append(protein.to_pdb(unrelaxed_protein))
      plddts.append(prediction_result['plddt'])
      paes.append(prediction_result['predicted_aligned_error'])


  # rerank models based on predicted lddt
  # argsort is ascending, so [::-1] puts the highest mean pLDDT first.
  lddt_rank = np.mean(plddts,-1).argsort()[::-1]
  out = {}
  
  # n is the rank (0 = best); r is the index into the per-model result lists.
  for n,r in enumerate(lddt_rank):
    print(f"model_{n+1} {np.mean(plddts[r])}")

    unrelaxed_pdb_path = f'{prefix}_unrelaxed_model_{n+1}.pdb'    
    with open(unrelaxed_pdb_path, 'w') as f: f.write(unrelaxed_pdb_lines[r])
    # Rewrite the file just written with chain IDs and per-residue pLDDT in
    # the B-factor column.
    set_bfactor(unrelaxed_pdb_path, plddts[r], idx_res, chains)


    out[f"model_{n+1}"] = {"plddt":plddts[r], "pae":paes[r]}
  return out



def hh_process_seq(query_seq,template_seq,hhDB_dir,db_prefix="DB"):
  """
  This is a hack to get hhsuite output strings to pass on
  to the AlphaFold template featurizer. 
  
  Note: that in the case of multiple templates, this would be faster to build one database for
  all the templates. Currently it builds a database with only one template at a time. Even 
  better would be to get an hhsuite alignment without using a database at all, just between
  pairs of sequence files. However, I have not figured out how to do this.

  Update: I think the hhsearch can be replaced completely, and we can just do a pairwise 
  alignment with biopython, or skip alignment if the seqs match. TODO

  Args:
    query_seq: Record written out as FASTA with SeqIO.write and used as the
      hhsearch query (presumably a Bio SeqRecord -- confirm against caller).
    template_seq: Record written to msa/template.fasta as the only database
      entry; its ``id`` becomes the name of the returned hit.
    hhDB_dir: pathlib-style directory for the database; it is deleted first
      if it already exists.
    db_prefix: Database prefix handed to HHSearch.  The shell commands below
      build files named "DB_*", so any other value will not match them.

  Returns:
    The first parsed hhsearch hit with its name replaced by
    ``template_seq.id``, or None when there are no hits.

  Side effects:
    Changes the process working directory to hhDB_dir and does not restore it.
  """
  # set up directory for hhsuite DB. Place one template fasta file to be the DB contents
  if hhDB_dir.exists():
    shutil.rmtree(hhDB_dir)
  
  msa_dir = Path(hhDB_dir,"msa")
  msa_dir.mkdir(parents=True)
  template_seq_path = Path(msa_dir,"template.fasta")
  with template_seq_path.open("w") as fh:
    SeqIO.write([template_seq], fh, "fasta")

  # make hhsuite DB
  # Python-level stdout is redirected into a throwaway buffer while the
  # database is built.
  with redirect_stdout(StringIO()) as out:
    os.chdir(msa_dir)
    %shell ffindex_build -s ../DB_msa.ff{data,index} .
    os.chdir(hhDB_dir)
    %shell ffindex_apply DB_msa.ff{data,index}  -i DB_a3m.ffindex -d DB_a3m.ffdata  -- hhconsensus -M 50 -maxres 65535 -i stdin -oa3m stdout -v 0
    %shell rm DB_msa.ff{data,index}
    %shell ffindex_apply DB_a3m.ff{data,index} -i DB_hhm.ffindex -d DB_hhm.ffdata -- hhmake -i stdin -o stdout -v 0
    %shell cstranslate -f -x 0.3 -c 4 -I a3m -i DB_a3m -o DB_cs219 
    %shell sort -k3 -n -r DB_cs219.ffindex | cut -f1 > sorting.dat

    # Reorder the hhm and a3m files to follow sorting.dat, then move the
    # ordered copies over the originals.
    %shell ffindex_order sorting.dat DB_hhm.ff{data,index} DB_hhm_ordered.ff{data,index}
    %shell mv DB_hhm_ordered.ffindex DB_hhm.ffindex
    %shell mv DB_hhm_ordered.ffdata DB_hhm.ffdata

    %shell ffindex_order sorting.dat DB_a3m.ff{data,index} DB_a3m_ordered.ff{data,index}
    %shell mv DB_a3m_ordered.ffindex DB_a3m.ffindex
    %shell mv DB_a3m_ordered.ffdata DB_a3m.ffdata

  # run hhsearch
  hhsearch_runner = hhsearch.HHSearch(binary_path="hhsearch", databases=[hhDB_dir.as_posix()+"/"+db_prefix])
  # Serialise the query record to a FASTA string in memory.
  with StringIO() as fh:
    SeqIO.write([query_seq], fh, "fasta")
    seq_fasta = fh.getvalue()
  hhsearch_result = hhsearch_runner.query(seq_fasta)

  # process hits
  hhsearch_hits = pipeline.parsers.parse_hhr(hhsearch_result)
  if len(hhsearch_hits) >0:
    hit = hhsearch_hits[0]
    # dataclasses.replace returns a copy of the hit with the name swapped.
    hit = replace(hit,**{"name":template_seq.id})
  else:
    hit = None
  return hit

def plot_plddt_legend():
  """Draw a small, axis-free figure that holds only the plDDT colour legend.

  Returns:
    The module-level ``plt`` object, so callers can chain ``.show()``.
  """
  band_labels = ['plDDT:','Very low (<50)','Low (60)','OK (70)','Confident (80)','Very high (>90)']
  band_colors = ["#FFFFFF","#FF0000","#FFFF00","#00FF00","#00FFFF","#0000FF"]

  plt.figure(figsize=(1,0.1),dpi=100)
  # Zero-height bars at x=0: they draw nothing visible but give the legend
  # one coloured handle per label.
  for band_color in band_colors:
    plt.bar(0, 0, color=band_color)

  legend_options = dict(frameon=False,
                        loc='center', ncol=6,
                        handletextpad=1,
                        columnspacing=1,
                        markerscale=0.5)
  plt.legend(band_labels, **legend_options)
  plt.axis(False)
  return plt

def plot_confidence(outs, model_num=1):
  """Plot per-residue plDDT and the PAE matrix for one model, side by side.

  Args:
    outs: mapping indexed by "model_<n>"; the selected entry must provide
      "plddt" (plotted as a line) and "pae" (shown as an image).
    model_num: number used to build the "model_<n>" key. Defaults to 1.

  Returns:
    The module-level ``plt`` object, so callers can chain ``.show()``.

  Note:
    Reads the notebook globals ``homooligomer`` and ``query_sequence`` to
    place the vertical marker lines on the plDDT panel.
  """
  model_name = f"model_{model_num}"
  plt.figure(figsize=(10,3),dpi=100)
  #########################################
  # Left panel: plDDT trace
  plt.subplot(1,2,1); plt.title('Predicted lDDT')
  plt.plot(outs[model_name]["plddt"])
  # One black vertical line at every multiple of the query length
  # (homooligomer+1 lines in total, including x=0).
  for n in range(homooligomer+1):
    x = n*(len(query_sequence))
    plt.plot([x,x],[0,100],color="black")
  plt.ylabel('plDDT')
  plt.xlabel('position')
  #########################################
  # Right panel: PAE matrix, colour scale clamped to 0..30
  plt.subplot(1,2,2);plt.title('Predicted Aligned Error')
  plt.imshow(outs[model_name]["pae"], cmap="bwr",vmin=0,vmax=30)
  plt.colorbar()
  plt.xlabel('Scored residue')
  plt.ylabel('Aligned residue')
  #########################################
  return plt

def show_pdb(model_num=1, show_sidechains=False, show_mainchains=False, color="lDDT"):
  """Build a py3Dmol view of one predicted model read from its PDB file.

  The file name is derived from the notebook globals ``jobname`` and
  ``use_amber``: "<jobname>_relaxed_model_<n>.pdb" when ``use_amber`` is
  truthy, otherwise "<jobname>_unrelaxed_model_<n>.pdb".

  Args:
    model_num: number used to build the "model_<n>" part of the file name.
    show_sidechains: if true, add stick/sphere styles for side-chain atoms.
    show_mainchains: if true, add sticks for the C, O, N and CA atoms.
    color: "lDDT" colours the cartoon by the 'b' property with a 'roygb'
      gradient between 50 and 90; "rainbow" uses the 'spectrum' colour;
      "chain" gives each of the first ``homooligomer`` chains (A-H, global)
      its own colour. Any other value leaves the default style untouched.

  Returns:
    The configured ``py3Dmol.view`` object (the caller invokes ``.show()``).
  """
  model_name = f"model_{model_num}"
  if use_amber:
    pdb_filename = f"{jobname}_relaxed_{model_name}.pdb"
  else:
    pdb_filename = f"{jobname}_unrelaxed_{model_name}.pdb"

  # Read through a context manager so the file handle is closed right away
  # instead of being left to the garbage collector.
  with open(pdb_filename,'r') as pdb_file:
    pdb_text = pdb_file.read()

  view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',)
  view.addModel(pdb_text,'pdb')

  if color == "lDDT":
    view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':50,'max':90}}})
  elif color == "rainbow":
    view.setStyle({'cartoon': {'color':'spectrum'}})
  elif color == "chain":
    chain_colors = ["lime","cyan","magenta","yellow","salmon","white","blue","orange"]
    # range(homooligomer) only limits how many chains get styled; the loop
    # variable no longer shadows the ``color`` parameter.
    for _,chain,chain_color in zip(range(homooligomer),list("ABCDEFGH"),chain_colors):
      view.setStyle({'chain':chain},{'cartoon': {'color':chain_color}})

  if show_sidechains:
    BB = ['C','O','N']
    # everything except GLY/PRO: all non-backbone atoms as sticks
    view.addStyle({'and':[{'resn':["GLY","PRO"],'invert':True},{'atom':BB,'invert':True}]},
                        {'stick':{'colorscheme':"WhiteCarbon",'radius':0.3}})
    # GLY: mark the CA atom with a sphere
    view.addStyle({'and':[{'resn':"GLY"},{'atom':'CA'}]},
                        {'sphere':{'colorscheme':"WhiteCarbon",'radius':0.3}})
    # PRO: everything except C and O as sticks
    view.addStyle({'and':[{'resn':"PRO"},{'atom':['C','O'],'invert':True}]},
                        {'stick':{'colorscheme':"WhiteCarbon",'radius':0.3}})
  if show_mainchains:
    BB = ['C','O','N','CA']
    view.addStyle({'atom':BB},{'stick':{'colorscheme':"WhiteCarbon",'radius':0.3}})

  view.zoomTo()
  return view

def run_job(query_sequence,
        jobname,
        upload_manual_templates,
        manual_templates_uploaded,
        maximum_templates_from_pdb,
        num_models,
        homooligomer,
        use_msa,
        use_env,
        use_custom_msa,
        use_amber,
        use_templates,
        include_templates_from_pdb,
        number_of_ensembles,
        af_iterations,
        disable_jit,
        target_all_atom_positions,):

  #@title Get MSA and templates
  print("Getting MSA and templates...")
  if (not include_templates_from_pdb):
    template_paths = None # toss these ... get template_paths later
  if use_templates:
    a3m_lines, template_paths = cf.run_mmseqs2(query_sequence, jobname, use_env, use_templates=True)
    if template_paths is None:
      template_features = mk_mock_template(query_sequence * homooligomer)
    else:
      template_features = mk_template(a3m_lines, template_paths)
  elif use_msa:
    a3m_lines = cf.run_mmseqs2(query_sequence, jobname, use_env)
    template_features = mk_mock_template(query_sequence * homooligomer)
  else:
    template_features = mk_mock_template(query_sequence * homooligomer)
  
  # File for a3m
  a3m_file = f"{jobname}.a3m"

  if use_msa:
    with open(a3m_file, "w") as text_file:
      text_file.write(a3m_lines)
  else:
    a3m_lines = "".join(open(a3m_file,"r").read())
  
  # parse MSA
  msa, deletion_matrix = pipeline.parsers.parse_a3m(a3m_lines)
  
  print("Done with MSA and templates")
  
  #Process templates
  print("PROCESSING TEMPLATES")
  
  os.chdir("/content/")
  
  other_cif_dir = Path("/content/%s" %(template_paths))
  parent_dir = Path("/content/manual_templates")
  cif_dir = Path(parent_dir,"mmcif")
  fasta_dir = Path(parent_dir,"fasta")
  hhDB_dir = Path(parent_dir,"hhDB")
  msa_dir = Path(hhDB_dir,"msa")
  clear_directories([fasta_dir,hhDB_dir,msa_dir])
  
  cif_files = list(cif_dir.glob("*"))
  number_of_supplied_templates = len(cif_files)
  # Only include the cif_files in manual_templates_uploaded
  manual_files_as_text = []
  for f in manual_templates_uploaded:
    manual_files_as_text.append(
        os.path.split(str(f))[-1])
  cif_files_to_include = []
  for cif_file in cif_files:
    text = os.path.split(str(cif_file))[-1]
    if text in manual_files_as_text:
      cif_files_to_include.append(cif_file)
  cif_files = cif_files_to_include
      
  if include_templates_from_pdb:
    other_cif_files = list(other_cif_dir.glob("*"))
    cif_files += other_cif_files
  print("CIF files to include:",cif_files)
  query_seq = SeqRecord(Seq(query_sequence),id="query",name="",description="")
  query_seq_path = Path(fasta_dir,"query.fasta")
  with query_seq_path.open("w") as fh:
      SeqIO.write([query_seq], fh, "fasta")
  
  shutil.copyfile(query_seq_path,Path(msa_dir,"query.fasta"))
  seqs = []
  template_hit_list = []
  
  n_used = 0
  for i,filepath in enumerate(cif_files):
    if not str(filepath).endswith(".cif"): continue
    if n_used >= maximum_templates_from_pdb + number_of_supplied_templates:
      continue
    n_used += 1
    print("CIF file included:",i+1,str(filepath))
    with filepath.open("r") as fh:
      filestr = fh.read()
      mmcif_obj = mmcif_parsing.parse(file_id=filepath.stem,mmcif_string=filestr)
      mmcif = mmcif_obj.mmcif_object
      if not mmcif: continue
  
      for chain_id,template_sequence in mmcif.chain_to_seqres.items():
        template_sequence = mmcif.chain_to_seqres[chain_id]
        seq_name = filepath.stem.upper()+"_"+chain_id
        seq = SeqRecord(Seq(template_sequence),id=seq_name,name="",description="")
        seqs.append(seq)
  
        with  Path(fasta_dir,seq.id+".fasta").open("w") as fh:
          SeqIO.write([seq], fh, "fasta")
  
        """
        At this stage, we have a template sequence.
        and a query sequence. 
        There are two options to generate template features:
          1. Write new code to manually generate template features
          2. Get an hhr alignment string, and pass that
            to the existing template featurizer. 
            
        I chose the second, implemented in hh_process_seq()
        """
        SeqIO.write([seq], sys.stdout, "fasta")
        SeqIO.write([query_seq], sys.stdout, "fasta")
        try:
          hit = hh_process_seq(query_seq,seq,hhDB_dir)
        except Exception as e:
          hit = None
        if hit is not None:
          template_hit_list.append(hit)
  
  if template_hit_list:
    #process hits into template features
    template_hit_list = [replace(hit,**{"index":i+1}) for i,hit in enumerate(template_hit_list)]
  
  if (len(manual_templates_uploaded) > 0) and upload_manual_templates and (not template_hit_list):
    # check to make sure we got something
    # need template and did not get any
      print("\n",80*"-")
      print("\nNo templates obtained...please be sure to use a .cif file")
      print("Use this converter: https://mmcif.pdbj.org/converter/")
      print("\nYou can hit the red run button and load a new file")
      print("\nYou can then hit all the remaining red run buttons one by one")
      print("\n ... or you can go up to Runtime and hit 'Run all' again to start over")
      print("\n",80*"-")
      raise AssertionError("Failed to read template file")
  elif use_templates and template_hit_list:
    # have new templates to work with
  
    template_features = {}
    for template_feature_name in TEMPLATE_FEATURES:
      template_features[template_feature_name] = []
  
    for i,hit in enumerate(sorted(template_hit_list, key=lambda x: x.sum_probs, reverse=True)):
      # modifications to alphafold/data/templates.py _process_single_hit
      hit_pdb_code, hit_chain_id = _get_pdb_id_and_chain(hit)
      mapping = _build_query_to_hit_index_mapping(
      hit.query, hit.hit_sequence, hit.indices_hit, hit.indices_query,
      query_sequence)
      template_sequence = hit.hit_sequence.replace('-', '')
  
      if 1:
        features, realign_warning = _extract_template_features(
          mmcif_object=mmcif,
          pdb_id=hit_pdb_code,
          mapping=mapping,
          template_sequence=template_sequence,
          query_sequence=query_sequence,
          template_chain_id=hit_chain_id,
          kalign_binary_path="kalign")
      if 0:
        continue
      features['template_sum_probs'] = [hit.sum_probs]
  
      single_hit_result = SingleHitResult(features=features, error=None, warning=None)
      for k in template_features:
        template_features[k].append(features[k])
  
    for name in template_features:
      template_features[name] = np.stack(
          template_features[name], axis=0).astype(TEMPLATE_FEATURES[name])
      
    #overwrite template data
    template_paths = cif_dir.as_posix()


    # Select only one chain from any cif file
    unique_template_hits = []
    pdb_text_list = []
    for hit in template_hit_list:
      pdb_text = hit.name.split()[0].split("_")[0]
      if not pdb_text in pdb_text_list:
        pdb_text_list.append(pdb_text)
        unique_template_hits.append(hit)
    template_hit_list = unique_template_hits
    template_hits = template_hit_list

    print("\nIncluding templates:")
    for hit in template_hit_list:
      print("\t",hit.name.split()[0])
    if len(template_hit_list) == 0:
      print("No templates found...quitting")
      raise AssertionError("No templates found...quitting")
    os.chdir("/content/")
  
    for key,value in template_features.items():
      if np.all(value==0):
        print("ERROR: Some template features are empty")
  else:  # no templates
    print("Not using any templates")
  
  print("\nPREDICTING STRUCTURE")

  
  # collect model weights
  use_model = {}
  model_params = {}
  model_runner_1 = None
  model_runner_3 = None

  for model_name in ["model_1","model_2","model_3","model_4","model_5"][:num_models]:
    use_model[model_name] = True

    if model_name not in list(model_params.keys()):
      model_params[model_name] = data.get_model_haiku_params(model_name=model_name+"_ptm", data_dir=".")
      if model_name == "model_1":
        model_config = config.model_config(model_name+"_ptm")
        model_config.model.num_recycle = af_iterations
        model_config.data.common.num_recycle = af_iterations
        print("Recycle iterations will be %s" %(model_config.data.common.num_recycle))
        model_config.data.eval.num_ensemble = number_of_ensembles
        print("Number of ensembles will be %s" %(model_config.data.eval.num_ensemble))
        if disable_jit:
          model_config.data.common.disable_jit = True
          model_config.model.global_config.disable_jit = True

        if target_all_atom_positions is not None:
          print("Setting all_atom",target_all_atom_positions.shape[0])
          model_config.model.global_config.target_all_atom_positions = target_all_atom_positions
          print("Set target_all_atom_positions")
        model_runner_1 = model.RunModel(model_config, model_params[model_name])
        print("Done running model.RunModel to get model_runner_1")

      if model_name == "model_3":
        model_config = config.model_config(model_name+"_ptm")
        model_config.data.eval.num_ensemble = 1
        model_runner_3 = model.RunModel(model_config, model_params[model_name])


  if homooligomer == 1:
    msas = [msa]
    deletion_matrices = [deletion_matrix]
  else:
    # make multiple copies of msa for each copy
    # AAA------
    # ---AAA---
    # ------AAA
    #
    # note: if you concat the sequences (as below), it does NOT work
    # AAAAAAAAA
    msas = []
    deletion_matrices = []
    Ln = len(query_sequence)
    for o in range(homooligomer):
      L = Ln * o
      R = Ln * (homooligomer-(o+1))
      msas.append(["-"*L+seq+"-"*R for seq in msa])
      deletion_matrices.append([[0]*L+mtx+[0]*R for mtx in deletion_matrix])
  
  # gather features
  feature_dict = {
      **pipeline.make_sequence_features(sequence=query_sequence*homooligomer,
                                        description="none",
                                        num_res=len(query_sequence)*homooligomer),
      **pipeline.make_msa_features(msas=msas,deletion_matrices=deletion_matrices),
      **template_features
  }
  outs = predict_structure(jobname, feature_dict,
                           Ls=[len(query_sequence)]*homooligomer,
                           model_params=model_params, use_model=use_model,
                           model_runner_1=model_runner_1,
                           model_runner_3=model_runner_3,
                           do_relax=use_amber)
  print("DONE WITH STRUCTURE")
  
  #@title Making plots...
  
  # gather MSA info
  deduped_full_msa = list(dict.fromkeys(msa))
  msa_arr = np.array([list(seq) for seq in deduped_full_msa])
  seqid = (np.array(list(query_sequence)) == msa_arr).mean(-1)
  seqid_sort = seqid.argsort() #[::-1]
  non_gaps = (msa_arr != "-").astype(float)
  non_gaps[non_gaps == 0] = np.nan
  
  ##################################################################
  plt.figure(figsize=(14,4),dpi=100)
  ##################################################################
  plt.subplot(1,2,1); plt.title("Sequence coverage")
  plt.imshow(non_gaps[seqid_sort]*seqid[seqid_sort,None],
             interpolation='nearest', aspect='auto',
             cmap="rainbow_r", vmin=0, vmax=1, origin='lower')
  plt.plot((msa_arr != "-").sum(0), color='black')
  plt.xlim(-0.5,msa_arr.shape[1]-0.5)
  plt.ylim(-0.5,msa_arr.shape[0]-0.5)
  plt.colorbar(label="Sequence identity to query",)
  plt.xlabel("Positions")
  plt.ylabel("Sequences")
  
  ##################################################################
  plt.subplot(1,2,2); plt.title("Predicted lDDT per position")
  for model_name,value in outs.items():
    plt.plot(value["plddt"],label=model_name)
  if homooligomer > 0:
    for n in range(homooligomer+1):
      x = n*(len(query_sequence)-1)
      plt.plot([x,x],[0,100],color="black")
  plt.legend()
  plt.ylim(0,100)
  plt.ylabel("Predicted lDDT")
  plt.xlabel("Positions")
  plt.savefig(jobname+"_coverage_lDDT.png")
  ##################################################################
  plt.show()
  
  print("Predicted Alignment Error")
  ##################################################################
  plt.figure(figsize=(3*num_models,2), dpi=100)
  for n,(model_name,value) in enumerate(outs.items()):
    plt.subplot(1,num_models,n+1)
    plt.title(model_name)
    plt.imshow(value["pae"],label=model_name,cmap="bwr",vmin=0,vmax=30)
    plt.colorbar()
  plt.savefig(jobname+"_PAE.png")
  plt.show()
  ##################################################################
  #@title Displaying 3D structure... {run: "auto"}
  model_num = 1 
  color = "lDDT" 
  show_sidechains = False 
  show_mainchains = False 
  
  
  
  show_pdb(model_num,show_sidechains, show_mainchains, color).show()
  if color == "lDDT": plot_plddt_legend().show()  
  plot_confidence(outs, model_num).show()
  #@title Packaging and downloading results...
  
  #@markdown When modeling is complete .zip files with results will be downloaded automatically.
  
  citations = {
  "Mirdita2021":  """@article{Mirdita2021,
  author = {Mirdita, Milot and Ovchinnikov, Sergey and Steinegger, Martin},
  doi = {10.1101/2021.08.15.456425},
  journal = {bioRxiv},
  title = {{ColabFold - Making Protein folding accessible to all}},
  year = {2021},
  comment = {ColabFold including MMseqs2 MSA server}
  }""",
    "Mitchell2019": """@article{Mitchell2019,
  author = {Mitchell, Alex L and Almeida, Alexandre and Beracochea, Martin and Boland, Miguel and Burgin, Josephine and Cochrane, Guy and Crusoe, Michael R and Kale, Varsha and Potter, Simon C and Richardson, Lorna J and Sakharova, Ekaterina and Scheremetjew, Maxim and Korobeynikov, Anton and Shlemov, Alex and Kunyavskaya, Olga and Lapidus, Alla and Finn, Robert D},
  doi = {10.1093/nar/gkz1035},
  journal = {Nucleic Acids Res.},
  title = {{MGnify: the microbiome analysis resource in 2020}},
  year = {2019},
  comment = {MGnify database}
  }""",
    "Eastman2017": """@article{Eastman2017,
  author = {Eastman, Peter and Swails, Jason and Chodera, John D. and McGibbon, Robert T. and Zhao, Yutong and Beauchamp, Kyle A. and Wang, Lee-Ping and Simmonett, Andrew C. and Harrigan, Matthew P. and Stern, Chaya D. and Wiewiora, Rafal P. and Brooks, Bernard R. and Pande, Vijay S.},
  doi = {10.1371/journal.pcbi.1005659},
  journal = {PLOS Comput. Biol.},
  number = {7},
  title = {{OpenMM 7: Rapid development of high performance algorithms for molecular dynamics}},
  volume = {13},
  year = {2017},
  comment = {Amber relaxation}
  }""",
    "Jumper2021": """@article{Jumper2021,
  author = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'{i}}dek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W. and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  doi = {10.1038/s41586-021-03819-2},
  journal = {Nature},
  pmid = {34265844},
  title = {{Highly accurate protein structure prediction with AlphaFold.}},
  year = {2021},
  comment = {AlphaFold2 + BFD Database}
  }""",
    "Mirdita2019": """@article{Mirdita2019,
  author = {Mirdita, Milot and Steinegger, Martin and S{\"{o}}ding, Johannes},
  doi = {10.1093/bioinformatics/bty1057},
  journal = {Bioinformatics},
  number = {16},
  pages = {2856--2858},
  pmid = {30615063},
  title = {{MMseqs2 desktop and local web server app for fast, interactive sequence searches}},
  volume = {35},
  year = {2019},
  comment = {MMseqs2 search server}
  }""",
    "Steinegger2019": """@article{Steinegger2019,
  author = {Steinegger, Martin and Meier, Markus and Mirdita, Milot and V{\"{o}}hringer, Harald and Haunsberger, Stephan J. and S{\"{o}}ding, Johannes},
  doi = {10.1186/s12859-019-3019-7},
  journal = {BMC Bioinform.},
  number = {1},
  pages = {473},
  pmid = {31521110},
  title = {{HH-suite3 for fast remote homology detection and deep protein annotation}},
  volume = {20},
  year = {2019},
  comment = {PDB70 database}
  }""",
    "Mirdita2017": """@article{Mirdita2017,
  author = {Mirdita, Milot and von den Driesch, Lars and Galiez, Clovis and Martin, Maria J. and S{\"{o}}ding, Johannes and Steinegger, Martin},
  doi = {10.1093/nar/gkw1081},
  journal = {Nucleic Acids Res.},
  number = {D1},
  pages = {D170--D176},
  pmid = {27899574},
  title = {{Uniclust databases of clustered and deeply annotated protein sequences and alignments}},
  volume = {45},
  year = {2017},
  comment = {Uniclust30/UniRef30 database},
  }""",
    "Berman2003": """@misc{Berman2003,
  author = {Berman, Helen and Henrick, Kim and Nakamura, Haruki},
  booktitle = {Nat. Struct. Biol.},
  doi = {10.1038/nsb1203-980},
  number = {12},
  pages = {980},
  pmid = {14634627},
  title = {{Announcing the worldwide Protein Data Bank}},
  volume = {10},
  year = {2003},
  comment = {templates downloaded from wwPDB server}
  }""",
  }
  
  to_cite = [ "Mirdita2021", "Jumper2021" ]
  if use_msa:       to_cite += ["Mirdita2019"]
  if use_msa:       to_cite += ["Mirdita2017"]
  if use_env:       to_cite += ["Mitchell2019"]
  if use_templates: to_cite += ["Steinegger2019"]
  if use_templates: to_cite += ["Berman2003"]
  if use_amber:     to_cite += ["Eastman2017"]
  
  with open(f"{jobname}.bibtex", 'w') as writer:
    for i in to_cite:
      writer.write(citations[i])
      writer.write("\n")
  
  print(f"Found {len(to_cite)} citation{'s' if len(to_cite) > 1 else ''} for tools or databases.")
  if use_custom_msa:
    print("Don't forget to cite your custom MSA generation method.")
  
  !echo 'FILES TO PACKAGE: $a3m_file $jobname"_"*"relaxed_model_"*".pdb" $jobname"_coverage_lDDT.png" $jobname".bibtex" $jobname"_PAE.png" '
  try:
    print("zipping files...")
    !zip -FSr $jobname".result.zip" $a3m_file $jobname"_"*"relaxed_model_"*".pdb" $jobname"_coverage_lDDT.png" $jobname".bibtex" $jobname"_PAE.png"
  except Exception as e:
    print("unable to zip files")

  filename = f"{jobname}.result.zip"
  if os.path.isfile(filename):
    print("About to download %s" %(filename))
  
    try:
      print("Downloading zip file %s" %(filename))
      files.download(filename)
      print("Start of download successful (NOTE: if the download symbol does not go away it did not work. Download it manually using the folder icon to the left)")
      return filename
    except Exception as e:
      print("Unable to download zip file %s" %(filename))
      return None
  else:
    print("No .zip file %s created" %(filename))
    return None

# RUN THE JOBS HERE
# Each (sequence, jobname) pair is run through run_job in turn. Exceptions
# are not caught here, so a failing job stops the cell with its traceback.

for query_sequence, jobname in zip(query_sequences, jobnames):
  print("\n","****************************************","\n",
         "RUNNING JOB %s with sequence %s\n" %(
    jobname, query_sequence),
    "****************************************","\n")
  # GET TEMPLATES AND SET UP FILES

  # User input of manual templates (empty list when none were uploaded
  # for this jobname)
  manual_templates_uploaded = cif_filename_dict.get(
      jobname,[])
  if manual_templates_uploaded:
    print("Using uploaded templates %s for this run" %(
        manual_templates_uploaded))

  # run_job returns the zip file name, or None when nothing was packaged
  # or the download could not be started.
  filename = run_job(query_sequence,
      jobname,
      upload_manual_templates,
      manual_templates_uploaded,
      maximum_templates_from_pdb,
      num_models,
      homooligomer,
      use_msa,
      use_env,
      use_custom_msa,
      use_amber,
      use_templates,
      include_templates_from_pdb,
      number_of_ensembles,
      af_iterations,
      disable_jit,
      target_all_atom_positions)
  if filename:
    print("FINISHED JOB (%s) %s with sequence %s\n" %(
      filename, jobname, query_sequence),
      "****************************************","\n")
  else:
    print("NO RESULT FOR JOB %s with sequence %s\n" %(
      jobname, query_sequence),
      "****************************************","\n")


# List the result archives that exist on disk. This loop only prints their
# names; the download itself is started inside run_job.
print("\nDOWNLOADING FILES NOW:\n")
for query_sequence, jobname in zip(query_sequences, jobnames):
  filename = f"{jobname}.result.zip"
  if os.path.isfile(filename):
    print(filename)

print("\nALL DONE\n")
  
