In [None]:
#@title blank template
#@markdown This notebook is from [github.com/matteoferla/pyrosetta_help](https://github.com/matteoferla/pyrosetta_help).

#@markdown It can be opened in Colabs via [https://colab.research.google.com/github/matteoferla/pyrosetta_help/blob/main/colabs/colabs-pyrosetta-dimer.ipynb](https://colab.research.google.com/github/matteoferla/pyrosetta_help/blob/main/colabs/colabs-pyrosetta-dimer.ipynb)

#@markdown This notebook is intended for use with a [ColabFold output](https://github.com/sokrypton/ColabFold) from a complex of two proteins.
#@markdown It loads, relaxes (=energy minimises) and scores the interface of these.
#@markdown Some tricks discussed in https://blog.matteoferla.com/2021/08/tweaking-alphafold2-models-with.html


In [None]:
#@title Installation
#@markdown Installing PyRosetta with optional backup to your drive (way quicker next time!).
#@markdown Note that PyRosetta occupies some 10 GB, so you'll need to be on the 100 GB plan of Google Drive (it's one pound a month).

#@markdown The following is not the real password. However, the format is similar.
username = 'boltzmann' #@param {type:"string"}
password = 'constant' #@param {type:"string"}
#@markdown Release to install:
_release = 'release-295' #@param {type:"string"}
#@markdown Use Google Drive for PyRosetta (way faster next time, but takes up space)
#@markdown (NB. You may be prompted to follow a link and possibly authenticate and then copy a code into a box)
use_drive = True #@param {type:"boolean"}
#@markdown Installing `rdkit` and `rdkit_to_params` allows the creation of custom topologies (params) for new ligands
#@markdown **Code to use these not copy-pasted over**
install_rdkit = True #@param {type:"boolean"}

# Verify the username and password are correct without actually knowing them:
# only their SHA-256 digests are stored in this notebook.
import hashlib
# hashlib only accepts bytes, so the form strings must be encoded before hashing
# (passing a str raises TypeError on Python 3).
hashed_username = hashlib.sha256(username.encode('utf-8')).hexdigest()
hashed_password = hashlib.sha256(password.encode('utf-8')).hexdigest()
expected_hashed_username = 'cf6f296b8145262b22721e52e2edec13ce57af8c6fc990c8ae1a4aa3e50ae40e'
expected_hashed_password = '45066dd976d8bf0c05dc8dd4d58727945c3437e6eb361ba9870097968db7a0da'
# Stop here, before any download is attempted, if the credentials are wrong.
assert hashed_username == expected_hashed_username, 'The hash of the username is not as expected'
assert hashed_password == expected_hashed_password, 'The hash of the password is not as expected'

import sys
import os
import importlib
import pip

def install_and_import(package):
    """Import ``package``, installing it with pip first if it cannot be imported.

    This only works for packages with the same name as their module.
    The imported module is bound to the name ``package`` in this module's
    globals (so it can be used afterwards as if it had been imported normally)
    and is also returned.

    :param package: name shared by the pip distribution and the importable module.
    :return: the imported module.
    :raises ImportError: if the module still cannot be imported after the
        installation attempt.
    """
    import subprocess

    try:
        module = importlib.import_module(package)
    except ImportError:
        # ``pip.main`` no longer exists (removed in pip 10): run pip as a
        # subprocess of the current interpreter instead.
        subprocess.run([sys.executable, '-m', 'pip', 'install', package], check=False)
        # make the import system notice the freshly installed files
        importlib.invalidate_caches()
        module = importlib.import_module(package)
    globals()[package] = module
    return module

import subprocess

# The PyRosetta archive fetched below is the Ubuntu build, so check the OS first.
install_and_import('distro')
assert distro.name() == 'Ubuntu'

# e.g. '37' for Python 3.7: this is how the archives are named on the download server
py_version = str(sys.version_info.major) + str(sys.version_info.minor)
if use_drive:
  from google.colab import drive
  drive.mount('/content/drive')
  _path = '/content/drive/MyDrive'
  os.chdir(_path)
else:
  _path = '/content'

_platform_name = f'PyRosetta4.Release.python{py_version}.ubuntu'
_release_name = f'{_platform_name}.{_release}'
# Download only when the *requested* release has not been unpacked yet
# (a leftover folder from a different release must not suppress the download).
if not os.path.isdir(_release_name):
  # Arguments are passed as a list (no shell), so credentials containing shell
  # metacharacters are handled safely. ``--fail`` makes curl exit non-zero on an
  # HTTP error (e.g. wrong credentials) instead of saving the error page as the archive.
  subprocess.check_call(['curl', '--fail', '-u', f'{username}:{password}',
                         f'https://graylab.jhu.edu/download/PyRosetta4/archive/release/{_platform_name}/{_release_name}.tar.bz2',
                         '-o', '/content/a.tar.bz2'])
  # unpacks into the current working directory
  subprocess.check_call(['tar', '-xf', '/content/a.tar.bz2'])
subprocess.check_call(['pip3', 'install', '-e', f'{_path}/{_release_name}/setup/'])
subprocess.check_call(['pip3', 'install', 'pyrosetta-help', 'biopython'])
if install_rdkit:
  subprocess.check_call(['pip3', 'install', 'rdkit-pypi', 'rdkit-to-params'])

# Re-run the site initialisation so the packages installed above become importable
# in the already running kernel.
import site
site.main()

In [None]:
#@title Start PyRosetta
import pyrosetta
import pyrosetta_help as ph

#@markdown Do not optimise hydrogen on loading:
no_optH = False #@param {type:"boolean"}
#@markdown Ignore (True) or raise error (False) if novel residue (e.g. ligand)
ignore_unrecognized_res=False  #@param {type:"boolean"}
#@markdown Use autogenerated PDB residues are often weird (bad geometry, wrong match, protonated etc.): —best do it properly and parameterise it
load_PDB_components=False  #@param {type:"boolean"}
#@markdown Ignore all waters:
ignore_waters=False  #@param {type:"boolean"}

# Collect every option first: the form values chosen above plus the fixed ones
# (ex1, ex2 and mute), then turn them into a single option string.
_option_kwargs = dict(
    no_optH=no_optH,
    ex1=None,
    ex2=None,
    mute='all',
    ignore_unrecognized_res=ignore_unrecognized_res,
    load_PDB_components=load_PDB_components,
    ignore_waters=ignore_waters,
)
extra_options = ph.make_option_string(**_option_kwargs)

# Set up the log capture before PyRosetta is initialised with the options above.
logger = ph.configure_logger()
pyrosetta.init(extra_options=extra_options)


In [None]:
#@title Load pose
folder_name =  'prediction_38cac' #@param {type:"string"}
#@markdown This will create an object called `analyser` 
#@markdown which has the attributes
#@markdown * `scores` (pandas DataFrame),
#@markdown * three dictionaries (`original_poses`, `relaxed_poses`, `phospho_poses`)
#@markdown where the key is the rank number of the pose (value)
#@markdown * `errors` a dictionary with key = rank number and values are the matrices that

# NOTE(review): the `errors` sentence above is truncated in this notebook, and the
# fragment below followed it as a bare (non-comment) line, which made the cell a
# syntax error. It is kept here verbatim; restore the full sentence from upstream:
#   "to be a Rosetta pose vector (without returning a clone)"

# Build the analyser from the prediction folder, loading the poses straight away.
analyser = ph.AF2NotebookAnalyser(folder=folder_name, load_poses=True)

In [None]:
#@title Energy minimise

#@markdown Check to constrain the C&alpha; atoms of residues with a harmonic function with the AF2 pairwise error as sd.
#@markdown This is applied to two cases:
#@markdown * all non–primary-sequence–adjacent residues with an error less than 12 &Aring; and 
#@markdown * residues in different chains that are less than 15 &Aring; apart ([Rosetta-style neighbouring distance](https://blog.matteoferla.com/2020/06/love-thy-neighbours-but-select-them.html), ~C&beta; distance)

constrained_errors=True  #@param {type:"boolean"}

#@markdown Number of FastRelax cycles

cycles=3   #@param {type:"integer"}
# Side chains are relaxed first to prevent the structure blowing up in the later steps.
analyser.sidechain_relax(cycles)
if constrained_errors:
    analyser.constrain(tolerance=2)
analyser.relax(cycles)
analyser.calculate_interface()

# For every rank: hand the original pose's PDB info to its relaxed pose,
# then superimpose every relaxed pose other than rank 1 onto the rank 1 relaxed pose.
for rank in analyser.errors:
    relaxed = analyser.relaxed_poses[rank]
    relaxed.pdb_info(analyser.original_poses[rank].pdb_info())
    if rank == 1:
        continue
    ph.superimpose_by_pLDDT(relaxed, analyser.relaxed_poses[1])

In [None]:
# Show the `scores` table (a pandas DataFrame) of the analyser:
# as the last expression of the cell it is rendered as the cell's output.
analyser.scores

In [None]:
#@title Add PTMs (optional)
#@markdown This is rather weird because:
#@markdown * PhosphoSitePlus does not have an API.
#@markdown * Colabs does not have a multiline input
#@markdown * this [way to copypaste data into a remote Jupyter Notebook](https://gist.github.com/matteoferla/6de39f0057d283d270f834d487e358a9) does not work in Colabs
#@markdown So for now just copy paste the PTMs table in the next cell instead of `PASTE HERE`


In [None]:
# Placeholder for the PTMs table: replace `PASTE HERE` with the copied table text.
# The "Add PTMs" cell passes this string to `analyser.parse_phosphosite`.
raw = '''PASTE HERE'''

In [None]:
#@title Add PTMs
chain = 'A'  #@param {type:"string"}
# Upper limit handed to the parser: the end of chain 1 in the rank 1 original pose.
_first_chain_end = analyser.original_poses[1].chain_end(1)
pdb_ptms = analyser.parse_phosphosite(raw, maximum=_first_chain_end)
# Uses the `cycles` value set in the energy minimisation cell.
analyser.make_phosphorylated(pdb_ptms, chain, cycles)

In [None]:
#@title Save poses and pandas dataframe

# Name of the folder handed to `analyser.dump` (Colab form field).
new_folder_name =  'output' #@param {type:"string"}
# Per this cell's title, this saves the poses and the pandas dataframe.
analyser.dump(new_folder_name)

In [None]:
# Note that nglview does not work with Colabs but py3Dmol does.
# install py3Dmol
os.system(f'pip3 install py3Dmol')
# re-run the site initialisation so the freshly installed package is importable
import site
site.main()
# run
import py3Dmol
view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',)
# Show the rank 1 relaxed pose. The analyser attribute is `relaxed_poses`
# (a dictionary keyed by rank number), not `relaxed_pose`.
view.addModel(ph.get_pdbstr(analyser.relaxed_poses[1]),'pdb')
view.zoomTo()
# last expression of the cell: renders the viewer as the cell output
view

In [None]:
#@title Upload to Michelanglo (optional)
#@markdown [Michelanglo](https://michelanglo.sgc.ox.ac.uk/) is a website that
#@markdown allows the creation, annotation and sharing of a webpage with an interactive protein viewport.
#@markdown ([examples](https://michelanglo.sgc.ox.ac.uk/gallery)).
#@markdown The created pages are private —they have a 1 in a quintillion chance to be guessed within 5 tries.

#@markdown Registered users (optional) can add interactive annotations to pages.
#@markdown Leave blank for guest:

username = ''  #@param {type:"string"}
password = ''  #@param {type:"string"}

#@markdown Choose initial model.

chosen_pose_series = 'relaxed' #@param ['original', 'relaxed', 'phospho'] {type:"string"}
chosen_rank = 1  #@param {type:"integer"}

# Install the API client, then re-run the site initialisation so it is importable.
os.system(f'pip3 install michelanglo-api')
import site
site.main()
from michelanglo_api import MikeAPI

# A blank username means a guest session; otherwise log in with the credentials given.
mike = MikeAPI(username, password) if username else MikeAPI.guest_login()

# Map the form choice to the matching dictionary of poses, then pick the chosen rank.
_poses_by_series = {'original': analyser.original_poses, 
                    'relaxed': analyser.relaxed_poses,
                    'phospho': analyser.phospho_poses}
pose = _poses_by_series[chosen_pose_series][chosen_rank]
_pdb_block = ph.get_pdbstr(pose)
page = mike.convert_pdb(pdbblock=_pdb_block)
page.show_link()