<a href="https://colab.research.google.com/github/phbradley/TCRdock/blob/main/tcrdock_colab_pipeline_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TCRdock Colab

This colab notebook is based on the AlphaFold colab notebook https://colab.research.google.com/github/deepmind/alphafold/blob/main/notebooks/AlphaFold.ipynb -- many thanks to the AlphaFold developers for creating and sharing their code and related content.



## Setup

Start by running the 4 cells below to set up TCRdock and all required software.

In [None]:
# Set environment variables before running any other code.
import os
# NOTE(review): these look like GPU-memory settings read by TensorFlow/XLA at
# import time -- confirm the intended values before changing them.
os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'
os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '4.0'


from IPython.utils import io
import os
import subprocess
import tqdm.notebook

# Progress-bar layout; also reused by the next setup cell.
TQDM_BAR_FORMAT = '{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} remaining: {remaining}]'

# The output of every step is captured instead of displayed; it is printed
# only if one of the shell commands raises CalledProcessError (see `except`).
# The pbar.update() amounts are hand-assigned weights that add up to total=100.
try:
  with tqdm.notebook.tqdm(total=100, bar_format=TQDM_BAR_FORMAT) as pbar:
    with io.capture_output() as captured:
      # Uninstall default Colab version of TF.
      %shell pip uninstall -y tensorflow
      pbar.update(6)

      # Install py3dmol.
      %shell pip install py3dmol
      pbar.update(2)

      # Install Miniconda into /opt/conda (removing any previous install) and
      # delete the installer afterwards.
      %shell rm -rf /opt/conda
      %shell wget -q -P /tmp \
        https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
          && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
          && rm /tmp/Miniconda3-latest-Linux-x86_64.sh
      pbar.update(12)

      # Prepend the conda binaries to PATH. The Python variable PATH keeps the
      # value PATH had *before* /opt/conda/bin was added.
      PATH=%env PATH
      %env PATH=/opt/conda/bin:{PATH}

      # Pin conda itself, then install Python 3.8 from conda-forge.
      # phil changing to python 3.8 from 3.10 for compat with local versions...
      %shell conda install -qy conda==4.13.0 \
          && conda install -qy -c conda-forge \
            python=3.8
      pbar.update(80)

except subprocess.CalledProcessError:
  # Show the captured output of the failed step, then re-raise.
  print(captured)
  raise

#print(captured)


In [None]:
GIT_REPO = 'https://github.com/phbradley/TCRdock'

PARAMS_URLS = ['https://www.dropbox.com/s/e3uz9mwxkmmv35z/params_model_2_ptm.npz',
               'https://www.dropbox.com/s/jph8v1mfni1q4y8/tcrpmhc_run4_af_mhc_params_891.pkl']

PARAMS_DIR = './alphafold_params/params'

try:
  with tqdm.notebook.tqdm(total=100, bar_format=TQDM_BAR_FORMAT) as pbar:
    with io.capture_output() as captured:
      %shell rm -rf TCRdock
      %shell git clone --branch main {GIT_REPO} TCRdock
      pbar.update(20)
      # Install the required versions of all dependencies.
      %shell pip3 install -r ./TCRdock/requirements_colab_python38.txt
      pbar.update(60)

      # Load parameters
      %shell mkdir --parents "{PARAMS_DIR}"
      for URL in PARAMS_URLS:
        PARAMS_PATH = os.path.join(PARAMS_DIR, os.path.basename(URL))
        %shell wget -O "{PARAMS_PATH}" "{URL}"
      pbar.update(20)

except subprocess.CalledProcessError:
  print(captured)
  raise

#print(captured)

import jax
if jax.local_devices()[0].platform == 'tpu':
  raise RuntimeError('Colab TPU runtime not supported. Change it to GPU via Runtime -> Change Runtime Type -> Hardware accelerator -> GPU.')
elif jax.local_devices()[0].platform == 'cpu':
  raise RuntimeError('Colab CPU runtime not supported. Change it to GPU via Runtime -> Change Runtime Type -> Hardware accelerator -> GPU.')
else:
  print(f'Running with {jax.local_devices()[0].device_kind} GPU')

# Make sure everything we need is on the path.
import sys
sys.path.append('/opt/conda/lib/python3.8/site-packages')




In [None]:
cd TCRdock/

In [None]:
# Run the repository's download_blast.py script (expects the working directory
# to be the TCRdock checkout). Presumably this fetches the BLAST tools used by
# later steps -- see the script itself for details.
%shell python download_blast.py

## Enter info on the modeling targets

You can use the form in the next block, which will create a file `user_targets.tsv` with the supplied information to prepare for modeling a single target.

Or, to run multiple targets, you could upload a TSV-formatted file (i.e., tab-separated values) to this running Colab session using the upload button in the upper left corner. If you save it in `/content/TCRdock/` and give it the filename `user_targets.tsv`, then you can skip the next block with the form and go directly to the `setup_for_alphafold.py` command. Or call the file whatever you want and modify the `setup_for_alphafold.py` command to provide the location of your new file with the `--targets_tsvfile` flag.

In [None]:
#@title Enter the information on the TCR:pMHC complex to be modeled. When you are finished, press the play button (triangle inside circle) on the left.

#@markdown If there are any problems with the data, like unrecognized V/J gene names, there will be errors when the next cell is run.


# Colab form reference:
# https://colab.research.google.com/notebooks/forms.ipynb
import pandas as pd

organism = 'human' #@param ["human", "mouse"]

mhc_class = 1 #@param [1,2] {type:"raw"}

mhc = 'A*02:01'  #@param {type:"string"}

#@markdown For class II, the peptide should be 11 amino acids long (9 residue core plus 1 residue on either side)
peptide = 'GILGFVFTL' #@param {type:"string"}

#@markdown The gene names should include allele information (ie, they should end in "*01" or something like that)
va = 'TRAV27*01' #@param {type:"string"}
ja = 'TRAJ42*01' #@param {type:"string"}
#@markdown The CDR3 sequence starts with the conserved C and ends with the F/Y/W that comes before the GXG in the J region.
#@markdown The CDR3 sequences should be at least 6 residues long.
cdr3a = 'CAGAGSQGNLIF' #@param {type:"string"}
vb = 'TRBV19*01' #@param {type:"string"}
jb = 'TRBJ2-7*01' #@param {type:"string"}
cdr3b = 'CASSSRSSYEQYF' #@param {type:"string"}

# One-row table describing the single target entered in the form above; the
# key order here is the column order of the TSV file.
targets = pd.DataFrame([{
    'organism': organism,
    'mhc_class': mhc_class,
    'mhc': mhc,
    'peptide': peptide,
    'va': va,
    'ja': ja,
    'cdr3a': cdr3a,
    'vb': vb,
    'jb': jb,
    'cdr3b': cdr3b,
}])

# Write the table as tab-separated values, without the index column.
targets_filename = 'user_targets.tsv'
targets.to_csv(targets_filename, sep='\t', index=False)
print('made:', targets_filename)




## Generate the inputs for AlphaFold modeling

In [None]:
# Generate the AlphaFold modeling inputs from the targets in user_targets.tsv.
# Results go under user_output/ (the prediction step below reads
# user_output/targets.tsv). Change --targets_tsvfile to use a different file.
%shell python setup_for_alphafold.py --targets_tsvfile user_targets.tsv --output_dir user_output --new_docking


## Run AlphaFold with the generated inputs

This next Python command will build TCRdock models for the targets listed in the file `user_output/targets.tsv`. The first target will take longer because the neural network model is being compiled. After that, the remaining targets will be much (~5x) faster.

In [None]:
# Build models for every target in user_output/targets.tsv using the
# parameter files downloaded into /content/alphafold_params/ during setup.
# Output files are named with the prefix given by --outfile_prefix.
%shell python run_prediction.py --verbose \
    --targets user_output/targets.tsv \
    --outfile_prefix user_output \
    --model_names model_2_ptm_ft4 \
    --data_dir /content/alphafold_params/ \
    --model_params_files /content/alphafold_params/params/tcrpmhc_run4_af_mhc_params_891.pkl


# this command computes the PAE between pMHC and TCR
# (reads user_output_final.tsv and writes user_output_w_pae.tsv, which the
# results cell below loads)
%shell python add_pmhc_tcr_pae_to_tsvfile.py --infile user_output_final.tsv \
    --outfile user_output_w_pae.tsv


## Look at the TCRdock output

The next cell should generate as output a table with the pMHC-TCR PAE values (in the `pmhc_tcr_pae` column). Models with PAE values less than 6.5 or 7 are higher confidence; models with PAE values greater than 7.5 or 8 are low confidence.

In [None]:
# Load the per-target results (including the pMHC-TCR PAE column) and display
# the most informative columns.
import pandas as pd
results = pd.read_csv('user_output_w_pae.tsv', sep='\t')

cols = [
    'pmhc_tcr_pae', 'mhc', 'peptide', 'va', 'cdr3a', 'vb', 'cdr3b',
    'model_pdbfile',
]
results.loc[:, cols]


In [None]:
# show the output PDB files (lists *.pdb in the current working directory)
!ls *.pdb

In [None]:
from google.colab import files
from matplotlib import gridspec
import matplotlib.pyplot as plt
import numpy as np
import py3Dmol
from glob import glob

from IPython import display
from ipywidgets import GridspecLayout
from ipywidgets import Output

# Locate the model PDB file to visualize: the first target (T00000).
# The matches are kept in `pdb_files` so the `files` module imported from
# google.colab above is not overwritten, and are sorted so that the file
# chosen is the same on every run.
pdb_pattern = 'user_output_T00000_*_model_2_ptm_ft4.pdb'
pdb_files = sorted(glob(pdb_pattern))
if not pdb_files:
  raise FileNotFoundError(
      f'No model PDB file matching {pdb_pattern!r} in the current directory; '
      'run the prediction cell above first.')
fname = pdb_files[0]
print('loading:', fname)

with open(fname, 'r') as f:
  to_visualize_pdb = f.read()


# Set to False to show the cartoon representation only.
show_sidechains = True

# Build the 3D viewer: cartoon backbone, optionally with side chains as sticks.
view = py3Dmol.view(width=800, height=600)
view.addModelsAsFrames(to_visualize_pdb)
style = {'cartoon': {}}
if show_sidechains:
  style['stick'] = {}
view.setStyle({'model': -1}, style)
view.zoomTo()

# Lay the viewer out in the left cell of a 1x2 grid.
grid = GridspecLayout(1, 2)
out = Output()
with out:
  view.show()
grid[0, 0] = out

# The right cell is an empty placeholder (no legend is drawn).
out = Output()
grid[0, 1] = out

display.display(grid)


## Some potentially useful commands for debugging

In [None]:
# for figuring out what CUDA versions are installed
# (prints the version reported by the nvcc compiler)
! nvcc -V

In [None]:
# same for CUDNN: print the CUDNN_MAJOR line and the two lines after it
# (grep -A 2) from the cudnn_v*.h header(s)
!cat /usr/include/x86_64-linux-gnu/cudnn_v*.h | grep CUDNN_MAJOR -A 2


In [None]:
%shell echo $PATH

In [None]:
# Show which python executable shell commands resolve to on the current PATH.
%shell which python

In [None]:
# Show which pip3 executable shell commands resolve to on the current PATH.
%shell which pip3