## Install/load libraries

This notebook uses tcrdist3 to generate a TCR-specific distance matrix, along with OLGA generation probabilities (pgens), for each chain of the input TCRs. See the link below for full documentation.

https://tcrdist3.readthedocs.io/en/latest/tcrdistances.html

To use this notebook:
1. Upload the files from outputs/{CHAIN}/*.csv to the session (you can drag and drop them from Finder).
2. Create a folder to hold the output by running `!mkdir tcrdist` in a new code block.
3. Run the install chunk, then the chunk for either alpha or beta chain
4. Download the zipped results file (semi-automated, will need to allow on the first go in the session).
5. If running both chains, remove all files (except the pre-loaded 'sample_data' folder), including the tcrdist results folder, then repeat steps 1-4 for the other chain.

In [None]:
!pip install tcrdist3

import pandas as pd
from tcrdist.pgen import OlgaModel
from tcrdist import mappers
from tcrdist.repertoire import TCRrep
from tcrdist.public import TCRpublic
from tcrdist.setup_tests import download_and_extract_zip_file

import re
import os

from google.colab import files

Collecting tcrdist3
  Downloading tcrdist3-0.3-py3-none-any.whl.metadata (5.7 kB)
Collecting parasail>=1.1.17 (from tcrdist3)
  Downloading parasail-1.3.4-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)
Collecting pwseqdist>=0.6 (from tcrdist3)
  Downloading pwseqdist-0.6-py3-none-any.whl.metadata (1.8 kB)
Collecting zipdist>=0.1.5 (from tcrdist3)
  Downloading zipdist-0.1.5-py3-none-any.whl.metadata (3.8 kB)
Collecting fishersapi (from tcrdist3)
  Downloading fishersapi-1.0-py3-none-any.whl.metadata (2.5 kB)
Collecting hierdiff>=0.4 (from tcrdist3)
  Downloading hierdiff-0.85-py3-none-any.whl.metadata (2.4 kB)
Collecting palmotif>=0.2 (from tcrdist3)
  Downloading palmotif-0.4-py3-none-any.whl.metadata (602 bytes)
Collecting tcrsampler>=0.1.9 (from tcrdist3)
  Downloading tcrsampler-0.1.9-py3-none-any.whl.metadata (1.6 kB)
Collecting parmap>=1.5.2 (from tcrdist3)
  Downloading parmap-1.7.0-py2.py3-none-any.whl.metadata (9.3 kB)
Collecting olga>=1.2.1 (from

## Run beta chains through this code block

In [None]:
file_ = [i for i in os.listdir() if re.search('.csv', i)]
for i in file_:
  df = pd.read_csv(i)

  df = df.rename(columns = {'v_call': 'v_b_gene',
                            'j_call': 'j_b_gene',
                            'junction_aa': 'cdr3_b_aa'})

  df['v_b_gene'] = [f'{i}*01' for i in df['v_b_gene']] # append fake allele for TCRdist, doesn't recognise decomb ids without alleles
  df['j_b_gene'] = [f'{i}*01' for i in df['j_b_gene']] # append fake allele for TCRdist, doesn't recognise decomb ids without alleles

  tr = TCRrep(cell_df = df,
              organism = 'mouse',
              chains = ['beta'],
              db_file = 'alphabeta_gammadelta_db.tsv',
              store_all_cdr = False)

  olga_beta  = OlgaModel(chain_folder = "mouse_T_beta", recomb_type="VDJ")

  tr.clone_df['pgen_cdr3_b_aa'] = olga_beta.compute_aa_cdr3_pgens(CDR3_seq = tr.clone_df.cdr3_b_aa)

  tr.clone_df.to_csv(f"tcrdist/{re.sub('.csv', '', i)}_pgens.csv", index = False)

  tmp = pd.DataFrame(tr.pw_beta)
  tmp.columns = tr.clone_df.id_nt.to_list()

  tmp.to_csv(f"tcrdist/{re.sub('.csv', '', i)}_tcrdistmatrix.csv", index = True, compression = 'gzip')

!tar -czvf tcrdist.tar.gz tcrdist
files.download("/content/tcrdist.tar.gz")


  self._validate_cell_df()


tcrdist/
tcrdist/.ipynb_checkpoints/
tcrdist/0_complete-data_tcrdistmatrix.csv
tcrdist/0_complete-data_pgens.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Run alpha chains through this code block

In [None]:
file_ = [i for i in os.listdir() if re.search('.csv', i)]
for i in file_:
  df = pd.read_csv(i)

  df = df.rename(columns = {'v_call': 'v_a_gene',
                            'j_call': 'j_a_gene',
                            'junction_aa': 'cdr3_a_aa'})

  df['v_a_gene'] = [f'{i}*01' for i in df['v_a_gene']] # append fake allele for TCRdist, doesn't recognise decomb id without alleles
  df['j_a_gene'] = [f'{i}*01' for i in df['j_a_gene']] # append fake allele for TCRdist, doesn't recognise decomb id without alleles

  tr = TCRrep(cell_df = df,
              organism = 'mouse',
              chains = ['alpha'],
              db_file = 'alphabeta_gammadelta_db.tsv',
              store_all_cdr = False)

  olga_beta  = OlgaModel(chain_folder = "mouse_T_alpha", recomb_type="VJ")

  tr.clone_df['pgen_cdr3_a_aa'] = olga_beta.compute_aa_cdr3_pgens(CDR3_seq = tr.clone_df.cdr3_a_aa)

  tr.clone_df.to_csv(f"tcrdist/{re.sub('.csv', '', i)}_pgens.csv", index = False)

  tmp = pd.DataFrame(tr.pw_alpha)
  tmp.columns = tr.clone_df.id_nt.to_list()

  tmp.to_csv(f"tcrdist/{re.sub('.csv', '', i)}_tcrdistmatrix.csv", index = True, compression = 'gzip')

!tar -czvf tcrdist.tar.gz tcrdist
files.download("/content/tcrdist.tar.gz")


  self._validate_cell_df()


tcrdist/
tcrdist/0_complete-data_tcrdistmatrix.csv
tcrdist/0_complete-data_pgens.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>