In [1]:
%load_ext autoreload
%autoreload 2

import os, sys, shutil, bz2, copy
from pathlib import Path
import pandas as pd
pd.set_option('display.max_columns', 50)
import numpy as np

### Plotting imports ###
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.colors as mcolors
import matplotlib as mpl
from matplotlib.patches import StepPatch
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec
import logomaker as lm
# Names of the Tableau colours known to matplotlib, kept as a list:
palette = list(mcolors.TABLEAU_COLORS)
# Global seaborn look for every figure made in this notebook.
# The context is set after the theme on purpose; keep this order.
sns.set_theme(palette="muted", style="ticks")
sns.set_context("talk")
%matplotlib inline

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
# Remember the notebook folder the first time this cell runs, so that
# re-running the cell after the working dir was changed elsewhere still
# brings us back to the notebook dir:
if 'NBdir' not in globals():
    NBdir = os.getcwd()
print('Notebook is in: {}'.format(NBdir))
os.chdir(NBdir)

# Define the path to the repo folder: two levels above the notebook folder.
# Change if necessary.
# os.path.dirname is used instead of splitting on '/' so that the result
# does not depend on the path separator of the platform.
homedir = os.path.dirname(os.path.dirname(NBdir))
print('Repo is in: {}'.format(homedir))
# Make the repo importable so the `src` package can be found:
sys.path.insert(1, homedir)
from src.misc import index_to_sample_df, downsample_raw_input, read_tRNAdb_info, sample_df_to_dict
from src.read_processing import AR_merge, BC_split, Kmer_analysis, BC_analysis, UMI_trim
from src.alignment import SWIPE_align
from src.stats_collection import STATS_collection
from src.plotting import TRNA_plot
from src.transcript_mutations import TM_analysis

# Default folder names for the data and the raw fastq files,
# given relative to the folder that holds this notebook:
data_dir = 'data'
seq_dir_noDS = 'raw_fastq'  # Not downsampled
seq_dir = seq_dir_noDS

# Folder names that the subsequent processing steps
# dump their data into. Best to not change:
plotting_dir = 'plotting'
TM_dir = 'transcript_mutations'
stats_dir = 'stats_collection'
align_dir = 'SWalign'
UMI_dir = 'UMI_trimmed'
BC_dir = 'BC_split'
AdapterRemoval_dir = 'AdapterRemoval'
# Reference tRNA fasta files, keyed by species:
tRNA_database = {
    'human': '{}/tRNA_database/human/hg38-tRNAs.fa'.format(homedir),
    'mouse': '{}/tRNA_database/mouse/mm10-tRNAs.fa'.format(homedir),
}
# Masked reference sequences; only a human entry is defined here:
tRNA_database_masked = {
    'human': '{}/tRNA_database_masked/human/human-tRNAs.fa'.format(homedir),
}
# Read information (length, codon etc) of tRNAs into dictionary:
tRNA_data = read_tRNAdb_info(tRNA_database)
# Score matrices handed to the aligner:
SWIPE_score_mat = '{}/utils/nuc_score-matrix.txt'.format(homedir)
SWIPE_score_mat2 = '{}/utils/nuc_score-matrix_2.txt'.format(homedir)  # For masked reference sequences
# tRNA sequencing yields many duplicated reads.
# Keeping the commonly seen sequences in a list prevents duplicated alignment:
common_seqs = '{}/utils/common-seqs.fasta.bz2'.format(homedir)

# The minimum read length follows from the minimum insert size:
# a merged read has to hold the insert plus the UMI and the
# longest barcode.
MIN_INSERT_LEN = 10
UMI_LEN = 10
BC_MAX_LEN = 19
MIN_READ_LEN = sum((MIN_INSERT_LEN, UMI_LEN, BC_MAX_LEN))
print('Using minimum read length: {} (after merge)'.format(MIN_READ_LEN))

# Read index information from the spreadsheet in the repo's utils folder:
index_list_fnam = 'index_list.xlsx'
index_list_path = '{}/utils/{}'.format(homedir, index_list_fnam)
index_df = pd.read_excel(index_list_path)

Notebook is in: /home/sulab/tRNA-charge-seq/projects/Alicia_2023-06-06
Repo is in: /home/sulab/tRNA-charge-seq
Using minimum read length: 39 (after merge)


In [3]:
### Input settings ###
sample_list_fnam = 'sample_list.xlsx'
sample_list_path = '{}/{}'.format(NBdir, sample_list_fnam)
# Load the sample sheet and add the barcode sequences from the index list:
sample_df = index_to_sample_df(pd.read_excel(sample_list_path), index_df)
# Dictionary with the elementary info (replicate, barcode, species)
# for each unique sample name:
sample_dict = sample_df_to_dict(sample_df)
# Several samples can share one pair of fastq files, so reduce the sample
# information to one row per unique file/index combination:
input_file_cols = ['fastq_mate1_filename', 'fastq_mate2_filename',
                   'P5_index', 'P7_index',
                   'P5_index_seq', 'P7_index_seq']
inp_file_df = sample_df[input_file_cols].copy()
inp_file_df = inp_file_df.drop_duplicates().reset_index(drop=True)

# Downsample:
# Switched off; set the condition to True to run downsample_raw_input,
# which also replaces seq_dir with the value it returns.
if False:
    sample_df, inp_file_df, seq_dir = downsample_raw_input(
        sample_df, inp_file_df, NBdir, data_dir, seq_dir_noDS,
        downsample_absolute=1e4,
    )

# Make a dictionary with paths used for data processing.
# Built after the downsampling step because that step may change seq_dir:
dir_dict = {
    'NBdir': NBdir,
    'data_dir': data_dir,
    'seq_dir': seq_dir,
    'AdapterRemoval_dir': AdapterRemoval_dir,
    'BC_dir': BC_dir,
    'UMI_dir': UMI_dir,
    'align_dir': align_dir,
    'stats_dir': stats_dir,
    'TM_dir': TM_dir,
    'plotting_dir': plotting_dir,
}

In [4]:
### Run AdapterRemoval ###
# Existing output is kept: neither the folder nor finished files are overwritten.
AR_obj = AR_merge(
    dir_dict,
    inp_file_df,
    MIN_READ_LEN,
    overwrite_dir=False,
    check_input=False,
)
inp_file_df = AR_obj.run_parallel(n_jobs=4, overwrite=False)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/Alicia_2023-06-06/data/AdapterRemoval


In [5]:
### Split files based on barcodes ###
# With load_previous=True the results of an earlier run are loaded when available.
BCsplit_obj = BC_split(
    dir_dict,
    sample_df,
    inp_file_df,
    overwrite_dir=False,
)
sample_df, inp_file_df = BCsplit_obj.run_parallel(
    n_jobs=12,
    load_previous=True,
)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/Alicia_2023-06-06/data/BC_split
Loaded results from previous run... Not running barcode split.


In [6]:
### Trim UMIs ###
# Samples are capped at 2e6 reads via downsample_absolute; the first rows of
# the updated sample table are displayed below.
UMItrim_obj = UMI_trim(
    dir_dict,
    sample_df,
    overwrite_dir=False,
    downsample_absolute=2e6,
)
sample_df = UMItrim_obj.run_parallel(n_jobs=4, load_previous=True)
sample_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/Alicia_2023-06-06/data/UMI_trimmed
Loaded results from previous run... Not running UMI trimming.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,N_after_trim,N_UMI_observed,N_UMI_expected,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_after_downsample
0,A01,A01,1,2023-06-06/P03_R1.fastq.bz2,2023-06-06/P03_R2.fastq.bz2,D501,D703,l1Sp,human,293T-cys @&@ 293T-sergly @&@ 293T-met @&@ 293T...,Genotype-Time-Drug,WT-0,1,AGGCTATA,CGCTCATT,GGCTGCCATGCGACTA,5598086,308346,5237383,5545729,99.064734,94.439937,5478089,509270,524272.802045,97.856464,97.13836,2000000
1,A02,A02,1,2023-06-06/P03_R1.fastq.bz2,2023-06-06/P03_R2.fastq.bz2,D501,D703,l2Sp,human,293T-cys,Genotype-Time-Drug,WT-6,2,AGGCTATA,CGCTCATT,GGCTGCCATGCTGTCACG,5362565,281781,5026667,5308448,98.990837,94.691838,5244422,506781,524264.267492,97.796894,96.66518,2000000
2,A03,A03,1,2023-06-06/P03_R1.fastq.bz2,2023-06-06/P03_R2.fastq.bz2,D501,D703,l3Sp,human,293T-cys,Genotype-Time-Drug,WT-48,3,AGGCTATA,CGCTCATT,GGCTGCCATGCTGCGA,416891,97926,304409,402335,96.50844,75.660581,407214,179506,283156.76116,97.67877,63.394566,407214


In [7]:
### Align reads to database of reference tRNAs ###
# The masked reference is used together with the score matrix
# meant for masked reference sequences:
align_obj = SWIPE_align(
    dir_dict,
    tRNA_database_masked,
    sample_df,
    SWIPE_score_mat2,
    gap_penalty=6,
    extension_penalty=3,
    min_score_align=15,
    common_seqs=common_seqs,
    overwrite_dir=False,
)
sample_df = align_obj.run_parallel(
    n_jobs=4,
    overwrite=False,
    load_previous=True,
)
sample_df.head(3)

Using common sequences to prevent duplicated alignment.
Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/Alicia_2023-06-06/data/SWalign
Loaded results from previous run... Not running alignment.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,N_after_trim,N_UMI_observed,N_UMI_expected,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_after_downsample,N_mapped,percent_single_annotation,percent_multiple_annotation,percent_multiple_codons,Mapping_percent
0,A01,A01,1,2023-06-06/P03_R1.fastq.bz2,2023-06-06/P03_R2.fastq.bz2,D501,D703,l1Sp,human,293T-cys @&@ 293T-sergly @&@ 293T-met @&@ 293T...,Genotype-Time-Drug,WT-0,1,AGGCTATA,CGCTCATT,GGCTGCCATGCGACTA,5598086,308346,5237383,5545729,99.064734,94.439937,5478089,509270,524272.802045,97.856464,97.13836,2000000,1902411,65.62809,34.37191,5.552533,95.12055
1,A02,A02,1,2023-06-06/P03_R1.fastq.bz2,2023-06-06/P03_R2.fastq.bz2,D501,D703,l2Sp,human,293T-cys,Genotype-Time-Drug,WT-6,2,AGGCTATA,CGCTCATT,GGCTGCCATGCTGTCACG,5362565,281781,5026667,5308448,98.990837,94.691838,5244422,506781,524264.267492,97.796894,96.66518,2000000,1835940,65.38084,34.61916,5.512544,91.797
2,A03,A03,1,2023-06-06/P03_R1.fastq.bz2,2023-06-06/P03_R2.fastq.bz2,D501,D703,l3Sp,human,293T-cys,Genotype-Time-Drug,WT-48,3,AGGCTATA,CGCTCATT,GGCTGCCATGCTGCGA,416891,97926,304409,402335,96.50844,75.660581,407214,179506,283156.76116,97.67877,63.394566,407214,383146,76.832069,23.167931,11.057665,94.089594


In [8]:
### Collect alignment statistics ###
# The first rows of the resulting statistics table are displayed below.
stats_obj = STATS_collection(
    dir_dict,
    tRNA_data,
    sample_df,
    common_seqs=common_seqs,
    overwrite_dir=False,
)
stats_df = stats_obj.run_parallel(n_jobs=8, load_previous=True)
stats_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/Alicia_2023-06-06/data/stats_collection
Loaded results from previous run... Not running stats collection.


Unnamed: 0,sample_name_unique,sample_name,replicate,barcode,species,tRNA_annotation,tRNA_annotation_len,unique_annotation,5p_cover,align_3p_nt,codon,anticodon,amino_acid,align_gap,fmax_score>0.9,count,UMIcount,UMI_percent_exp
0,A01,A01,1,l1Sp,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,False,False,3,3,100.000191
1,A01,A01,1,l1Sp,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,False,True,39,39,100.003624
2,A01,A01,1,l1Sp,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,True,False,16,16,100.001431


In [9]:
### Generate standard tRNAseq data plots ###
# plot_obj is the handle that all plotting and export cells further down use.
plot_obj = TRNA_plot(
    dir_dict,
    sample_df,
    overwrite_dir=False,
    pull_default=False,
)

Folder exists and overwrite set to false... Doing nothing.


In [10]:
# Ecoli control:
# NOTE(review): min_obs=100 is presumably the minimum number of observations
# required for inclusion in the plot -- confirm in TRNA_plot.plot_Ecoli_ctr.
plot_obj.plot_Ecoli_ctr(min_obs=100)

In [11]:
# Settings shared by both codon-level bar charts:
codon_bar_kwargs = dict(plot_type='codon', group=True, min_obs=500)

# Codon abundance barchart:
plot_obj.plot_abundance(plot_name='codon_abundance_grp', **codon_bar_kwargs)

# Codon charge barchart (same settings, with charge_plot switched on):
plot_obj.plot_abundance(plot_name='codon_charge_grp', charge_plot=True,
                        **codon_bar_kwargs)


Now plotting sample/group:  293T-cys  293T-his  293T-lys  293T-met  293T-phe  293T-sergly  293T-tyr  HCT-cys  HCT-his  HCT-lys  HCT-met  HCT-phe  HCT-sergly  HCT-tyr
Now plotting sample/group:  293T-cys  293T-his  293T-lys  293T-met  293T-phe  293T-sergly  293T-tyr  HCT-cys  HCT-his  HCT-lys  HCT-met  HCT-phe  HCT-sergly  HCT-tyr

In [12]:
# Abundance correlation.
# The two lists are matched position by position: A01 is compared with A20
# and A39 with A58, as reported in the printed sample pairs.
corr_sample_pairs = [['A01', 'A39'],
                     ['A20', 'A58']]
plot_obj.plot_abundance_corr(
    sample_unique_pairs=corr_sample_pairs,
    plot_type='transcript',
    plot_name='tr_abundance_corr_WT-GCN2',
    min_obs=500,
    charge_plot=False,
    log=True,
)


Now plotting sample pairs:  (A01 - A20)  (A39 - A58)

In [12]:
# Coverage plots for cyto/mito transcripts.
# Both plots share every setting; only the compartment and the plot name
# derived from it differ, so the call is made once per compartment.
for compartment in ('cyto', 'mito'):
    plot_obj.plot_coverage(
        compartment=compartment,
        plot_type='behrens',
        y_norm=True,
        plot_name='cov_plot_{}_behrens_norm'.format(compartment),
        n_jobs=4,
    )


Now collecting data for sample:  A01  A05  A02  A03  A08  A06  A07  A09  A10  A11  A12  A13  A14  A15  A16  A17  A18  A19  A20  A21  A22  A23  A24  A25  A27  A26  A28  A29  A31  A30  A32  A34  A33  A35  A37  A36  A38  A41  A39  A40  A42  A43  A45  A44  A47  A46  A48  A49  A50  A51  A52  A53  A54  A57  A55  A56  A58  A59  A60  A61  A62  A63  A65  A64  A66  A67  A69  A68  A70  A71  A74  A72  A73  A75  A76
Now plotting sample:  A01  A02  A03  A05  A06  A07  A08  A09  A10  A11  A12  A13  A14  A15  A16  A17  A18  A19  A20  A21  A22  A23  A24  A25  A26  A27  A28  A29  A30  A31  A32  A33  A34  A35  A36  A37  A38  A39  A40  A41  A42  A43  A44  A45  A46  A47  A48  A49  A50  A51  A52  A53  A54  A55  A56  A57  A58  A59  A60  A61  A62  A63  A64  A65  A66  A67  A68  A69  A70  A71  A72  A73  A74  A75  A76
Now collecting data for sample:  A01  A02  A03  A05  A08  A07  A09  A06  A10  A11  A12  A13  A14  A15  A16  A17  A18  A20  A19  A21  A22  A23  A25  A24  A26  A27  A28  A29  A30  A31  A32  A34  A33

In [13]:
# UMI logo:
# Collects data for every sample and then draws the logo plot (see the
# progress messages printed below).
# NOTE(review): n_jobs=4 is presumably the number of parallel workers -- confirm.
plot_obj.plot_UMI_logo(n_jobs=4)


Now collecting data for sample:  A01  A02  A03  A05  A08  A07  A09  A06  A10  A11  A12  A13  A14  A16  A15  A17  A18  A19  A20  A21  A22  A23  A24  A25  A26  A27  A28  A29  A30  A31  A32  A33  A34  A38  A35  A36  A37  A39  A41  A40  A42  A43  A44  A45  A46  A47  A49  A48  A50  A51  A53  A52  A54  A55  A56  A57  A58  A59  A60  A61  A62  A63  A65  A64  A66  A67  A69  A70  A68  A71  A73  A72  A74  A75  A76
Now plotting logo plot.

In [14]:
# Non-template nucleotides.
# One settings dict per logo plot; the plots are then drawn in the order
# 5p end, 3p end, 3p end with _3p_cover switched on.
logo_5p = dict(end='5p', plot_name='_5p-non-template_logo',
               seq_len_percentile=99, n_jobs=4)
logo_3p = dict(end='3p', plot_name='_3p-non-template_logo',
               seq_len_percentile=99.9, n_jobs=4)
logo_3p_cover = dict(end='3p', plot_name='_3p-non-template_3p-cover_logo',
                     seq_len_percentile=99.9, _3p_cover=True, n_jobs=4)

plot_obj.plot_non_temp(**logo_5p)

plot_obj.plot_non_temp(**logo_3p)

plot_obj.plot_non_temp(**logo_3p_cover)


Now collecting data for sample:  A01  A05  A02  A03  A08  A07  A09  A06  A10  A11  A12  A13  A14  A15  A16  A17  A18  A20  A19  A21  A22  A23  A24  A25  A26  A27  A28  A30  A29  A31  A32  A33  A34  A38  A36  A35  A37  A39  A40  A41  A42  A43  A44  A46  A45  A48  A47  A50  A49  A51  A52  A53  A54  A55  A56  A57  A58  A60  A59  A61  A62  A64  A63  A65  A66  A67  A68  A70  A69  A71  A72  A73  A74  A75  A76
Now plotting logo plot.
Now collecting data for sample:  A01  A05  A02  A03  A08  A09  A07  A06  A10  A12  A11  A13  A14  A15  A17  A16  A18  A19  A21  A20  A22  A23  A24  A25  A26  A27  A28  A29  A30  A31  A32  A33  A34  A38  A35  A36  A37  A41  A39  A40  A42  A44  A43  A45  A46  A47  A49  A48  A50  A51  A53  A52  A54  A55  A56  A57  A58  A59  A60  A61  A62  A63  A65  A64  A66  A67  A68  A69  A70  A71  A73  A74  A72  A75  A76
Now plotting logo plot.
Now collecting data for sample:  A01  A03  A02  A05  A08  A07  A09  A06  A10  A11  A13  A12  A14  A16  A15  A17  A18  A19  A20  A21  A22 

In [10]:
# Export data.
# One charge table is written per aggregation level. The date tag used in
# the file names is defined once so that all three exports stay in sync.
export_date = '2023-06-06'
for df_type in ('aa', 'codon', 'transcript'):
    plot_obj.write_charge_df(
        df_type=df_type,
        fnam='charge-df_{}_{}'.format(df_type, export_date),
    )

In [13]:
### Perform transcript mutation analysis ###
# Note that this step takes the unmasked tRNA_database as reference.
TM_obj = TM_analysis(
    dir_dict,
    sample_df,
    tRNA_database,
    pull_default=False,
    common_seqs=common_seqs,
    ignore_common_count=False,
    overwrite_dir=False,
)

Using common sequences...
Folder exists and overwrite set to false... Doing nothing.


In [14]:
# Find mutations and save them, or load the mutations saved by an earlier run.
# Set RERUN_FIND_MUTS to True to redo the search and write the pickle again.
RERUN_FIND_MUTS = False
# The file name is defined once so that the write and the read branch
# can never point at different files:
muts_pickle_name = 'saved_muts_unique-anno.pickle'
if RERUN_FIND_MUTS:
    TM_obj.find_muts(n_jobs=4, unique_anno=True)
    TM_obj.pickle_muts_write(pickle_name=muts_pickle_name)
else:
    # NOTE(review): this presumably unpickles the file; only load pickles
    # that were written by this pipeline.
    TM_obj.pickle_muts_read(pickle_name=muts_pickle_name)

In [15]:
# Plot mutation/gap/RT stops for methionine depleted samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
met_sample_pairs = [['A01', 'A01', 'A20', 'A20', 'A39', 'A39', 'A58', 'A58'],
                    ['A10', 'A11', 'A29', 'A30', 'A48', 'A49', 'A67', 'A68']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='Met_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in met_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [17]:
# Plot mutation/gap/RT stops for lysine depleted samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
lys_sample_pairs = [['A01', 'A20', 'A39', 'A58'],
                    ['A13', 'A32', 'A51', 'A70']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='Lys_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in lys_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [19]:
# Plot mutation/gap/RT stops for histidine depleted samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
his_sample_pairs = [['A01', 'A20', 'A39', 'A58'],
                    ['A15', 'A34', 'A53', 'A72']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='His_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in his_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [20]:
# Plot mutation/gap/RT stops for tyrosine depleted samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
tyr_sample_pairs = [['A01', 'A20', 'A39', 'A58'],
                    ['A17', 'A36', 'A55', 'A74']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='Tyr_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in tyr_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [21]:
# Plot mutation/gap/RT stops for phenylalanine depleted samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
phe_sample_pairs = [['A01', 'A20', 'A39', 'A58'],
                    ['A19', 'A38', 'A57', 'A76']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='Phe_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in phe_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [22]:
# Plot mutation/gap/RT stops for serine/glycine depleted, plus rotenone, samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
serglyrot_sample_pairs = [['A01', 'A20', 'A39', 'A58'],
                          ['A09', 'A28', 'A47', 'A66']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='SerGlyRot_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in serglyrot_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [26]:
# Plot mutation/gap/RT stops for cysteine depleted, plus ferrostatin, samples.
# The same comparison is drawn once per data type; only data_type and the
# plot name derived from it differ between the calls.
# NOTE(review): the two sample lists are presumably matched position by
# position (first list vs. second list) -- confirm in TM_analysis.
cysfer_sample_pairs = [['A01', 'A20', 'A39', 'A58'],
                       ['A05', 'A24', 'A43', 'A62']]
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='CysFer_tr-{}_matrix_comp_top10-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=500,
        # Fresh lists for every call, in case the method modifies its input:
        sample_unique_pairs=[list(group) for group in cysfer_sample_pairs],
        topN=10,
        topN_select='max_diff',
    )

In [24]:
# Export the mutation/gap/RT stops data.
# One export per data type; the csv name is derived from the data type so
# the two can never get out of step.
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.write_transcript_mut(
        data_type=data_type,
        csv_name='tr-{}_matrix'.format(data_type),
    )