In [1]:
%load_ext autoreload
%autoreload 2

import os, sys, shutil, bz2, copy
from pathlib import Path
import pandas as pd
pd.set_option('display.max_columns', 50)
import numpy as np

### Plotting imports ###
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.colors as mcolors
import matplotlib as mpl
from matplotlib.patches import StepPatch
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec
import logomaker as lm
# Colour names of matplotlib's TABLEAU_COLORS mapping, collected in a list:
palette = [color_name for color_name in mcolors.TABLEAU_COLORS]
# Global seaborn appearance; the context is set after the theme, as before:
sns.set_theme(palette="muted", style="ticks")
sns.set_context("talk")
%matplotlib inline

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
# Remember the notebook folder the first time this cell is run, so that
# re-running it after a later os.chdir() still returns to the same folder:
if 'NBdir' not in globals():
    NBdir = os.getcwd()
print('Notebook is in: {}'.format(NBdir))
os.chdir(NBdir)  # If you changed the current working dir, this will take you back to the notebook dir.

# Define the path to the repo folder (two levels above the notebook folder).
# Change if necessary.
# os.path.dirname is used instead of splitting on '/' so the lookup follows
# the platform's path rules and never collapses to an empty string.
homedir = os.path.dirname(os.path.dirname(NBdir))
print('Repo is in: {}'.format(homedir))
# Make the repo importable for the `src` imports that follow in this cell.
# Guarded so that re-running the cell does not keep adding duplicate entries:
if homedir not in sys.path:
    sys.path.insert(1, homedir)
from src.misc import index_to_sample_df, downsample_raw_input, read_tRNAdb_info, sample_df_to_dict
from src.read_processing import AR_merge, BC_split, Kmer_analysis, BC_analysis, UMI_trim
from src.alignment import SWIPE_align
from src.stats_collection import STATS_collection
from src.plotting import TRNA_plot
from src.transcript_mutations import TM_analysis

# Default folder names for the data and the raw fastq files,
# given relative to the folder this notebook is in:
data_dir = 'data'
seq_dir = 'raw_fastq'
seq_dir_noDS = seq_dir # Not downsampled

# Folder names that the subsequent processing steps
# dump their data into. Best to not change:
AdapterRemoval_dir = 'AdapterRemoval'
BC_dir = 'BC_split'
UMI_dir = 'UMI_trimmed'
align_dir = 'SWalign'
stats_dir = 'stats_collection'
TM_dir = 'transcript_mutations'
plotting_dir = 'plotting'

# Reference tRNA fasta files, keyed by species:
tRNA_database = {
    'human': '{}/tRNA_database/human/hg38-tRNAs.fa'.format(homedir),
    'mouse': '{}/tRNA_database/mouse/mm10-tRNAs.fa'.format(homedir),
}
# Masked versions of the reference fasta files, keyed by species:
tRNA_database_masked = {
    'human': '{}/tRNA_database_masked/human/human-tRNAs.fa'.format(homedir),
    'mouse': '{}/tRNA_database_masked/mouse/mouse-tRNAs.fa'.format(homedir),
}
# Read information (length, codon etc) of tRNAs into dictionary:
tRNA_data = read_tRNAdb_info(tRNA_database)

# Score matrices for the alignment step:
SWIPE_score_mat = '{}/utils/nuc_score-matrix.txt'.format(homedir)
SWIPE_score_mat2 = '{}/utils/nuc_score-matrix_2.txt'.format(homedir) # For masked reference sequences

# tRNA sequencing yields many duplicated reads.
# Adding these commonly seen sequences to a list prevents duplicated alignment:
common_seqs = '{}/utils/common-seqs.fasta.bz2'.format(homedir)

# The minimum read length is the sum of the minimum insert size,
# the UMI length and the maximum barcode length:
MIN_INSERT_LEN = 10
UMI_LEN = 10
BC_MAX_LEN = 5
MIN_READ_LEN = sum((MIN_INSERT_LEN, UMI_LEN, BC_MAX_LEN))
print('Using minimum read length: {} (after merge)'.format(MIN_READ_LEN))

# Read index information from the spreadsheet in the repo's utils folder:
index_list_fnam = 'index_list.xlsx'
index_df = pd.read_excel('{}/utils/{}'.format(homedir, index_list_fnam))

Notebook is in: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen
Repo is in: /home/sulab/tRNA-charge-seq
Using minimum read length: 25 (after merge)


In [3]:
### Input settings ###
sample_list_fnam = 'sample_list.xlsx'
# Switch for running the pipeline on downsampled input files. Off by default.
DOWNSAMPLE_INPUT = False
# Value handed to downsample_raw_input as `downsample_absolute`
# (presumably the number of reads to keep -- confirm in src.misc):
DOWNSAMPLE_ABSOLUTE = 1e4

# Sample sheet located next to this notebook:
sample_df = pd.read_excel('{}/{}'.format(NBdir, sample_list_fnam))
# Add barcode sequences:
sample_df = index_to_sample_df(sample_df, index_df)
# Read elementary info (replicate, barcode, species)
# for each unique sample name into a dictionary:
sample_dict = sample_df_to_dict(sample_df)
# Get filenames from the sample information, one row per unique
# combination of fastq files and P5/P7 indices.
# drop_duplicates() already returns a new frame, so no explicit copy is needed:
inp_file_cols = ['fastq_mate1_filename', 'fastq_mate2_filename',
                 'P5_index', 'P7_index', 'P5_index_seq', 'P7_index_seq']
inp_file_df = sample_df[inp_file_cols].drop_duplicates().reset_index(drop=True)

# Downsample (replaces sample_df, inp_file_df and seq_dir with the
# values returned by downsample_raw_input):
if DOWNSAMPLE_INPUT:
    sample_df, inp_file_df, seq_dir = downsample_raw_input(
        sample_df, inp_file_df, NBdir, data_dir, seq_dir_noDS,
        downsample_absolute=DOWNSAMPLE_ABSOLUTE)

# Make a dictionary with paths used for data processing:
dir_dict = {
    'NBdir': NBdir,
    'data_dir': data_dir,
    'seq_dir': seq_dir,
    'AdapterRemoval_dir': AdapterRemoval_dir,
    'BC_dir': BC_dir,
    'UMI_dir': UMI_dir,
    'align_dir': align_dir,
    'stats_dir': stats_dir,
    'TM_dir': TM_dir,
    'plotting_dir': plotting_dir,
}

In [4]:
### Run AdapterRemoval ###
# MIN_READ_LEN is handed over as the third positional argument.
AR_obj = AR_merge(
    dir_dict,
    inp_file_df,
    MIN_READ_LEN,
    overwrite_dir=False,
    check_input=False,
)
# The returned table replaces the previous input-file table:
inp_file_df = AR_obj.run_parallel(overwrite=False, n_jobs=2)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/AdapterRemoval


In [5]:
### Split files based on barcodes ###
BCsplit_obj = BC_split(
    dir_dict,
    sample_df,
    inp_file_df,
    overwrite_dir=False,
)
# With load_previous=True the results of an earlier run are loaded when
# available (see the "Loaded results from previous run" message in the output):
sample_df, inp_file_df = BCsplit_obj.run_parallel(load_previous=True, n_jobs=6)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/BC_split
Loaded results from previous run... Not running barcode split.


In [6]:
### Trim UMIs from the barcode-split reads ###
# downsample_absolute=2.5e6 matches the N_after_downsample value of 2500000
# shown in the table below.
UMItrim_obj = UMI_trim(
    dir_dict,
    sample_df,
    overwrite_dir=False,
    downsample_absolute=2.5e6,
)
sample_df = UMItrim_obj.run_parallel(load_previous=True, n_jobs=4)
# Preview the first rows of the updated sample table:
sample_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/UMI_trimmed
Loaded results from previous run... Not running UMI trimming.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_after_trim,N_UMI_expected,N_after_downsample,N_UMI_observed
0,L-1,L,1,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l1,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,TAAGGCAT,3732885,1727039,1639607,3366646,90.188849,48.701497,98.663848,94.441311,3683008,523821.61594,2500000,494704
1,L-2,L,2,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l2,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,CGTACTCG,4737211,1714169,2726081,4440250,93.731312,61.394764,98.671497,96.224165,4674277,524217.591635,2500000,504424
2,L-1+2,L,3,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l3,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,ATGCAGTA,4910788,1292825,3318665,4611490,93.905296,71.965135,98.640524,97.015269,4844027,524237.065559,2500000,508590


In [7]:
### Align reads to database of reference tRNAs ###
# The masked reference database is used together with SWIPE_score_mat2,
# the score matrix intended for masked reference sequences.
align_obj = SWIPE_align(
    dir_dict,
    tRNA_database_masked,
    sample_df,
    SWIPE_score_mat2,
    gap_penalty=6,
    extension_penalty=3,
    min_score_align=15,
    overwrite_dir=False,
)
sample_df = align_obj.run_parallel(load_previous=True, overwrite=False, n_jobs=6)
# Preview the first rows, now including the mapping statistics columns:
sample_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/SWalign
Loaded results from previous run... Not running alignment.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_after_trim,N_UMI_expected,N_after_downsample,N_UMI_observed,N_mapped,percent_single_annotation,percent_multiple_annotation,percent_multiple_codons,Mapping_percent
0,L-1,L,1,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l1,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,TAAGGCAT,3732885,1727039,1639607,3366646,90.188849,48.701497,98.663848,94.441311,3683008,523821.61594,2500000,494704,2320697,88.412964,11.587036,0.718965,63.010914
1,L-2,L,2,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l2,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,CGTACTCG,4737211,1714169,2726081,4440250,93.731312,61.394764,98.671497,96.224165,4674277,524217.591635,2500000,504424,2394116,88.263016,11.736984,0.782168,51.218959
2,L-1+2,L,3,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l3,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,ATGCAGTA,4910788,1292825,3318665,4611490,93.905296,71.965135,98.640524,97.015269,4844027,524237.065559,2500000,508590,2396555,87.979871,12.020129,0.798938,49.474435


In [8]:
### Collect alignment statistics ###
# Uses the tRNA information dictionary (tRNA_data) read from the reference database.
stats_obj = STATS_collection(
    dir_dict,
    tRNA_data,
    sample_df,
    overwrite_dir=False,
)
# The result is kept in its own table (stats_df); sample_df is left as is:
stats_df = stats_obj.run_parallel(load_previous=True, n_jobs=6)
stats_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/stats_collection
Loaded results from previous run... Not running stats collection.


Unnamed: 0,sample_name_unique,sample_name,replicate,barcode,species,tRNA_annotation,tRNA_annotation_len,unique_annotation,5p_cover,align_3p_nt,codon,anticodon,amino_acid,align_gap,fmax_score>0.9,count,UMIcount,UMI_percent_exp
0,L-1,L,1,l1,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,False,False,337,337,100.032047
1,L-1,L,1,l1,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,False,True,4270,4270,100.407676
2,L-1,L,1,l1,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,True,False,317,317,100.030139


In [11]:
### Generate standard tRNAseq data plots ###
# Create the plotting object that the plotting cells below call their
# plot_* methods on. It is built from the folder dictionary and the
# current sample table.
# NOTE(review): this cell's execution count (11) is also used by a later
# cell in this notebook -- confirm the notebook runs top to bottom on a
# fresh kernel.
plot_obj = TRNA_plot(dir_dict, sample_df, overwrite_dir=False, pull_default=False)

In [13]:
# Ecoli control plot.
# min_obs=100 is presumably a minimum observation count required for
# inclusion -- confirm in src.plotting.
plot_obj.plot_Ecoli_ctr(min_obs=100)

In [14]:
# Codon abundance barchart:
plot_obj.plot_abundance(
    plot_type='codon',
    plot_name='codon_abundance_grp',
    group=True,
    min_obs=500,
)

# Codon charge barchart (same settings, with charge_plot switched on):
plot_obj.plot_abundance(
    plot_type='codon',
    plot_name='codon_charge_grp',
    group=True,
    min_obs=500,
    charge_plot=True,
)


Now plotting sample/group:  143B-ETCinhib  Atpenin-rescue  CytB-test  FCCP-test  Fix-test  H1299-ETCinhib  H1299-rotenone  Liver-tissue  Muscle-tissue  NaCl-Control  Pilot-exp  Tumor-tissue  UCPH-titration
Now plotting sample/group:  143B-ETCinhib  Atpenin-rescue  CytB-test  FCCP-test  Fix-test  H1299-ETCinhib  H1299-rotenone  Liver-tissue  Muscle-tissue  NaCl-Control  Pilot-exp  Tumor-tissue  UCPH-titration

In [15]:
# Coverage plots, one call per compartment.
# Cytoplasmic transcripts:
plot_obj.plot_coverage(
    compartment='cyto',
    plot_type='behrens',
    y_norm=True,
    plot_name='cov_plot_cyto_behrens_norm',
    n_jobs=12,
)

# Mitochondrial transcripts (identical settings apart from compartment and name):
plot_obj.plot_coverage(
    compartment='mito',
    plot_type='behrens',
    y_norm=True,
    plot_name='cov_plot_mito_behrens_norm',
    n_jobs=12,
)


Now collecting data for sample:  L-1  L-2  L-1+2  A-2  A-1+2  Rich-1  Rich-2  Rich-1+2  A-NaCl  L-NaCl  Rich-NaCl  A-1  U-1  Fix2  0U2  U-NaCl  2U2  FT  U-2  Fix3  Fix1  U-1+2  2U1  0U1  HAV2  HAV1  8U1  HVV1  4U1  HVV2  4U2  8U2  HVS1  HVP2  HVS2  HVP1  HAS1  HCV1  HAS2  HCV2  HCS1  HR30S1  HR30P1  HR30V2  HR30P2  HCS2  HR30V1  HR30S2  BVV1  BVV2  BVS1  BVS2  BVP1  HR20V1  HR40V1  HR20S1  BVP2  HR20S2  HR20V2  HR20P1  HR40P1  HR20P2  HR40S2  HR40S1  HR80P2  HR40V2  HR80S1  HR40P2  HR80V2  HR80V1  HR80P1  HR80S2  Tu2  Tu1  Mu2  Tu3  Li3  Tu4  Li1  Li2  Li4  Mu1  Mu4  Mu3  BAV1  BAV2  BVR1  BVR2  BAR2  BAR1  CyP  CyA
Now plotting sample:  L-1  L-2  L-1+2  L-NaCl  A-1  A-2  A-1+2  A-NaCl  Rich-1  Rich-2  Rich-1+2  Rich-NaCl  U-1  U-2  U-1+2  U-NaCl  Fix1  Fix2  Fix3  FT  0U1  0U2  2U1  2U2  4U1  4U2  8U1  8U2  HVV1  HVV2  HVS1  HVS2  HVP1  HVP2  HAV1  HAV2  HAS1  HAS2  HCV1  HCV2  HCS1  HCS2  HR30V1  HR30V2  HR30S1  HR30S2  HR30P1  HR30P2  BVV1  BVV2  BVS1  BVS2  BVP1  BVP2  HR20V1  HR2

In [16]:
# UMI logo plot.
# The printed output shows data being collected for every sample
# before a single logo plot is drawn.
plot_obj.plot_UMI_logo(n_jobs=4)


Now collecting data for sample:  L-1  L-2  L-1+2  L-NaCl  A-NaCl  A-1  A-2  A-1+2  Rich-NaCl  Rich-1  Rich-2  Rich-1+2  U-1  U-1+2  U-2  U-NaCl  Fix1  Fix2  Fix3  FT  0U1  0U2  2U2  2U1  4U1  8U2  4U2  8U1  HVV1  HVS1  HVS2  HVV2  HVP1  HVP2  HAV1  HAS1  HAV2  HAS2  HCV1  HCV2  HCS1  HCS2  HR30V1  HR30S1  HR30V2  HR30S2  HR30P2  HR30P1  BVV1  BVP1  BVS2  BVV2  BVS1  BVP2  HR20V1  HR20S1  HR20S2  HR20V2  HR20P1  HR40S1  HR20P2  HR40V1  HR40V2  HR40S2  HR40P1  HR40P2  HR80V1  HR80P1  HR80V2  HR80S1  HR80S2  Tu2  HR80P2  Tu1  Tu3  Tu4  Li1  Li2  Li3  Li4  Mu1  Mu2  BAV1  Mu3  Mu4  BVR1  BAV2  BVR2  BAR1  BAR2  CyP  CyA
Now plotting logo plot.

In [17]:
# Non-template nucleotides, 5' end:
plot_obj.plot_non_temp(
    end='5p',
    plot_name='_5p-non-template_logo',
    seq_len_percentile=99,
    n_jobs=4,
)

# Non-template nucleotides, 3' end:
plot_obj.plot_non_temp(
    end='3p',
    plot_name='_3p-non-template_logo',
    seq_len_percentile=99.9,
    n_jobs=4,
)

# Non-template nucleotides, 3' end, with the _3p_cover option switched on:
plot_obj.plot_non_temp(
    end='3p',
    plot_name='_3p-non-template_3p-cover_logo',
    seq_len_percentile=99.9,
    _3p_cover=True,
    n_jobs=4,
)


Now collecting data for sample:  L-1  L-2  L-1+2  L-NaCl  A-1  A-2  A-NaCl  A-1+2  Rich-1  Rich-NaCl  Rich-2  Rich-1+2  U-1  U-2  U-1+2  U-NaCl  Fix1  Fix2  FT  Fix3  0U2  0U1  2U1  2U2  4U2  8U1  4U1  8U2  HVV1  HVV2  HVS1  HVS2  HVP1  HAV1  HVP2  HAV2  HAS1  HAS2  HCV1  HCV2  HCS1  HR30V1  HCS2  HR30V2  HR30S1  HR30S2  HR30P1  BVV1  HR30P2  BVS1  BVS2  BVV2  BVP1  BVP2  HR20V1  HR20S1  HR20V2  HR20S2  HR20P1  HR40S1  HR20P2  HR40V1  HR40V2  HR40S2  HR40P1  HR80V1  HR40P2  HR80S2  HR80S1  HR80V2  HR80P1  Tu1  HR80P2  Tu2  Tu3  Li1  Tu4  Li2  Li3  Mu1  Li4  Mu2  Mu4  Mu3  BAV1  BAV2  BVR1  BVR2  BAR1  BAR2  CyP  CyA
Now plotting logo plot.
Now collecting data for sample:  L-1  L-2  L-NaCl  L-1+2  A-1  A-2  A-NaCl  A-1+2  Rich-1  Rich-NaCl  Rich-2  Rich-1+2  U-1  U-1+2  U-2  U-NaCl  Fix1  Fix2  FT  Fix3  0U1  0U2  2U1  2U2  4U1  8U1  4U2  8U2  HVV1  HVV2  HVS1  HVS2  HVP1  HAV1  HVP2  HAV2  HAS1  HAS2  HCV1  HCV2  HCS1  HCS2  HR30V1  HR30V2  HR30S1  HR30S2  HR30P1  HR30P2  BVV1  BVP1  

In [9]:
### Perform transcript mutation analysis ###
# Note that the unmasked reference database (tRNA_database) is passed here.
TM_obj = TM_analysis(
    dir_dict,
    sample_df,
    tRNA_database,
    pull_default=False,
    overwrite_dir=False,
)

Folder exists and overwrite set to false... Doing nothing.


In [10]:
# Find mutations and save them, or load the result saved by a previous run.
# Set RECOMPUTE_MUTS to True to redo the mutation search and overwrite the pickle.
RECOMPUTE_MUTS = False
# Single definition of the pickle name so the write and the read
# branch can never point at different files:
MUTS_PICKLE_NAME = 'saved_muts_unique-anno.pickle'

if RECOMPUTE_MUTS:
    TM_obj.find_muts(n_jobs=8, unique_anno=True)
    TM_obj.pickle_muts_write(pickle_name=MUTS_PICKLE_NAME)
else:
    # NOTE: unpickling can execute arbitrary code. Only load a pickle that
    # was written by this notebook or comes from a trusted source.
    TM_obj.pickle_muts_read(pickle_name=MUTS_PICKLE_NAME)

In [11]:
# Plot mutation/gap/RT stops for Atpenin rescue.
# The three plots differ only in `data_type` and the matching token in the
# plot name, so they are produced by one loop instead of three copied calls.
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='143B-Atp-Rot-rescue_{}_matrix_top20-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        # Presumably entry i of the first list is compared with entry i of
        # the second list -- confirm in src.transcript_mutations.
        # Written as a literal here so every call receives a fresh list.
        sample_pairs=[['BVP', 'BVP', 'BVP', 'BAV', 'BAV', 'BVR'],
                      ['BAV', 'BVR', 'BAR', 'BVR', 'BAR', 'BAR']],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20, topN_select='max_diff')

In [12]:
# Plot mutation/gap/RT stops for tRNA pilot experiment.
# The three plots differ only in `data_type` and the matching token in the
# plot name, so they are produced by one loop instead of three copied calls.
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='pilot-exp_{}_matrix_top20-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        # Presumably entry i of the first list is compared with entry i of
        # the second list -- confirm in src.transcript_mutations.
        # Written as a literal here so every call receives a fresh list.
        sample_pairs=[['Rich', 'Rich', 'Rich'],
                      ['L',    'A',    'U']],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20, topN_select='max_diff')

In [13]:
# Plot mutation/gap/RT stops for UCPH titration.
# The three plots differ only in `data_type` and the matching token in the
# plot name, so they are produced by one loop instead of three copied calls.
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='UCPH_{}_matrix_top20-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        # Presumably entry i of the first list is compared with entry i of
        # the second list -- confirm in src.transcript_mutations.
        # Written as a literal here so every call receives a fresh list.
        sample_pairs=[['0U', '0U', '0U'],
                      ['2U', '4U', '8U']],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20, topN_select='max_diff')

In [18]:
# Plot mutation/gap/RT stops for H1299 rotenone titration.
# The three plots differ only in `data_type` and the matching token in the
# plot name, so they are produced by one loop instead of three copied calls.
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='H1299-rot-tit_{}_matrix_top20-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        # Presumably entry i of the first list is compared with entry i of
        # the second list -- confirm in src.transcript_mutations.
        # Written as a literal here so every call receives a fresh list.
        sample_pairs=[['HVV',   'HVV',   'HVV',   'HVV',   'HVV'],
                      ['HR20V', 'HR30V', 'HR40V', 'HR80V', 'HR20P']],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20, topN_select='max_diff')

In [21]:
# Plot mutation/gap/RT stops for H1299 rotenone, vehicle vs. pyruvate.
# The three plots differ only in `data_type` and the matching token in the
# plot name, so they are produced by one loop instead of three copied calls.
for data_type in ('mut', 'gap', 'RTstops'):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name='H1299-rot-pyr_{}_matrix_top20-max-diff'.format(data_type),
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        # Presumably entry i of the first list is compared with entry i of
        # the second list -- confirm in src.transcript_mutations.
        # Written as a literal here so every call receives a fresh list.
        sample_pairs=[['HVV', 'HR20V', 'HR30V', 'HR40V', 'HR80V'],
                      ['HVP', 'HR20P', 'HR30P', 'HR40P', 'HR80P']],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20, topN_select='max_diff')