In [1]:
# Load IPython's autoreload extension; mode 2 reloads all modules before
# each cell is executed, so edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os, sys, shutil, bz2, copy
from pathlib import Path
import pandas as pd
# Show up to 50 columns when a DataFrame is displayed:
pd.set_option('display.max_columns', 50)
import numpy as np

### Plotting imports ###
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.colors as mcolors
import matplotlib as mpl
from matplotlib.patches import StepPatch
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec
import logomaker as lm
# List of the matplotlib Tableau color names:
palette = list(mcolors.TABLEAU_COLORS.keys())
# Seaborn defaults: "ticks" style, "muted" palette and the "talk" context:
sns.set_theme(style="ticks", palette="muted")
sns.set_context("talk")
# Render matplotlib figures inline in the notebook:
%matplotlib inline

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
# Navigate back to NBdir in case of re-running a code block:
# NBdir is only captured the first time this cell runs; on later runs the
# stored value is reused so the notebook folder is found again even after
# the working directory was changed.
if 'NBdir' not in globals():
    NBdir = os.getcwd()
print('Notebook is in: {}'.format(NBdir))
os.chdir(NBdir)  # If you changed the current working dir, this will take you back to the notebook dir.

# Define the path to the repo folder (two levels above the notebook folder).
# Change if necessary.
# os.path.dirname is used instead of splitting on '/' so the result is also
# correct with the platform's native path separator.
homedir = os.path.dirname(os.path.dirname(NBdir))
print('Repo is in: {}'.format(homedir))
# Make the repo importable. The insert is skipped when the entry is already
# present so that re-running this cell does not pile up duplicates in sys.path.
if homedir not in sys.path:
    sys.path.insert(1, homedir)
from src.misc import index_to_sample_df, downsample_raw_input, read_tRNAdb_info, sample_df_to_dict
from src.read_processing import AR_merge, BC_split, Kmer_analysis, BC_analysis, UMI_trim
from src.alignment import SWIPE_align
from src.stats_collection import STATS_collection
from src.plotting import TRNA_plot
from src.transcript_mutations import TM_analysis

# Default folders for processed data and raw fastq files,
# given relative to the folder holding this notebook:
data_dir, seq_dir = 'data', 'raw_fastq'
seq_dir_noDS = seq_dir  # Not downsampled

# Sub-folder names that the later processing steps write into.
# Best left unchanged:
AdapterRemoval_dir = 'AdapterRemoval'
BC_dir = 'BC_split'
UMI_dir = 'UMI_trimmed'
align_dir = 'SWalign'
stats_dir = 'stats_collection'
TM_dir = 'transcript_mutations'
plotting_dir = 'plotting'

# Reference tRNA fasta files keyed by species, plain and masked:
tRNA_database = {
    'human': '{}/tRNA_database/human/hg38-tRNAs.fa'.format(homedir),
}
tRNA_database_masked = {
    'human': '{}/tRNA_database_masked/human/human-tRNAs.fa'.format(homedir),
}
# Load tRNA information (length, codon etc) into a dictionary:
tRNA_data = read_tRNAdb_info(tRNA_database)

# Score matrix files for the alignment step:
SWIPE_score_mat = '{}/utils/nuc_score-matrix.txt'.format(homedir)
SWIPE_score_mat2 = '{}/utils/nuc_score-matrix_2.txt'.format(homedir)  # For masked reference sequences

# tRNA sequencing produces many duplicated reads; listing the commonly
# seen sequences in this file prevents aligning them repeatedly:
common_seqs = '{}/utils/common-seqs.fasta.bz2'.format(homedir)

# The minimum read length is the minimum insert size plus the
# UMI length plus the maximum barcode length:
MIN_INSERT_LEN, UMI_LEN, BC_MAX_LEN = 10, 10, 19
MIN_READ_LEN = sum((MIN_INSERT_LEN, UMI_LEN, BC_MAX_LEN))
print('Using minimum read length: {} (after merge)'.format(MIN_READ_LEN))

# Load the index table from the utils folder of the repo:
index_list_fnam = 'index_list.xlsx'
index_list_path = '{}/utils/{}'.format(homedir, index_list_fnam)
index_df = pd.read_excel(index_list_path)

Notebook is in: /home/sulab/tRNA-charge-seq/projects/tRNAseq_mtests
Repo is in: /home/sulab/tRNA-charge-seq
Using minimum read length: 39 (after merge)


In [3]:
### Input settings ###
# The sample sheet is read from the notebook folder:
sample_list_fnam = 'sample_list.xlsx'
sample_sheet_path = '{}/{}'.format(NBdir, sample_list_fnam)
# Load the sheet and add the barcode sequences from the index table:
sample_df = index_to_sample_df(pd.read_excel(sample_sheet_path), index_df)
# Dictionary holding elementary info (replicate, barcode, species)
# for each unique sample name:
sample_dict = sample_df_to_dict(sample_df)
# Table of input files: the filename/index columns of the sample
# information with duplicated rows removed and a fresh 0..n-1 index:
inp_file_cols = [
    'fastq_mate1_filename',
    'fastq_mate2_filename',
    'P5_index',
    'P7_index',
    'P5_index_seq',
    'P7_index_seq',
]
inp_file_df = (
    sample_df[inp_file_cols]
    .copy()
    .drop_duplicates()
    .reset_index(drop=True)
)

# Downsample:
# Optional step, off by default. Set DOWNSAMPLE to True to replace
# sample_df, inp_file_df and seq_dir with the values returned by
# downsample_raw_input; DOWNSAMPLE_ABSOLUTE is forwarded unchanged as its
# downsample_absolute argument.
DOWNSAMPLE = False
DOWNSAMPLE_ABSOLUTE = 1e4
if DOWNSAMPLE:
    sample_df, inp_file_df, seq_dir = downsample_raw_input(
        sample_df,
        inp_file_df,
        NBdir,
        data_dir,
        seq_dir_noDS,
        downsample_absolute=DOWNSAMPLE_ABSOLUTE,
    )

# Collect every path/folder name used for data processing in one
# dictionary, keyed by the name of the variable it was taken from:
dir_dict = {
    'NBdir': NBdir,
    'data_dir': data_dir,
    'seq_dir': seq_dir,
    'AdapterRemoval_dir': AdapterRemoval_dir,
    'BC_dir': BC_dir,
    'UMI_dir': UMI_dir,
    'align_dir': align_dir,
    'stats_dir': stats_dir,
    'TM_dir': TM_dir,
    'plotting_dir': plotting_dir,
}

In [4]:
### Run AdapterRemoval ###
# Set up the AR_merge step with the path dictionary, the input file table
# and the minimum read length, then run it with two jobs. The table it
# returns replaces inp_file_df.
AR_obj = AR_merge(
    dir_dict,
    inp_file_df,
    MIN_READ_LEN,
    overwrite_dir=False,
    check_input=False,
)
inp_file_df = AR_obj.run_parallel(n_jobs=2, overwrite=False)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_mtests/data/AdapterRemoval


In [5]:
### Split files based on barcodes ###
# The split returns updated versions of both the sample table and the
# input file table; load_previous=True is passed so stored results can
# be reused.
BCsplit_obj = BC_split(
    dir_dict,
    sample_df,
    inp_file_df,
    overwrite_dir=False,
)
sample_df, inp_file_df = BCsplit_obj.run_parallel(
    n_jobs=6,
    load_previous=True,
)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_mtests/data/BC_split
Loaded results from previous run... Not running barcode split.


In [6]:
### Generate UMI stats and write final trimmed tRNA sequences ###
# The returned table replaces sample_df; its first three rows are
# displayed as the cell output.
UMItrim_obj = UMI_trim(
    dir_dict,
    sample_df,
    overwrite_dir=False,
)
sample_df = UMItrim_obj.run_parallel(
    n_jobs=6,
    load_previous=True,
)
sample_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_mtests/data/UMI_trimmed
Loaded results from previous run... Not running UMI trimming.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,N_after_trim,N_UMI_observed,N_UMI_expected,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_after_downsample
0,100p1_v1,100p,1,2022-12-28/P1_R1.fastq.bz2,2022-12-28/P1_R2.fastq.bz2,D501,D701,l1Sp,human,Charge-titration,Percent charge,100p,1,AGGCTATA,ATTACTCG,GGCTGCCATGCGACTA,4882095,223688,4625327,4849015,99.322422,95.386939,4801265,515457,524232.737118,98.344358,98.325985,2000000
1,100p3_v2,100p,3,2022-12-28/P3_R1.fastq.bz2,2022-12-28/P3_R2.fastq.bz2,D501,D703,l2Sp,human,Charge-titration,Percent charge,100p,1,AGGCTATA,CGCTCATT,GGCTGCCATGCTGTCACG,3002203,149777,2828460,2978237,99.20172,94.970951,2951343,492411,522405.132752,98.305911,94.258454,2000000
2,0m_p1_v2,100p_0m,1,2023-02-28/P4_R1.fastq.bz2,2023-02-28/P4_R2.fastq.bz2,D502,D701,l5Sp,human,Charge-titration,Percent charge,100p_0m,2,GCCTCTAT,ATTACTCG,GGCTGCCATGCAACGCATC,2989616,163303,2798525,2961828,99.070516,94.486412,2932028,494676,522334.47346,98.073733,94.704835,2000000


In [7]:
### Align reads to database of reference tRNAs ###
# The masked reference database is used together with the second score
# matrix. The returned table replaces sample_df; its first three rows
# are displayed as the cell output.
align_obj = SWIPE_align(
    dir_dict,
    tRNA_database_masked,
    sample_df,
    SWIPE_score_mat2,
    gap_penalty=6,
    extension_penalty=3,
    min_score_align=15,
    common_seqs=common_seqs,
    overwrite_dir=False,
)
sample_df = align_obj.run_parallel(
    n_jobs=4,
    overwrite=False,
    load_previous=True,
)
sample_df.head(3)

Using common sequences to prevent duplicated alignment.
Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_mtests/data/SWalign
Loaded results from previous run... Not running alignment.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,N_after_trim,N_UMI_observed,N_UMI_expected,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_after_downsample,N_mapped,percent_single_annotation,percent_multiple_annotation,percent_multiple_codons,Mapping_percent
0,100p1_v1,100p,1,2022-12-28/P1_R1.fastq.bz2,2022-12-28/P1_R2.fastq.bz2,D501,D701,l1Sp,human,Charge-titration,Percent charge,100p,1,AGGCTATA,ATTACTCG,GGCTGCCATGCGACTA,4882095,223688,4625327,4849015,99.322422,95.386939,4801265,515457,524232.737118,98.344358,98.325985,2000000,1988871,78.933475,21.066525,2.359127,99.44355
1,100p3_v2,100p,3,2022-12-28/P3_R1.fastq.bz2,2022-12-28/P3_R2.fastq.bz2,D501,D703,l2Sp,human,Charge-titration,Percent charge,100p,1,AGGCTATA,CGCTCATT,GGCTGCCATGCTGTCACG,3002203,149777,2828460,2978237,99.20172,94.970951,2951343,492411,522405.132752,98.305911,94.258454,2000000,1977342,76.058416,23.941584,2.572494,98.8671
2,0m_p1_v2,100p_0m,1,2023-02-28/P4_R1.fastq.bz2,2023-02-28/P4_R2.fastq.bz2,D502,D701,l5Sp,human,Charge-titration,Percent charge,100p_0m,2,GCCTCTAT,ATTACTCG,GGCTGCCATGCAACGCATC,2989616,163303,2798525,2961828,99.070516,94.486412,2932028,494676,522334.47346,98.073733,94.704835,2000000,1975184,73.976349,26.023651,2.819383,98.7592


In [8]:
### Collect alignment statistics ###
# The collected statistics are returned as stats_df; its first three
# rows are displayed as the cell output.
stats_obj = STATS_collection(
    dir_dict,
    tRNA_data,
    sample_df,
    common_seqs=common_seqs,
    overwrite_dir=False,
)
stats_df = stats_obj.run_parallel(
    n_jobs=8,
    load_previous=True,
)
stats_df.head(3)

Using common sequences...
Collecting stats from:  100p1_v1  100p3_v2  0m_p1_v2  0m_p2_v2  0m_p3_v2  0m_p4_v2  85p1_v1  85p2_v1  70p2_v2  85p4_v1  85p2_v2  70p1_v1  85p1_v2  85p3_v2  70p1_v2  85p4_v2  70p5_v2  55p1_v1  70p3_v2  70p4_v2  55p3_v1  55p3_v2  55p2_v2  55p1_v2  40p2_v1  40p1_v1  55p4_v2  55p5_v2  40p2_v2  40p4_v1  40p1_v2  40p3_v2  25p1_v1  40p4_v2  25p1_v2  25p3_v2  25p5_v2  25p2_v2  25p4_v2  25p6_v2  10p1_v1  10p3_v1  10p4_v2  10p1_v2  0p1_v1  10p2_v2  10p3_v2  10p5_v2  0p4_v1  0p2_v1  0p1_v2  0p3_v2  0p2_v2  0p4_v2  0p_90m1_v1  0p_90m2_v1  0p_90m3_v1  0p_90m4_v1  0m_1  8m_1  4m_1  32m_1  16m_1  2h_1  1h_1  4h_1  8h_1  40h_NoOx_1  40h_1  16h_1  4m_2  0m_2  16m_2  8m_2  32m_2  1h_2  2h_2  8h_2  4h_2  16h_2  40h_NoOx_2  40h_2  0m_3  16m_3  8m_3  4m_3  32m_3  1h_3  4h_3  2h_3  8h_3  40h_3  16h_3  8m_4  40h_NoOx_3  0m_4  32m_4  4m_4  16m_4  1h_4  2h_4  4h_4  8h_4  16h_4  8h_p1  40h_NoOx_4  40h_4  8h_p2  8h_p4  8h_p3  8h_p5  8h_p7  8h_p8  8h_p9  8h_p6

Unnamed: 0,sample_name_unique,sample_name,replicate,barcode,species,tRNA_annotation,tRNA_annotation_len,unique_annotation,5p_cover,align_3p_nt,codon,anticodon,amino_acid,align_gap,fmax_score>0.9,count,UMIcount,UMI_percent_exp
0,100p1_v1,100p,1,l1Sp,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,False,False,1,1,100.0
1,100p1_v1,100p,1,l1Sp,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,False,True,50,49,98.00458
2,100p1_v1,100p,1,l1Sp,human,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,True,False,1,1,100.0


In [13]:
### Generate standard tRNAseq data plots ###
# plot_obj is the plotting object used by all plotting cells below.
plot_obj = TRNA_plot(
    dir_dict,
    sample_df,
    overwrite_dir=False,
    pull_default=False,
)

In [14]:
# Ecoli control:
# Calls the E. coli control plot on plot_obj (the TRNA_plot instance).
# NOTE(review): min_obs=100 is presumably a minimum-observation cutoff —
# confirm against src.plotting.
plot_obj.plot_Ecoli_ctr(min_obs=100)

In [15]:
# Settings common to both codon barcharts:
codon_bar_kw = dict(plot_type='codon', group=True, min_obs=500)

# Codon abundance barchart:
plot_obj.plot_abundance(plot_name='codon_abundance_grp', **codon_bar_kw)

# Codon charge barchart (same settings, with charge_plot switched on):
plot_obj.plot_abundance(plot_name='codon_charge_grp', charge_plot=True, **codon_bar_kw)


Now plotting sample/group:  Acylation-half-life  Barcode test  Charge-titration
Now plotting sample/group:  Acylation-half-life  Barcode test  Charge-titration

In [17]:
# Coverage plots for cyto/mito transcripts.
# Both plots share the plot type, the y normalization and the job count;
# only the compartment and the output name differ.
coverage_kw = dict(plot_type='behrens', y_norm=True, n_jobs=12)

plot_obj.plot_coverage(
    compartment='cyto',
    plot_name='cov_plot_cyto_behrens_norm',
    **coverage_kw,
)

plot_obj.plot_coverage(
    compartment='mito',
    plot_name='cov_plot_mito_behrens_norm',
    **coverage_kw,
)


Now collecting data for sample:  100p1_v1  100p3_v2  0m_p1_v2  0m_p4_v2  85p1_v1  85p2_v1  85p4_v1  85p1_v2  85p2_v2  85p3_v2  0m_p2_v2  0m_p3_v2  85p4_v2  70p1_v1  55p3_v1  55p1_v1  70p1_v2  70p5_v2  70p3_v2  55p1_v2  55p2_v2  55p3_v2  70p4_v2  70p2_v2  40p1_v1  55p5_v2  55p4_v2  40p2_v1  40p4_v1  40p2_v2  40p4_v2  40p3_v2  40p1_v2  25p1_v2  25p1_v1  25p2_v2  25p3_v2  25p6_v2  10p1_v1  25p4_v2  25p5_v2  10p3_v1  10p5_v2  10p2_v2  10p4_v2  10p1_v2  10p3_v2  0p1_v1  0p2_v1  0p1_v2  0p4_v1  0p3_v2  0p2_v2  0p4_v2  4m_1  0p_90m1_v1  0m_1  0p_90m2_v1  0p_90m4_v1  0p_90m3_v1  8m_1  16m_1  32m_1  40h_1  2h_1  1h_1  8h_1  4h_1  16h_1  40h_NoOx_1  4m_2  0m_2  8m_2  32m_2  16m_2  40h_NoOx_2  2h_2  4h_2  0m_3  16h_2  1h_2  40h_2  8h_2  4m_3  8m_3  16m_3  32m_3  1h_3  2h_3  8h_3  4h_3  40h_NoOx_3  40h_3  16h_3  0m_4  4m_4  8m_4  16m_4  1h_4  32m_4  2h_4  16h_4  4h_4  8h_4  40h_4  8h_p1  40h_NoOx_4  8h_p2  8h_p3  8h_p5  8h_p4  8h_p6  8h_p7  8h_p8  8h_p9
Now plotting sample:  100p1_v1  100p3_v2  0

In [18]:
# UMI logo:
# Calls the UMI logo plot on plot_obj (the TRNA_plot instance).
# NOTE(review): n_jobs=4 presumably sets the number of parallel workers —
# confirm against src.plotting.
plot_obj.plot_UMI_logo(n_jobs=4)


Now collecting data for sample:  100p1_v1  100p3_v2  0m_p1_v2  0m_p2_v2  0m_p3_v2  0m_p4_v2  85p1_v1  85p2_v1  85p4_v1  85p1_v2  85p2_v2  85p3_v2  85p4_v2  70p1_v2  70p1_v1  70p2_v2  70p3_v2  70p5_v2  70p4_v2  55p1_v1  55p3_v1  55p1_v2  55p2_v2  55p3_v2  55p4_v2  40p1_v1  55p5_v2  40p2_v1  40p4_v1  40p1_v2  40p2_v2  40p3_v2  40p4_v2  25p1_v1  25p1_v2  25p2_v2  25p3_v2  25p4_v2  25p5_v2  25p6_v2  10p1_v1  10p3_v1  10p1_v2  10p2_v2  10p3_v2  10p4_v2  10p5_v2  0p1_v1  0p4_v1  0p2_v1  0p1_v2  0p2_v2  0p3_v2  0p4_v2  0p_90m1_v1  0p_90m2_v1  0p_90m3_v1  0p_90m4_v1  0m_1  4m_1  8m_1  32m_1  16m_1  1h_1  2h_1  4h_1  8h_1  16h_1  40h_1  40h_NoOx_1  0m_2  4m_2  8m_2  16m_2  32m_2  1h_2  2h_2  4h_2  8h_2  16h_2  40h_2  40h_NoOx_2  0m_3  4m_3  8m_3  16m_3  32m_3  2h_3  1h_3  4h_3  8h_3  16h_3  40h_3  40h_NoOx_3  0m_4  4m_4  16m_4  8m_4  32m_4  1h_4  4h_4  2h_4  8h_4  16h_4  40h_NoOx_4  40h_4  8h_p2  8h_p1  8h_p3  8h_p4  8h_p5  8h_p6  8h_p7  8h_p8  8h_p9
Now plotting logo plot.

In [19]:
# Non-template nucleotides.
# One logo for the 5p end and two for the 3p end; the last 3p logo
# additionally passes _3p_cover=True.
plot_obj.plot_non_temp(
    end='5p',
    plot_name='_5p-non-template_logo',
    seq_len_percentile=99,
    n_jobs=4,
)

plot_obj.plot_non_temp(
    end='3p',
    plot_name='_3p-non-template_logo',
    seq_len_percentile=99.9,
    n_jobs=4,
)

plot_obj.plot_non_temp(
    end='3p',
    plot_name='_3p-non-template_3p-cover_logo',
    seq_len_percentile=99.9,
    _3p_cover=True,
    n_jobs=4,
)


Now collecting data for sample:  100p1_v1  100p3_v2  0m_p1_v2  0m_p2_v2  0m_p3_v2  0m_p4_v2  85p1_v1  85p2_v1  85p4_v1  85p1_v2  85p3_v2  85p2_v2  85p4_v2  70p1_v1  70p1_v2  70p2_v2  70p3_v2  70p4_v2  70p5_v2  55p1_v1  55p3_v1  55p1_v2  55p3_v2  55p2_v2  55p4_v2  55p5_v2  40p1_v1  40p2_v1  40p4_v1  40p3_v2  40p2_v2  40p1_v2  40p4_v2  25p1_v1  25p1_v2  25p2_v2  25p3_v2  25p4_v2  25p5_v2  25p6_v2  10p1_v1  10p3_v1  10p1_v2  10p2_v2  10p3_v2  10p4_v2  10p5_v2  0p1_v1  0p2_v1  0p4_v1  0p2_v2  0p1_v2  0p3_v2  0p4_v2  0p_90m1_v1  0p_90m2_v1  0p_90m3_v1  0p_90m4_v1  0m_1  4m_1  8m_1  16m_1  32m_1  2h_1  1h_1  4h_1  8h_1  40h_1  16h_1  40h_NoOx_1  0m_2  4m_2  8m_2  16m_2  32m_2  1h_2  2h_2  4h_2  8h_2  16h_2  40h_2  40h_NoOx_2  0m_3  16m_3  4m_3  8m_3  32m_3  1h_3  2h_3  4h_3  8h_3  16h_3  40h_3  40h_NoOx_3  0m_4  4m_4  16m_4  8m_4  32m_4  1h_4  4h_4  2h_4  8h_4  16h_4  40h_4  40h_NoOx_4  8h_p1  8h_p3  8h_p2  8h_p4  8h_p5  8h_p6  8h_p7  8h_p8  8h_p9
Now plotting logo plot.
Now collecting data

In [20]:
# Plot of abundance and charge correlation.
def _sample_name_pairs():
    """Return the [first samples, second samples] lists of `sample_name` values.

    New list objects are built on every call, so the two abundance plots
    below never receive the same list objects.
    """
    first_samples = ['100p', '100p', '85p', '85p', '40p', '0m', '40h']
    second_samples = ['85p', '0p', '40p', '0p', '0p', '40h', '40h_NoOx']
    return [first_samples, second_samples]


# Abundance correlation between sample names, plot_type='transcript':
plot_obj.plot_abundance_corr(
    sample_pairs=_sample_name_pairs(),
    sample_pairs_col='sample_name',
    plot_type='transcript',
    plot_name='tr_abundance_corr',
    min_obs=500,
    charge_plot=False,
    log=True,
)

# Same sample pairs, plot_type='codon':
plot_obj.plot_abundance_corr(
    sample_pairs=_sample_name_pairs(),
    sample_pairs_col='sample_name',
    plot_type='codon',
    plot_name='codon_abundance_corr',
    min_obs=500,
    charge_plot=False,
    log=True,
)

# Charge correlation between pairs of unique sample names:
plot_obj.plot_abundance_corr(
    sample_unique_pairs=[
        ['100p1_v1', '85p1_v1', '55p1_v1', '40p1_v1', '0p1_v1', '40h_1', '40h_NoOx_1'],
        ['100p3_v2', '85p4_v1', '55p3_v2', '40p2_v2', '0p2_v2', '40h_2', '40h_NoOx_2'],
    ],
    plot_type='codon',
    plot_name='codon_charge_corr',
    min_obs=500,
    charge_plot=True,
    one2one_corr=True,
)


Now plotting sample pairs:  (100p - 85p)  (100p - 0p)  (85p - 40p)  (85p - 0p)  (40p - 0p)  (0m - 40h)  (40h - 40h_NoOx)
Now plotting sample pairs:  (100p - 85p)  (100p - 0p)  (85p - 40p)  (85p - 0p)  (40p - 0p)  (0m - 40h)  (40h - 40h_NoOx)
Now plotting sample pairs:  (100p1_v1 - 100p3_v2)  (85p1_v1 - 85p4_v1)  (55p1_v1 - 55p3_v2)  (40p1_v1 - 40p2_v2)  (0p1_v1 - 0p2_v2)  (40h_1 - 40h_2)  (40h_NoOx_1 - 40h_NoOx_2)

In [9]:
### Perform transcript mutation analysis ###
# TM_obj is the analysis object used by the mutation cells below. It is
# built on the unmasked reference database (tRNA_database).
TM_obj = TM_analysis(
    dir_dict,
    sample_df,
    tRNA_database,
    pull_default=False,
    common_seqs=common_seqs,
    ignore_common_count=False,
    overwrite_dir=False,
)

Using common sequences...


In [10]:
# Find mutations and save them:
# Set RECOMPUTE_MUTS to True to run the mutation search again and write the
# result to the pickle; with False the previously saved result is read back.
# The pickle name is defined once so that the write and read branches cannot
# drift apart.
RECOMPUTE_MUTS = False
MUTS_PICKLE_NAME = 'saved_muts_unique-anno.pickle'
if RECOMPUTE_MUTS:
    TM_obj.find_muts(n_jobs=8, unique_anno=True)
    TM_obj.pickle_muts_write(pickle_name=MUTS_PICKLE_NAME)
else:
    # NOTE(review): presumably this unpickles a file written by the branch
    # above. Only read pickles produced by this pipeline; unpickling an
    # untrusted file can execute arbitrary code.
    TM_obj.pickle_muts_read(pickle_name=MUTS_PICKLE_NAME)

Collecting stats from:  100p1_v1  0m_p2_v2  100p3_v2  0m_p1_v2  85p1_v1  0m_p4_v2  0m_p3_v2  85p2_v1  70p1_v1  85p2_v2  70p1_v2  85p1_v2  85p4_v1  70p2_v2  85p4_v2  85p3_v2  70p5_v2  70p4_v2  70p3_v2  55p3_v1  55p1_v2  55p1_v1  55p2_v2  55p3_v2  55p4_v2  40p2_v1  55p5_v2  40p1_v1  40p4_v1  40p1_v2  40p4_v2  40p2_v2  25p1_v1  40p3_v2  25p1_v2  25p2_v2  25p5_v2  25p3_v2  25p6_v2  10p1_v1  25p4_v2  10p1_v2  10p3_v1  10p4_v2  0p1_v1  10p2_v2  10p3_v2  10p5_v2  0p2_v1  0p4_v1  0p1_v2  0p3_v2  0p2_v2  0p4_v2  0p_90m3_v1  0p_90m1_v1  0m_1  0p_90m4_v1  0p_90m2_v1  4m_1  8m_1  32m_1  16m_1  2h_1  8h_1  4h_1  1h_1  16h_1  40h_1  40h_NoOx_1  4m_2  0m_2  8m_2  32m_2  16m_2  1h_2  2h_2  4h_2  8h_2  16h_2  0m_3  8m_3  40h_2  40h_NoOx_2  4m_3  16m_3  32m_3  1h_3  2h_3  4h_3  8h_3  16h_3  40h_3  40h_NoOx_3  0m_4  4m_4  32m_4  8m_4  16m_4  1h_4  2h_4  16h_4  4h_4  8h_4  40h_4  40h_NoOx_4  8h_p1  8h_p2  8h_p4  8h_p3  8h_p6  8h_p5  8h_p7  8h_p8  8h_p9

In [11]:
# Plot mutation/gap/RT stops for charge titration samples.
def _plot_charge_titration_matrix(data_type, plot_name):
    """Run `TM_obj.plot_transcript_mut_compare` for one data type.

    Only `data_type` and `plot_name` vary between the plots; every other
    argument is fixed here. The sample pair lists are created anew on each
    call. The value returned by `plot_transcript_mut_compare` is passed
    through unchanged.
    """
    return TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name=plot_name,
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        sample_pairs=[
            ['100p', '100p', '85p', '85p', '40p'],
            ['85p', '0p', '40p', '0p', '0p'],
        ],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20,
        topN_select='max_diff',
    )


_plot_charge_titration_matrix('mut', 'charge-tit_mut_matrix_top20-max-diff')

_plot_charge_titration_matrix('gap', 'charge-tit_gap_matrix_top20-max-diff')

_plot_charge_titration_matrix('RTstops', 'charge-tit_RTstops_matrix_top20-max-diff')

In [12]:
# Plot mutation/gap/RT stops for the half-life ('hl') time point samples.
def _plot_half_life_matrix(data_type, plot_name):
    """Run `TM_obj.plot_transcript_mut_compare` for one data type.

    Only `data_type` and `plot_name` vary between the plots; every other
    argument is fixed here. The sample pair lists are created anew on each
    call. The value returned by `plot_transcript_mut_compare` is passed
    through unchanged.
    """
    return TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name=plot_name,
        no_plot_return=True,
        mito=False,
        data_type=data_type,
        min_count_show=400,
        sample_pairs=[
            ['0m', '0m', '0m', '4h', '40h'],
            ['1h', '4h', '40h', '16h', '40h_NoOx'],
        ],
        sample_pairs_col='sample_name',
        freq_avg_weighted=False,
        topN=20,
        topN_select='max_diff',
    )


_plot_half_life_matrix('mut', 'hl_mut_matrix_top20-max-diff')

_plot_half_life_matrix('gap', 'hl_gap_matrix_top20-max-diff')

_plot_half_life_matrix('RTstops', 'hl_RTstops_matrix_top20-max-diff')