In [1]:
%load_ext autoreload
%autoreload 2

import os, sys, shutil, bz2, copy
from pathlib import Path
import pandas as pd
pd.set_option('display.max_columns', 50)
import numpy as np

### Plotting imports ###
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.colors as mcolors
import matplotlib as mpl
from matplotlib.patches import StepPatch
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec
import logomaker as lm
# Names of the Tableau colors, in the order matplotlib defines them:
palette = list(mcolors.TABLEAU_COLORS)
# Global seaborn look used by every figure in this notebook:
sns.set_theme(palette="muted", style="ticks")
sns.set_context("talk")
%matplotlib inline

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
# Remember the notebook directory the first time this cell runs.
# On a re-run NBdir already exists, so a changed working directory
# does not overwrite it:
if 'NBdir' not in globals():
    NBdir = os.getcwd()
print('Notebook is in: {}'.format(NBdir))
# If the current working dir was changed, this goes back to the notebook dir:
os.chdir(NBdir)

# Define the path to the repo folder.
# Change if necessary.
# The repo root is taken to be two directory levels above the notebook
# folder. os.path.dirname is used rather than splitting on '/', so the
# result does not depend on the path separator of the platform.
homedir = os.path.dirname(os.path.dirname(NBdir))
print('Repo is in: {}'.format(homedir))
# Make the repo importable (needed for the `src` package imports).
# Guarded so that re-running this cell does not add duplicate entries:
if homedir not in sys.path:
    sys.path.insert(1, homedir)
from src.misc import index_to_sample_df, downsample_raw_input, read_tRNAdb_info, sample_df_to_dict
from src.read_processing import AR_merge, BC_split, Kmer_analysis, BC_analysis, UMI_trim
from src.alignment import SWIPE_align
from src.stats_collection import STATS_collection
from src.plotting import TRNA_plot
from src.transcript_mutations import TM_analysis

# Default folder names for processed data and raw fastq files,
# given relative to the folder this notebook is in:
data_dir = "data"
seq_dir_noDS = "raw_fastq"  # Raw input, not downsampled
seq_dir = seq_dir_noDS      # Reassigned by the downsampling step when that is enabled

# Folder names the later processing steps dump their data into.
# Best to leave unchanged:
AdapterRemoval_dir = "AdapterRemoval"
BC_dir = "BC_split"
UMI_dir = "UMI_trimmed"
align_dir = "SWalign"
stats_dir = "stats_collection"
TM_dir = "transcript_mutations"
plotting_dir = "plotting"
# Reference tRNA sequences (fasta), one file per species:
tRNA_database = {
    'human': '{}/tRNA_database/human/hg38-tRNAs.fa'.format(homedir),
    'mouse': '{}/tRNA_database/mouse/mm10-tRNAs.fa'.format(homedir),
}
# Masked counterpart of the reference sequences:
# NOTE(review): the masked human file is "human-tRNAs.fa" while the unmasked
# one is "hg38-tRNAs.fa" -- confirm this naming difference is intended.
tRNA_database_masked = {
    'human': '{}/tRNA_database_masked/human/human-tRNAs.fa'.format(homedir),
    'mouse': '{}/tRNA_database_masked/mouse/mm10-tRNAs.fa'.format(homedir),
}
# Read information (length, codon etc) of tRNAs into dictionary.
# Note that this uses the unmasked database:
tRNA_data = read_tRNAdb_info(tRNA_database)
# Score matrices handed to the alignment step:
SWIPE_score_mat = '{}/utils/nuc_score-matrix.txt'.format(homedir)
SWIPE_score_mat2 = '{}/utils/nuc_score-matrix_2.txt'.format(homedir)  # For masked reference sequences
# tRNA sequencing yields many duplicated reads.
# Keeping the commonly seen sequences in a list prevents duplicated alignment:
common_seqs = '{}/utils/common-seqs.fasta.bz2'.format(homedir)

# The minimum read length follows from the minimum insert size
# plus the UMI length and the maximum barcode length:
MIN_INSERT_LEN, UMI_LEN, BC_MAX_LEN = 10, 10, 5
MIN_READ_LEN = sum((MIN_INSERT_LEN, UMI_LEN, BC_MAX_LEN))
print('Using minimum read length: {} (after merge)'.format(MIN_READ_LEN))

# Load the index information from the Excel sheet in the repo's utils folder:
index_list_fnam = 'index_list.xlsx'
index_df = pd.read_excel('/'.join((homedir, 'utils', index_list_fnam)))

Notebook is in: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen
Repo is in: /home/sulab/tRNA-charge-seq
Using minimum read length: 25 (after merge)


### Settings

In [3]:
# Load the sample sheet located in the notebook folder:
sample_list_fnam = 'sample_list.xlsx'
sample_df = pd.read_excel('/'.join((NBdir, sample_list_fnam)))
# Add barcode sequences:
sample_df = index_to_sample_df(sample_df, index_df)
# Elementary info (replicate, barcode, species) for each
# unique sample name, collected into a dictionary:
sample_dict = sample_df_to_dict(sample_df)
# Input filenames and their P5/P7 indices, deduplicated and
# re-indexed from zero:
inp_file_df = (
    sample_df[[
        'fastq_mate1_filename', 'fastq_mate2_filename',
        'P5_index', 'P7_index',
        'P5_index_seq', 'P7_index_seq',
    ]]
    .copy()
    .drop_duplicates()
    .reset_index(drop=True)
)

# Downsample:
# Named switch instead of a bare `if False:`, so the choice is visible and
# can be flipped without editing the condition itself.
# When enabled, sample_df, inp_file_df and seq_dir are all replaced by the
# values returned from downsample_raw_input; seq_dir_noDS is passed in as
# the source folder and is not reassigned here.
DOWNSAMPLE = False
if DOWNSAMPLE:
    sample_df, inp_file_df, seq_dir = downsample_raw_input(sample_df, inp_file_df, NBdir, data_dir, seq_dir_noDS, downsample_absolute=1e4)

# Collect every path/folder name used for data processing in one
# dictionary; it is handed to each processing object below:
dir_dict = {
    'NBdir': NBdir,
    'data_dir': data_dir,
    'seq_dir': seq_dir,
    'AdapterRemoval_dir': AdapterRemoval_dir,
    'BC_dir': BC_dir,
    'UMI_dir': UMI_dir,
    'align_dir': align_dir,
    'stats_dir': stats_dir,
    'TM_dir': TM_dir,
    'plotting_dir': plotting_dir,
}

In [None]:
# Run AdapterRemoval:
# NOTE(review): this cell has no execution count in the saved notebook, so
# later cells may have run with inp_file_df from the settings cell -- confirm.
AR_obj = AR_merge(
    dir_dict,
    inp_file_df,
    MIN_READ_LEN,
    overwrite_dir=False,
)
# inp_file_df is replaced by the dataframe returned from the run:
inp_file_df = AR_obj.run_parallel(
    n_jobs=4,
    overwrite=False,
)

In [4]:
# Split files based on barcodes:
BCsplit_obj = BC_split(
    dir_dict,
    sample_df,
    inp_file_df,
    overwrite_dir=False,
)
# Both dataframes are replaced by the versions returned from the run:
sample_df, inp_file_df = BCsplit_obj.run_parallel(
    n_jobs=12,
    load_previous=True,
)

In [4]:
### Generate UMI stats and write final trimmed tRNA sequences ###
# Note: no attempt is made to merge possible UMI duplicates in this step.
# The cDNA input amount is so large that sequencing the same PCR amplified
# DNA twice is very unlikely.
UMItrim_obj = UMI_trim(
    dir_dict,
    sample_df,
    overwrite_dir=False,
    check_input=False,
)
# With load_previous=True the recorded run reported loading results from a
# previous run instead of trimming again:
sample_df = UMItrim_obj.run_parallel(
    n_jobs=12,
    load_previous=True,
)
sample_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/UMI_trimmed
Loaded results from previous run... Not running UMI trimming.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,N_after_trim,N_UMI_observed,N_UMI_expected,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp
0,L-1,L,1,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l1,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,TAAGGCAT,3732885,1727046,1639602,3366648,90.188902,48.70132,3683016,494716,523821.623056,98.664063,94.4436
1,L-2,L,2,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l2,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,CGTACTCG,4737214,1714150,2726027,4440177,93.729711,61.394557,4674289,504438,524217.593246,98.671688,96.226835
2,L-1+2,L,3,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l3,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,ATGCAGTA,4910826,1292817,3318699,4611516,93.905099,71.965466,4844079,508574,524237.07061,98.640819,97.012216


In [5]:
### Align reads to database of reference tRNAs ###
# The alignment uses the masked database together with the score matrix
# meant for masked reference sequences (SWIPE_score_mat2).
align_obj = SWIPE_align(
    dir_dict,
    tRNA_database_masked,
    sample_df,
    SWIPE_score_mat2,
    gap_penalty=6,
    extension_penalty=3,
    min_score_align=15,
    overwrite_dir=False,
)
# With load_previous=True the recorded run reported loading results from a
# previous run instead of aligning again:
sample_df = align_obj.run_parallel(
    n_jobs=6,
    overwrite=False,
    load_previous=True,
)
sample_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/SWalign
Loaded results from previous run... Not running alignment.


Unnamed: 0,sample_name_unique,sample_name,replicate,fastq_mate1_filename,fastq_mate2_filename,P5_index,P7_index,barcode,species,plot_group,hue_name,hue_value,hue_order,P5_index_seq,P7_index_seq,barcode_seq,N_total,N_CC,N_CCA,N_CCA+CC,CCA+CC_percent_total,percent_CCA,N_after_trim,N_UMI_observed,N_UMI_expected,percent_seqs_after_UMI_trim,percent_UMI_obs-vs-exp,N_mapped,percent_single_annotation,percent_multiple_annotation,percent_multiple_codons,Mapping_percent
0,L-1,L,1,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l1,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,TAAGGCAT,3732885,1727046,1639602,3366648,90.188902,48.70132,3683016,494716,523821.623056,98.664063,94.4436,3419143,87.400439,12.599561,0.723953,92.83541
1,L-2,L,2,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l2,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,CGTACTCG,4737214,1714150,2726027,4440177,93.729711,61.394557,4674289,504438,524217.593246,98.671688,96.226835,4475974,88.112755,11.887245,0.785617,95.757323
2,L-1+2,L,3,2021-11-22/L_R1.fastq.bz2,2021-11-22/L_R2.fastq.bz2,D501,D701,l3,human,Pilot-exp,Treatment,No Leu,2,AGGCTATA,ATTACTCG,ATGCAGTA,4910826,1292817,3318699,4611516,93.905099,71.965466,4844079,508574,524237.07061,98.640819,97.012216,4644107,87.785488,12.214512,0.802716,95.871826


In [8]:
### Collect alignment statistics ###
# Note that the unmasked tRNA information (tRNA_data) is passed here.
stats_obj = STATS_collection(
    dir_dict,
    tRNA_data,
    sample_df,
    overwrite_dir=False,
)
stats_df = stats_obj.run_parallel(
    n_jobs=12,
    load_previous=False,
)
# The returned dataframe is "ALL_stats_aggregate_filtered.csv":
# the aggregated data, filtered to contain only the most relevant
# columns and requiring that the 3' end is covered and has no
# 3' non-template bases.
# The CSV file output "ALL_stats_aggregate.csv" is the data
# aggregated on all values being identical except readID,
# 5p_UMI and 3p_BC. I.e. all information except the UMI
# sequence is maintained in the aggregated CSV.
stats_df.head(3)

Using existing folder because overwrite set to false: /home/sulab/tRNA-charge-seq/projects/tRNAseq_first-gen/data/stats_collection
Collecting stats from:  L-1  L-2  L-1+2  A-2  A-1+2  A-NaCl  Rich-1  Rich-2  Rich-1+2  Rich-NaCl  L-NaCl  A-1  2U1  U-1  0U1  Fix1  U-2  U-NaCl  U-1+2  0U2  2U2  Fix3  8U1  4U1  FT  Fix2  HVS2  HVV1  HAV2  4U2  HAV1  HVP2  8U2  HAS2  HVS1  HCV2  HVP1  HR30V1  HVV2  HCV1  HR30P1  HAS1  BVV1  HCS2  HR30V2  HCS1  HR30P2  HR30S2  BVV2  HR20V1  BVP2  BVS1  HR20P1  BVS2  HR30S1  HR40P1  HR40V2  HR20S2  HR20S1  BVP1  HR20V2  HR40S2  HR80V1  HR80P1  Tu3  HR40P2  HR80V2  HR80P2  HR40S1  HR40V1  HR20P2  Tu1  HR80S1  Li2  Tu2  Mu2  BAV1  Li1  Tu4  Mu1  Mu4  CyA  HR80S2  BAR2  BAR1  BVR1  BAV2  CyP  Li4  Mu3  BVR2  Li3

Unnamed: 0,sample_name_unique,sample_name,replicate,barcode,tRNA_annotation,tRNA_annotation_len,unique_annotation,5p_cover,align_3p_nt,codon,anticodon,amino_acid,count
0,L-1,L,1,l1,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,A,AAA,TTT,eColiLys,7291
1,L-1,L,1,l1,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,False,C,AAA,TTT,eColiLys,6288
2,L-1,L,1,l1,Escherichia_coli_str_K_12_substr_MG1655_tRNA-e...,76,True,True,A,AAA,TTT,eColiLys,3787


In [9]:
### Generate standard tRNAseq data plots ###
# Unlike the processing steps above, overwrite_dir is True here.
plot_obj = TRNA_plot(
    dir_dict,
    sample_df,
    overwrite_dir=True,
    pull_default=False,
)



In [10]:



# NOTE(review): the saved output of this cell ends with
# "RuntimeError: Worker-4 died unexpectedly" while plotting. The cause is not
# visible here -- possibly resource pressure from the parallel workers, so
# consider lowering n_jobs and re-running before sharing the notebook.

# Normalized coverage plots, cytosolic then mitochondrial:
plot_obj.plot_coverage(
    compartment='cyto', plot_type='behrens', y_norm=True,
    plot_name='cov_plot_cyto_behrens_norm', n_jobs=8,
)
plot_obj.plot_coverage(
    compartment='mito', plot_type='behrens', y_norm=True,
    plot_name='cov_plot_mito_behrens_norm', n_jobs=8,
)

# Logo of 5' non-template bases:
plot_obj.plot_non_temp(
    end='5p', plot_name='_5p-non-template_logo',
    seq_len_percentile=99, n_jobs=4,
)

# UMI logo:
plot_obj.plot_UMI_logo(n_jobs=8)

# Logos of 3' non-template bases, without and with the _3p_cover option:
plot_obj.plot_non_temp(
    end='3p', plot_name='_3p-non-template_logo',
    seq_len_percentile=99.9, n_jobs=4,
)
plot_obj.plot_non_temp(
    end='3p', plot_name='_3p-non-template_3p-cover_logo',
    seq_len_percentile=99.9, _3p_cover=True, n_jobs=4,
)




Now collecting data for sample:  L-1  L-2  L-1+2  L-NaCl  A-1  A-2  A-1+2  A-NaCl  U-1  Rich-1  U-1+2  Rich-1+2  Rich-2  Rich-NaCl  U-NaCl  U-2  Fix2  Fix1  FT  0U1  2U2  0U2  Fix3  4U1  8U2  HVV2  HVS2  4U2  HVS1  2U1  HVV1  8U1  HAS2  HVP2  HAS1  HAV1  HCV1  HCV2  HVP1  HAV2  HR30V1  HR30V2  HR30P1  HCS2  HCS1  HR30S1  HR30S2  BVV1  BVV2  BVP1  HR30P2  BVP2  HR20V2  BVS1  HR20S2  BVS2  HR20P1  HR40P1  HR40V2  HR20V1  HR20P2  HR40V1  HR20S1  HR40S2  HR80V2  HR40P2  HR80S2  HR80P1  HR80V1  Li1  Tu2  HR80P2  Tu3  HR40S1  Li2  Tu4  Mu2  Li3  HR80S1  Mu1  Mu4  BVR2  Tu1  BVR1  Mu3  CyP  CyA  BAV2  Li4  BAV1  BAR2  BAR1
Now plotting sample:  L-1  L-2  L-1+2  L-NaCl  A-1  A-2  A-1+2  A-NaCl  Rich-1  Rich-2  Rich-1+2  Rich-NaCl  U-1  U-2  U-1+2  U-NaCl  Fix1  Fix2  Fix3  FT  0U1  0U2  2U1  2U2  4U1  4U2  8U1  8U2  HVV1  HVV2  HVS1  HVS2  HVP1  HVP2  HAV1  HAV2  HAS1  HAS2  HCV1  HCV2  HCS1  HCS2  HR30V1  HR30V2  HR30S1  HR30S2  HR30P1  HR30P2  BVV1  BVV2  BVS1  BVS2  BVP1  BVP2  HR20V1  HR2

RuntimeError: Worker-4 died unexpectedly

In [11]:
# NOTE(review): the sample names used below ('100p', 'L1', 'R1', ...) do not
# appear among the sample names printed by the earlier cells of this notebook,
# and the saved output lists plot groups from what looks like another
# dataset -- confirm these names exist in this project's sample_list.xlsx.

# Grouped abundance plots followed by grouped charge plots, each at codon
# and amino acid level. charge_plot is only passed where it is requested:
for _level, _name, _extra in (
    ('codon', 'codon_abundance_grp', {}),
    ('aa', 'aa_abundance_grp', {}),
    ('codon', 'codon_charge_grp', {'charge_plot': True}),
    ('aa', 'aa_charge_grp', {'charge_plot': True}),
):
    plot_obj.plot_abundance(plot_type=_level, plot_name=_name,
                            group=True, min_obs=500, **_extra)

# Abundance correlation between sample pairs, at codon and transcript level.
# The pair lists are built fresh for every call:
for _level, _name in (
    ('codon', 'codon_abundance_corr'),
    ('transcript', 'tr_abundance_corr'),
):
    plot_obj.plot_abundance_corr(
        sample_pairs=[['100p', '100p', '85p', '85p', '40p', '40h'],
                      ['85p', '0p', '40p', '0p', '0p', '40h_NoOx']],
        plot_type=_level, plot_name=_name,
        min_obs=500, charge_plot=False,
    )

# Same correlation plots for pairs given by unique sample name:
for _level, _name in (
    ('codon', 'codon_abundance_corr_L-R'),
    ('transcript', 'tr_abundance_corr_L-R'),
):
    plot_obj.plot_abundance_corr(
        sample_unique_pairs=[['L1', 'L1', 'R1'], ['R1', 'L6', 'R6']],
        plot_type=_level, plot_name=_name,
        min_obs=500, charge_plot=False,
    )

# Charge correlation at codon level between uniquely named sample pairs:
plot_obj.plot_abundance_corr(
    sample_unique_pairs=[['100p1', '85p1', '55p1', '40p1', '0p1', '40h_1', '40h_NoOx_1'],
                         ['100p3', '85p4', '55p3', '40p2', '0p2', '40h_2', '40h_NoOx_2']],
    plot_type='codon', plot_name='codon_charge_corr',
    min_obs=500, charge_plot=True,
)


Now plotting sample/group:  Charge-titration  Acylation-half-life  Barcode test  293t-NoLeu  293t-NoArg  293t-NoGln  HCT116-Depletion
Now plotting sample/group:  Charge-titration  Acylation-half-life  Barcode test  293t-NoLeu  293t-NoArg  293t-NoGln  HCT116-Depletion
Now plotting sample/group:  Charge-titration  Acylation-half-life  Barcode test  293t-NoLeu  293t-NoArg  293t-NoGln  HCT116-Depletion
Now plotting sample/group:  Charge-titration  Acylation-half-life  Barcode test  293t-NoLeu  293t-NoArg  293t-NoGln  HCT116-Depletion
Now plotting sample pairs:  (100p - 85p)  (100p - 0p)  (85p - 40p)  (85p - 0p)  (40p - 0p)  (40h - 40h_NoOx)
Now plotting sample pairs:  (100p - 85p)  (100p - 0p)  (85p - 40p)  (85p - 0p)  (40p - 0p)  (40h - 40h_NoOx)
Now plotting sample pairs:  (L1 - R1)  (L1 - L6)  (R1 - R6)
Now plotting sample pairs:  (L1 - R1)  (L1 - L6)  (R1 - R6)
Now plotting sample pairs:  (40h_1 - 40h_2)  (40h_NoOx_1 - 40h_NoOx_2)

In [None]:
# Use the TM_analysis to find the most mutated positions in the tRNA transcripts.
# Then generate a new version of the tRNA transcripts database with these positions masked.
# Then, using the masked sequences, re-run the alignment, stats collection and plotting.

# Re-run using gap_open = -3 and gap_extension = -2 to reflect how gaps are more tolerated

### The masked tRNA database will only be used for alignment
# All other steps will use the old unmasked database

In [12]:
### Perform transcript mutation analysis ###
# Note that the unmasked tRNA database is passed here.
TM_obj = TM_analysis(
    dir_dict,
    sample_df,
    tRNA_database,
    pull_default=False,
    overwrite_dir=False,
)


#TM_obj.find_muts(n_jobs=12, unique_anno=True)
#TM_obj.find_muts(n_jobs=4)

#TM_obj.plot_transcript_logo()



# tr_mut_out = TM_obj.plot_transcript_mut(topN=30, no_plot_return=False, mito=False, gap_only=False, plot_name='test_heat', min_count_show=10)
#tr_mut_out = TM_obj.plot_transcript_mut(topN=30, no_plot_return=False, mito=False, gap_only=False, plot_name='test_heat2')


#tr_cov_out = TM_obj.plot_transcript_cov(topN=40, no_plot_return=False, mito=False, plot_name='test_heat_cov', sort_rows=True)
#tr_mut_out = TM_obj.plot_transcript_mut(topN=40, no_plot_return=False, mito=False, gap_only=False, plot_name='test_heat2', sort_rows=tr_cov_out[2])


#TM_obj.mask_tRNA_database(min_mut_freq=0.5, min_pos_count=50, min_tr_count=100, frac_max_score=0.90)

#tRNA_database_masked = TM_obj.write_masked_tRNA_database(out_dir='tRNA_database_masked')

In [5]:
#TM_obj.find_muts(n_jobs=10, unique_anno=True, fix_end=True, \
#                 sample_list=['100p1', '100p3', '0p1', '0p2', '0p4', \
#                              '0m_1', '40h_NoOx_1', '40h_NoOx_2', '0m_3',
#                              'L1', 'L3', 'L6', 'L8', 'L12', \
#                              'R1', 'R3', 'R6', 'R8', 'R12'])

In [13]:
# Run the mutation search; no sample_list is given in this call.
TM_obj.find_muts(
    n_jobs=10,
    unique_anno=False,
    fix_end=True,
)

Collecting stats from:  L-1  L-2  A-2  A-1  A-1+2  L-NaCl  A-NaCl  L-1+2  Rich-1  Rich-2  U-1+2  Rich-1+2  Fix3  U-2  Rich-NaCl  U-1  Fix1  FT  Fix2  U-NaCl  0U2  2U2  2U1  0U1  HVV1  HVS1  HVP1  4U1  4U2  8U1  HVP2  HAV1  HAV2  HVS2  8U2  HAS2  HVV2  HAS1  HCV2  HCV1  HCS2  HR30V2  HR30P2  HCS1  HR30P1  HR30V1  HR30S2  BVV1  HR30S1  BVP1  BVV2  BVS1  BVP2  BVS2  HR20S2  HR20P1  HR20V2  HR20P2  HR40P2  HR20V1  HR40V2  HR20S1  HR40S2  HR80S2  HR40V1  HR80S1  HR80V1  Li1  HR80P2  HR40S1  Tu4  Tu2  Tu3  HR40P1  Tu1  HR80V2  Li2  HR80P1  Li4  Mu1  BAV1  Mu2  Mu4  BAR2  Mu3  Li3  BVR2  BAR1  BVR1  CyP  BAV2  CyA

In [13]:
# Three comparison plots over the same sample pairs. Only the plot name and
# the transcript selection (top 20 by max difference, or annotation
# substring 'Arg' / 'Leu') differ between the calls.
# The pair lists are built fresh for every call:
for _plot_name, _selection in (
    ('NoLeu-NoArg_tr-mut_matrix_comp_top20-max-diff', dict(topN=20, topN_select='max_diff')),
    ('NoLeu-NoArg_tr-mut_matrix_comp_AA-Arg', dict(anno_substring_compare='Arg')),
    ('NoLeu-NoArg_tr-mut_matrix_comp_AA-Leu', dict(anno_substring_compare='Leu')),
):
    TM_obj.plot_transcript_mut_compare(
        species='human',
        plot_name=_plot_name,
        no_plot_return=True,
        mito=False, gap_only=False,
        min_count_show=1000,
        sample_unique_pairs=[['L1', 'L8',  'L1', 'L6',  'R1', 'R8',  'R1', 'R6'],
                             ['L6', 'L12', 'L8', 'L12', 'R6', 'R12', 'R8', 'R12']],
        freq_avg_weighted=True,
        **_selection
    )

In [14]:
# Comparison plot restricted by the annotation substring 'Asn'.
# min_count_show is 400 here, lower than the 1000 used for Arg/Leu:
TM_obj.plot_transcript_mut_compare(
    species='human',
    plot_name='NoLeu-NoArg_tr-mut_matrix_comp_AA-Asn',
    no_plot_return=True,
    mito=False, gap_only=False,
    min_count_show=400,
    sample_unique_pairs=[['L1', 'L8',  'L1', 'L6',  'R1', 'R8',  'R1', 'R6'],
                         ['L6', 'L12', 'L8', 'L12', 'R6', 'R12', 'R8', 'R12']],
    freq_avg_weighted=True,
    anno_substring_compare='Asn',
)

In [15]:
# Comparison plot restricted by the annotation substring 'Ala'.
# min_count_show is 400 here, lower than the 1000 used for Arg/Leu:
TM_obj.plot_transcript_mut_compare(
    species='human',
    plot_name='NoLeu-NoArg_tr-mut_matrix_comp_AA-Ala',
    no_plot_return=True,
    mito=False, gap_only=False,
    min_count_show=400,
    sample_unique_pairs=[['L1', 'L8',  'L1', 'L6',  'R1', 'R8',  'R1', 'R6'],
                         ['L6', 'L12', 'L8', 'L12', 'R6', 'R12', 'R8', 'R12']],
    freq_avg_weighted=True,
    anno_substring_compare='Ala',
)

In [16]:
# Transcript mutation logo for a chosen subset of samples:
TM_obj.plot_transcript_logo(
    sample_list=['100p1', '100p3', '0p1', '0p2', '0p4',
                 '0m_1', '40h_NoOx_1', '40h_NoOx_2', '0m_3',
                 'L1', 'L3', 'L6', 'R1', 'R3', 'R6'],
    plot_name='tr-muts_logo',
)

In [17]:
# Transcript coverage matrix (topN=40, rows sorted) for a chosen subset of
# samples; the return value is kept in tr_cov_out:
tr_cov_out = TM_obj.plot_transcript_cov(
    topN=40, no_plot_return=True, mito=False,
    plot_name='tr-cov_matrix', sort_rows=True,
    sample_list=['100p1', '100p3', '0p1', '0p2', '0p4',
                 '0m_1', '40h_NoOx_1', '40h_NoOx_2', '0m_3',
                 'L1', 'L3', 'L6', 'R1', 'R3', 'R6'],
)

In [18]:
# Transcript mutation matrix (topN=40) for the same kind of sample subset;
# the return value is kept in tr_mut_out:
tr_mut_out = TM_obj.plot_transcript_mut(
    topN=40, no_plot_return=True, mito=False,
    gap_only=False, min_count_show=1000,
    plot_name='tr-mut_matrix',
    sample_list=['100p1', '100p3', '0p1', '0p2', '0p4',
                 '0m_1', '40h_NoOx_1', '40h_NoOx_2', '0m_3',
                 'L1', 'L3', 'L6', 'R1', 'R3', 'R6'],
)

In [18]:
# Write the mutation data for every key currently in TM_obj.tr_muts to CSV:
TM_obj.write_transcript_mut(
    list(TM_obj.tr_muts.keys()),
    csv_name='first-gen_tRNAseq_mut-matrix',
)

In [19]:
# Write the mutation data for every key currently in TM_obj.tr_muts to CSV.
# NOTE(review): the arguments are the same as in the preceding export apart
# from csv_name. The "left-aligned" name suggests find_muts was re-run with
# different settings in between -- confirm, and make that step explicit.
TM_obj.write_transcript_mut(
    list(TM_obj.tr_muts.keys()),
    csv_name='first-gen_tRNAseq_mut-matrix_left-aligned',
)