In [None]:
# Specific modification analysis of m62A and m1acp3psU
import read_raw_current_class as rrc
from pathlib import Path
import pod5
import remora
from remora import io, refine_signal_map, util
import numpy as np
import argparse
import pysam
from tqdm import tqdm
import math
from itertools import repeat
import json
import re
import polars as pl
from multiprocessing import Process, Queue
import matplotlib.pyplot as plt
import os
import pandas as pd
import h5py



def _asymmetric_spread(values_per_position):
    """Return per-position means with separate lower/upper standard deviations.

    For every list in ``values_per_position`` the entries whose string form is
    ``"nan"`` are dropped and the rest are converted to ``float``. The mean of
    the remaining values is computed, then ``np.std`` is taken separately over
    the values at or below that mean and over the values at or above it.

    Returns:
        ``(means, lower_stds, upper_stds)`` - three lists with one entry per
        input position.
    """
    means = []
    lower_stds = []
    upper_stds = []
    for values in values_per_position:
        kept = [float(value) for value in values if str(value) != "nan"]
        center = np.mean(kept)
        means.append(center)
        lower_stds.append(np.std(np.array([v for v in kept if v <= center])))
        upper_stds.append(np.std(np.array([v for v in kept if v >= center])))
    return means, lower_stds, upper_stds


def plot_reference_coordinate(
    pod5_dr,
    df,
    bam_fh,
    kmer_table,
    sig_map_refiner,
    coordinate,
    bases_upstream,
    bases_downstream,
    ref_fragment,
    ax,
    ax2,
    color1,
    reference_path,
    condition,
    read_ids
):
    """Plot per-position signal and dwell-time summaries around a coordinate.

    Reads are selected when their id occurs both in ``read_ids`` and in the
    ``IDS`` entry of a ``df`` row whose ``Fragment`` equals ``ref_fragment``.
    Each selected read is wrapped in ``rrc.RawCurrentReadDataset`` and queried
    with ``extract_signal_reference_coordinates``. Reads whose returned motif
    has ``1 + bases_upstream + bases_downstream`` entries contribute their
    fourth and fifth return values (named trim-mean signal and dwell in this
    code) to per-position collections. The per-position means are drawn with
    asymmetric error bars on ``ax`` (signal) and ``ax2`` (dwell).

    Args:
        pod5_dr, bam_fh, kmer_table, sig_map_refiner: forwarded unchanged to
            ``rrc.RawCurrentReadDataset``.
        df: table providing the columns ``"IDS"`` and ``"Fragment"``; each
            ``IDS`` entry is a string that evaluates to a collection of ids.
        coordinate: index into the first sequence of the reference FASTA.
        bases_upstream: number of positions shown after ``coordinate``.
        bases_downstream: number of positions shown before ``coordinate``.
        ref_fragment: value of ``Fragment`` selecting the row(s) to use.
        ax: axes receiving the signal error bars.
        ax2: axes receiving the dwell-time error bars.
        color1: colour passed to both ``errorbar`` calls.
        reference_path: path of the reference FASTA (``~`` is expanded).
        condition: legend label; the exact value ``"IVT 18S"`` additionally
            limits processing to the first 20000 selected reads.
        read_ids: iterable of read ids considered for this call.

    Returns:
        ``(final_motif, extracted_sequence)``. ``final_motif`` is the motif of
        the last accepted read, or a string of ``"X"`` of window length when
        no read was accepted. ``extracted_sequence`` is the reference slice
        ``[coordinate - bases_downstream, coordinate + bases_upstream]``.
    """
    # Read the reference once and release the handle immediately.
    fasta_file = pysam.FastaFile(os.path.expanduser(reference_path))
    try:
        reference = fasta_file.references[0]
        reference_sequence = str(fasta_file.fetch(reference))
    finally:
        fasta_file.close()

    window_start = coordinate - bases_downstream
    window_stop = coordinate + bases_upstream + 1
    extracted_sequence = reference_sequence[window_start:window_stop]
    record_length = 1 + bases_upstream + bases_downstream

    # Bound up front so that a table without a matching fragment yields an
    # empty plot and a placeholder motif instead of a NameError.
    final_motif = "X" * record_length
    mean_values = [[] for _ in range(record_length)]
    mean_values_dwell = [[] for _ in range(record_length)]

    for fragment_id_list, fragment in zip(df["IDS"], df["Fragment"]):
        if fragment != ref_fragment:
            continue
        # NOTE(review): eval() executes whatever text the IDS column holds.
        # Only use with trusted tables; ast.literal_eval would be the safe
        # choice if the column is always a plain literal - confirm the format.
        fragment_ids = set(eval(fragment_id_list))
        # Keep the order of read_ids, restricted to ids of this fragment.
        valid_read_ids = [rid for rid in read_ids if rid in fragment_ids]
        # Each matching row starts from empty collections, so a later matching
        # row replaces the values gathered from an earlier one.
        mean_values = [[] for _ in range(record_length)]
        mean_values_dwell = [[] for _ in range(record_length)]
        for iteration, single_id in tqdm(
            enumerate(valid_read_ids), total=len(valid_read_ids)
        ):
            if condition == "IVT 18S" and iteration >= 20000:
                break
            dataset = rrc.RawCurrentReadDataset(
                id=single_id,
                pod5_dr=pod5_dr,
                bam_fh=bam_fh,
                kmer_table=kmer_table,
                sig_map_refiner=sig_map_refiner,
            )
            try:
                # NOTE(review): the window arguments are hard-coded to 10/10
                # and do not follow bases_upstream/bases_downstream; only
                # results of length record_length are kept below. Confirm
                # against extract_signal_reference_coordinates.
                (
                    ref_motifs,
                    _ref_signals,
                    _ref_mean_signals,
                    ref_trimmean_signals,
                    ref_dwell_signals,
                ) = dataset.extract_signal_reference_coordinates(coordinate, 10, 10)
                if len(ref_motifs) == record_length:
                    final_motif = ref_motifs
                    for position, signal in enumerate(ref_trimmean_signals):
                        mean_values[position].append(signal)
                    for position, dwell in enumerate(ref_dwell_signals):
                        mean_values_dwell[position].append(dwell)
            except (IndexError, TypeError):
                # Reads for which extraction fails are skipped.
                continue

    if mean_values[0]:
        signal_means, signal_lower, signal_upper = _asymmetric_spread(mean_values)
        dwell_means, dwell_lower, dwell_upper = _asymmetric_spread(
            mean_values_dwell
        )
        positions = list(range(record_length))
        ax.errorbar(
            positions,
            signal_means,
            yerr=[signal_lower, signal_upper],
            color=color1,
            marker="o",
            linestyle=None,
            lw=2,
            alpha=0.4,
            label=condition,
        )
        ax2.errorbar(
            positions,
            dwell_means,
            yerr=[dwell_lower, dwell_upper],
            color=color1,
            marker="o",
            linestyle=None,
            alpha=0.4,
            lw=2,
            label=condition,
        )
        ax.set_xticks(positions)
        ax.set_xticklabels(list(extracted_sequence))
        ax.set_xlabel("reference sequence")
        ax.set_ylabel("z-normalized signal")
        ax.set_ylim(-3, 3)
        ax.legend()
        ax2.set_xticks(positions)
        # Label the same window the sequence slice covers, so the number of
        # labels equals the number of ticks even for asymmetric windows.
        ax2.set_xticklabels(list(range(window_start, window_stop)))
        ax2.set_xlabel("reference position")
        ax2.set_ylabel("dwell time")
        ax2.set_ylim(0, 400)
        ax2.legend(handles=[])
    return final_motif, extracted_sequence

def make_table_from_reference_coordinate(
    pod5_dr,
    df,
    bam_fh,
    kmer_table,
    sig_map_refiner,
    coordinate,
    bases_upstream,
    bases_downstream,
    ref_fragment,
    reference_path,
    condition,
    read_ids,
    mean_signal_df,
    dwell_time_df
):
    """Collect per-read signal and dwell values into two polars tables.

    Reads are selected when their id occurs both in ``read_ids`` and in the
    ``IDS`` entry of a ``df`` row whose ``Fragment`` equals ``ref_fragment``.
    Each selected read is queried through
    ``rrc.RawCurrentReadDataset.extract_signal_reference_coordinates``. Reads
    whose returned motif has ``1 + bases_upstream + bases_downstream`` entries
    add one row to each table: the per-position values followed by
    ``condition``.

    Args:
        pod5_dr, bam_fh, kmer_table, sig_map_refiner: forwarded unchanged to
            ``rrc.RawCurrentReadDataset``.
        df: table providing the columns ``"IDS"`` and ``"Fragment"``; each
            ``IDS`` entry is a string that evaluates to a collection of ids.
        coordinate: reference coordinate passed to the extraction call.
        bases_upstream, bases_downstream: define the expected window length
            and therefore the number of value columns.
        ref_fragment: value of ``Fragment`` selecting the row(s) to use.
        reference_path: kept for signature compatibility; this function does
            not need the reference sequence and no longer opens the file.
        condition: stored in the ``"condition"`` column; the exact value
            ``"IVT"`` additionally limits processing to the first 20000
            selected reads.
        read_ids: iterable of read ids considered for this call.
        mean_signal_df: existing signal table to extend, or ``None``.
        dwell_time_df: existing dwell-time table to extend, or ``None``.

    Returns:
        ``(mean_signal_df, dwell_time_df)`` - the given tables with the new
        rows stacked underneath, or the new tables when ``None`` was given.
        Columns are ``"1"`` .. ``str(window length)`` (Float64) plus
        ``"condition"`` (Utf8).
    """
    record_length = 1 + bases_upstream + bases_downstream
    columns = [f"{i+1}" for i in range(record_length)]
    columns.append("condition")
    schema = {
        col: pl.Float64 if col != "condition" else pl.Utf8 for col in columns
    }

    dwell_rows = []
    signal_rows = []
    for fragment_id_list, fragment in zip(df["IDS"], df["Fragment"]):
        if fragment != ref_fragment:
            continue
        # NOTE(review): eval() executes whatever text the IDS column holds.
        # Only use with trusted tables; ast.literal_eval would be the safe
        # choice if the column is always a plain literal - confirm the format.
        fragment_ids = set(eval(fragment_id_list))
        # Keep the order of read_ids, restricted to ids of this fragment.
        valid_read_ids = [rid for rid in read_ids if rid in fragment_ids]
        for iteration, single_id in tqdm(
            enumerate(valid_read_ids), total=len(valid_read_ids)
        ):
            # NOTE(review): plot_reference_coordinate applies the same cap for
            # the label "IVT 18S"; here the label is "IVT". Confirm which
            # label callers of this function pass.
            if condition == "IVT" and iteration >= 20000:
                break
            dataset = rrc.RawCurrentReadDataset(
                id=single_id,
                pod5_dr=pod5_dr,
                bam_fh=bam_fh,
                kmer_table=kmer_table,
                sig_map_refiner=sig_map_refiner,
            )
            try:
                # NOTE(review): the window arguments are hard-coded to 10/10
                # and do not follow bases_upstream/bases_downstream; only
                # results of length record_length are kept below. Confirm
                # against extract_signal_reference_coordinates.
                (
                    ref_motifs,
                    _ref_signals,
                    _ref_mean_signals,
                    ref_trimmean_signals,
                    ref_dwell_signals,
                ) = dataset.extract_signal_reference_coordinates(coordinate, 10, 10)
                if len(ref_motifs) == record_length:
                    # Build both rows before storing either one, so a failure
                    # cannot leave the two tables with different row counts.
                    dwell_row = [*ref_dwell_signals, condition]
                    signal_row = [*ref_trimmean_signals, condition]
                    dwell_rows.append(dwell_row)
                    signal_rows.append(signal_row)
            except (IndexError, TypeError):
                # Reads for which extraction fails are skipped.
                continue

    # The data is a list of rows; state the orientation explicitly instead of
    # relying on polars inferring it from the data dimensions.
    temp_dwell_time_df = pl.DataFrame(dwell_rows, schema=schema, orient="row")
    temp_signal_df = pl.DataFrame(signal_rows, schema=schema, orient="row")

    if dwell_time_df is not None:
        dwell_time_df = dwell_time_df.vstack(temp_dwell_time_df)
    else:
        dwell_time_df = temp_dwell_time_df
    if mean_signal_df is not None:
        mean_signal_df = mean_signal_df.vstack(temp_signal_df)
    else:
        mean_signal_df = temp_signal_df
    return mean_signal_df, dwell_time_df
    

In [None]:
# m1acp3psU, reference coordinate 4901: overlay the 18S fragment of every
# condition on one signal axis (ax1) and one dwell-time axis (ax2).
fig, (ax1,ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
ref_fragment = "18S"
coordinate = 4901
bases_upstream = 5
bases_downstream = 5
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
name_modification = "m1acp3psU"
# The same k-mer level table is used for every condition. The "NP Nucleus"
# stanza previously pointed at a path with repeated
# "Synology/Data_nano_ribolyzer" segments, unlike every other use in this
# notebook; it now uses the common path.
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# (condition label, plot colour, pod5 file, fragment table, aligned BAM)
# NOTE(review): some data paths carry a "~/Synology/Data_nano_ribolyzer"
# prefix while the same datasets are addressed without it elsewhere in this
# notebook. They are kept exactly as they were - confirm which root is valid.
datasets = [
    (
        "IVPA Cytoplasm",
        "navy",
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5",
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv",
        "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
    (
        "IVPA Nucleus",
        "darkred",
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5",
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
    (
        "NP Cytoplasm",
        "royalblue",
        "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5",
        "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam",
    ),
    (
        "NP Nucleus",
        "indianred",
        "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5",
        "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam",
    ),
    (
        "IVT 18S",
        "green",
        "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5",
        "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
        "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
]

for condition, color, pod5_path, fragment_csv_path, bam_path in datasets:
    pod5_dr = pod5.DatasetReader(pod5_path)
    read_ids = list(pod5_dr.read_ids)
    df = pl.read_csv(fragment_csv_path, separator=";")
    bam_fh = io.ReadIndexedBam(bam_path)
    sig_map_refiner = refine_signal_map.SigMapRefiner(
        kmer_model_filename=kmer_table,
        do_rough_rescale=True,
        scale_iters=1,
        do_fix_guage=True,
    )
    final_motif, extracted_sequence = plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=condition,
        read_ids=read_ids,
    )

fig.savefig(f"{name_modification}_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

In [None]:
# m62A, reference coordinate 5503: overlay the 18S fragment of every condition
# on one signal axis (ax1) and one dwell-time axis (ax2).
fig, (ax1,ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
ref_fragment = "18S"
coordinate = 5503
bases_upstream = 5
bases_downstream = 5
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
name_modification = "m62A"
fig.suptitle(f"{name_modification}", fontsize=16)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# (condition label, plot colour, pod5 file, fragment table, aligned BAM),
# processed in the order listed.
datasets = [
    (
        "IVPA Cytoplasm",
        "navy",
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5",
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
    (
        "IVPA Nucleus",
        "darkred",
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5",
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
    (
        "NP Cytoplasm",
        "royalblue",
        "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5",
        "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam",
    ),
    (
        "NP Nucleus",
        "indianred",
        "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5",
        "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam",
    ),
    (
        "IVT 18S",
        "green",
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5",
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
]

for condition, color, pod5_path, fragment_csv_path, bam_path in datasets:
    pod5_dr = pod5.DatasetReader(pod5_path)
    read_ids = list(pod5_dr.read_ids)
    df = pl.read_csv(fragment_csv_path, separator=";")
    bam_fh = io.ReadIndexedBam(bam_path)
    # A fresh refiner is built for every dataset.
    sig_map_refiner = refine_signal_map.SigMapRefiner(
        kmer_model_filename=kmer_table,
        do_rough_rescale=True,
        scale_iters=1,
        do_fix_guage=True,
    )
    final_motif, extracted_sequence = plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=condition,
        read_ids=read_ids,
    )

fig.savefig(f"{name_modification}_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

In [None]:
# m62A, reference coordinate 5504: fragments of the "NP Nucleus" dataset,
# followed by the 18S fragment of the "IVT 18S" dataset, on shared axes.

# Settings for this figure.
name_modification = "m62A"
coordinate = 5504
bases_upstream = 5
bases_downstream = 5
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# Fragments to draw and the colour of each ("30S" and "26S" are left out).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Inputs of the condition under study.
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# One call per fragment; the legend label combines condition and fragment.
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=f"{condition} {ref_fragment}",
        read_ids=read_ids,
    )

# 18S fragment of the IVT 18S dataset, drawn in red on the same axes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr=pod5_dr,
    df=df,
    bam_fh=bam_fh,
    kmer_table=kmer_table,
    sig_map_refiner=sig_map_refiner,
    coordinate=coordinate,
    bases_upstream=bases_upstream,
    bases_downstream=bases_downstream,
    ref_fragment=ref_fragment,
    ax=ax1,
    ax2=ax2,
    color1="red",
    reference_path=reference_path,
    condition=condition,
    read_ids=read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m1acp3psU, reference coordinate 4901: fragments of the "NP Nucleus" dataset,
# followed by the 18S fragment of the "IVT 18S" dataset, on shared axes.

# Settings for this figure.
name_modification = "m1acp3psU"
coordinate = 4901
bases_upstream = 5
bases_downstream = 5
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# Fragments to draw and the colour of each ("30S" and "26S" are left out).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Inputs of the condition under study.
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# One call per fragment; the legend label combines condition and fragment.
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=f"{condition} {ref_fragment}",
        read_ids=read_ids,
    )

# 18S fragment of the IVT 18S dataset, drawn in red on the same axes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr=pod5_dr,
    df=df,
    bam_fh=bam_fh,
    kmer_table=kmer_table,
    sig_map_refiner=sig_map_refiner,
    coordinate=coordinate,
    bases_upstream=bases_upstream,
    bases_downstream=bases_downstream,
    ref_fragment=ref_fragment,
    ax=ax1,
    ax2=ax2,
    color1="red",
    reference_path=reference_path,
    condition=condition,
    read_ids=read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m62A, reference coordinate 5504: fragments of the "IVPA Nucleus" dataset,
# followed by the 18S fragment of the "IVT 18S" dataset, on shared axes.

# Settings for this figure.
name_modification = "m62A"
coordinate = 5504
bases_upstream = 5
bases_downstream = 5
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# Fragments to draw and the colour of each ("30S" and "26S" are left out).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Inputs of the condition under study.
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# One call per fragment; the legend label combines condition and fragment.
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=f"{condition} {ref_fragment}",
        read_ids=read_ids,
    )

# 18S fragment of the IVT 18S dataset, drawn in red on the same axes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr=pod5_dr,
    df=df,
    bam_fh=bam_fh,
    kmer_table=kmer_table,
    sig_map_refiner=sig_map_refiner,
    coordinate=coordinate,
    bases_upstream=bases_upstream,
    bases_downstream=bases_downstream,
    ref_fragment=ref_fragment,
    ax=ax1,
    ax2=ax2,
    color1="red",
    reference_path=reference_path,
    condition=condition,
    read_ids=read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m1acp3psU, reference coordinate 4901: fragments of the "IVPA Nucleus"
# dataset, followed by the 18S fragment of the "IVT 18S" dataset, on shared
# axes.

# Settings for this figure.
name_modification = "m1acp3psU"
coordinate = 4901
bases_upstream = 5
bases_downstream = 5
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# Fragments to draw and the colour of each ("30S" and "26S" are left out).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Inputs of the condition under study.
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# One call per fragment; the legend label combines condition and fragment.
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=f"{condition} {ref_fragment}",
        read_ids=read_ids,
    )

# 18S fragment of the IVT 18S dataset, drawn in red on the same axes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr=pod5_dr,
    df=df,
    bam_fh=bam_fh,
    kmer_table=kmer_table,
    sig_map_refiner=sig_map_refiner,
    coordinate=coordinate,
    bases_upstream=bases_upstream,
    bases_downstream=bases_downstream,
    ref_fragment=ref_fragment,
    ax=ax1,
    ax2=ax2,
    color1="red",
    reference_path=reference_path,
    condition=condition,
    read_ids=read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m62A, reference coordinate 5504: fragments of the "NP Cytoplasm" dataset,
# followed by the 18S fragment of the "IVT 18S" dataset, on shared axes.

# Settings for this figure. Unlike the nucleus cells, the window here is
# asymmetric (5 upstream, 10 downstream).
name_modification = "m62A"
coordinate = 5504
bases_upstream = 5
bases_downstream = 10
condition = "NP Cytoplasm"
name_condition = condition
condition_label = condition.replace("_", " ")
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# Fragments to draw and the colour of each ("30S" and "26S" are left out).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Inputs of the condition under study.
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# One call per fragment; the legend label combines condition and fragment.
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=f"{condition} {ref_fragment}",
        read_ids=read_ids,
    )

# 18S fragment of the IVT 18S dataset, drawn in red on the same axes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr=pod5_dr,
    df=df,
    bam_fh=bam_fh,
    kmer_table=kmer_table,
    sig_map_refiner=sig_map_refiner,
    coordinate=coordinate,
    bases_upstream=bases_upstream,
    bases_downstream=bases_downstream,
    ref_fragment=ref_fragment,
    ax=ax1,
    ax2=ax2,
    color1="red",
    reference_path=reference_path,
    condition=condition,
    read_ids=read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()


In [None]:
# m1acp3psU, reference coordinate 4901: fragments of the "NP Cytoplasm"
# dataset, followed by the 18S fragment of the "IVT 18S" dataset, on shared
# axes.

# Settings for this figure. Unlike the nucleus cells, the window here is
# asymmetric (5 upstream, 10 downstream).
name_modification = "m1acp3psU"
coordinate = 4901
bases_upstream = 5
bases_downstream = 10
condition = "NP Cytoplasm"
name_condition = condition
condition_label = condition.replace("_", " ")
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"

# Fragments to draw and the colour of each ("30S" and "26S" are left out).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Inputs of the condition under study.
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# One call per fragment; the legend label combines condition and fragment.
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr=pod5_dr,
        df=df,
        bam_fh=bam_fh,
        kmer_table=kmer_table,
        sig_map_refiner=sig_map_refiner,
        coordinate=coordinate,
        bases_upstream=bases_upstream,
        bases_downstream=bases_downstream,
        ref_fragment=ref_fragment,
        ax=ax1,
        ax2=ax2,
        color1=color,
        reference_path=reference_path,
        condition=f"{condition} {ref_fragment}",
        read_ids=read_ids,
    )

# 18S fragment of the IVT 18S dataset, drawn in red on the same axes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr=pod5_dr,
    df=df,
    bam_fh=bam_fh,
    kmer_table=kmer_table,
    sig_map_refiner=sig_map_refiner,
    coordinate=coordinate,
    bases_upstream=bases_upstream,
    bases_downstream=bases_downstream,
    ref_fragment=ref_fragment,
    ax=ax1,
    ax2=ax2,
    color1="red",
    reference_path=reference_path,
    condition=condition,
    read_ids=read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m62A (reference coordinate 5504): IVPA Cytoplasm processing intermediates
# plotted together with the IVT 18S sample on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for this condition, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "indigo",
    "palevioletred",
    "deepskyblue",
    "darkolivegreen",
]

coordinate = 5504
name_modification = "m62A"
condition = "IVPA Cytoplasm"
name_condition = condition  # kept separately: `condition` is overwritten below
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 10

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Cytoplasm: one trace per fragment --------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# m1acp3psU (reference coordinate 4901): IVPA Cytoplasm processing
# intermediates plotted together with the IVT 18S sample on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for this condition. ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# zip() below stops at the shorter sequence, so with four fragments the
# trailing "gray" entry is never used.
# Colours tried earlier and dropped: "seagreen", "saddlebrown".
colors = [
    "indigo",
    "palevioletred",
    "deepskyblue",
    "darkolivegreen",
    "gray",
]

coordinate = 4901
name_modification = "m1acp3psU"
condition = "IVPA Cytoplasm"
name_condition = condition  # kept separately: `condition` is overwritten below
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 10

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Cytoplasm: one trace per fragment --------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# m62A (reference coordinate 5504): IVPA Nucleus intermediates compared with
# IVPA Cytoplasm 18S, IVT 18S and the DIMT1L knock-out 18S on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for the nucleus sample, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Colour legend used across the comparison figures:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5504
name_modification = "m62A"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 10

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Nucleus: one trace per fragment ----------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA Cytoplasm 18S ----------------------------------------------------
# Set the singular `ref_fragment` explicitly; the call below must not depend
# on whatever value the loop above happened to finish with.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# --- DIMT1L knock-out 18S --------------------------------------------------
ref_fragment = "18S"
condition = "DIMT1L KO 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/DIMT1L_KO/filtered_pod5/filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/DIMT1L_KO/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/DIMT1L_KO/filtered_pod5/filtered_pod5_rebasecalled.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#010101",
    reference_path, condition, read_ids,
)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# m7G (reference coordinate 5292): IVPA Nucleus intermediates compared with
# IVPA Cytoplasm 18S, IVT 18S and the WBSCR knock-out 18S on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for the nucleus sample, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Colour legend used across the comparison figures:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5292
name_modification = "m7G"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 5

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Nucleus: one trace per fragment ----------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA Cytoplasm 18S ----------------------------------------------------
# Set the singular `ref_fragment` explicitly; the call below must not depend
# on whatever value the loop above happened to finish with.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# --- WBSCR knock-out 18S ---------------------------------------------------
ref_fragment = "18S"
condition = "WBSCR KO 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/WBSCR_KO/filtered_pod5/filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/WBSCR_KO/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/WBSCR_KO/filtered_pod5/filtered_pod5_rebasecalled.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#010101",
    reference_path, condition, read_ids,
)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# Unannotated site at reference coordinate 5431: IVPA Nucleus intermediates
# compared with IVPA Cytoplasm 18S and IVT 18S on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for the nucleus sample, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Colour legend used across the comparison figures:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5431
name_modification = "unknown"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 5

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Nucleus: one trace per fragment ----------------------------------
# NOTE(review): this section previously loaded the IVPA_Cyt files while
# labelling the traces "IVPA Nucleus", so the nucleus and cytoplasm traces
# came from the same data. It now reads the IVPA_Nuc dataset, matching the
# label — please confirm this is the intended sample.
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA Cytoplasm 18S ----------------------------------------------------
# Set the singular `ref_fragment` explicitly; the call below must not depend
# on whatever value the loop above happened to finish with.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# NOTE(review): `name_condition` was reassigned above, so the file name
# contains "IVPA Cytoplasm 18S" rather than "IVPA Nucleus". Left unchanged.
fig.savefig(
    f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# m1acp3psU (reference coordinate 4901): IVPA Nucleus intermediates compared
# with IVPA Cytoplasm 18S, IVT 18S and the TSR knock-out 18S on two stacked
# axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for the nucleus sample, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Colour legend used across the comparison figures:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 4901
name_modification = "m1acp3psU"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 5

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Nucleus: one trace per fragment ----------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA Cytoplasm 18S ----------------------------------------------------
# Set the singular `ref_fragment` explicitly; the call below must not depend
# on whatever value the loop above happened to finish with.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# --- TSR knock-out 18S -----------------------------------------------------
# Debug prints of the full read-id list, the fragment table and the BAM
# handle were removed here; they only flooded the cell output.
ref_fragment = "18S"
condition = "TSR KO 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/TSR_KO/filtered_pod5/filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/TSR_KO/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/TSR_KO/filtered_pod5/filtered_pod5_rebasecalled.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#010101",
    reference_path, condition, read_ids,
)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# G3714 (reference coordinate 3714): IVPA Nucleus intermediates compared with
# IVPA Cytoplasm 18S and IVT 18S on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for the nucleus sample, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Colour legend used across the comparison figures:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 3714
name_modification = "G3714"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 5

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- IVPA Nucleus: one trace per fragment ----------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA Cytoplasm 18S ----------------------------------------------------
# Set the singular `ref_fragment` explicitly; the call below must not depend
# on whatever value the loop above happened to finish with.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig(
    f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()

In [None]:
# m62A (reference coordinate 5504): NP Nucleus intermediates compared with
# NP Cytoplasm 18S, IVT 18S and the DIMT1L knock-out 18S on two stacked axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments drawn for the nucleus sample, paired one-to-one with `colors`.
# ("30S" and "26S" are not plotted here.)
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used across the comparison figures:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5504
name_modification = "m62A"
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

# Window around `coordinate` handed to plot_reference_coordinate.
bases_upstream = 5
bases_downstream = 5

# "~" is a shell convention; expand it explicitly so the paths are valid for
# every library they are passed to.
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner depends only on the k-mer table, so build it once and reuse it
# for every dataset in this cell.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- NP Nucleus: one trace per fragment ------------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
    )
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S ------------------------------------------------------
# Set the singular `ref_fragment` explicitly; the call below must not depend
# on whatever value the loop above happened to finish with.
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
    )
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S ---------------------------------------------------------------
# The literal "IVT 18S" is significant: plot_reference_coordinate compares
# `condition` against it to cap the number of reads it processes.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# --- DIMT1L knock-out 18S --------------------------------------------------
ref_fragment = "18S"
condition = "DIMT1L KO 18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/DIMT1L_KO/filtered_pod5/filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/DIMT1L_KO/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/DIMT1L_KO/filtered_pod5/filtered_pod5_rebasecalled.bam"
    )
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#010101",
    reference_path, condition, read_ids,
)

# Save first, then show, in the same order as the other figure cells. The
# file name previously ended in a doubled ".svg.svg"; it now ends in ".svg".
fig.savefig(
    f"{name_modification}_comparison_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

plt.show()



In [None]:
# Compare the m1acp3psU site (reference coordinate 4901) across the NP nucleus
# fragments, NP cytoplasm 18S, IVT 18S and TSR KO 18S. Every call to
# plot_reference_coordinate draws onto the same two axes (ax1, ax2).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments plotted for the nucleus sample ("30S" and "26S" are not included).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the legend below lists "#282A74" as IVPA Nuc 18S, while the
# sibling cells use "#4B6EB5" for NP Nucleus 18S -- confirm which is intended.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#282A74",
]

# Colour legend used throughout the notebook:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cyt 18S #8A181A
# IVPA Nuc 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 4901
name_modification = "m1acp3psU"
# Window handed to plot_reference_coordinate:
# reference[coordinate - bases_downstream : coordinate + bases_upstream + 1].
bases_upstream = 5
bases_downstream = 5
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# The k-mer model and refiner settings are the same for every dataset in this
# cell, so they are built once instead of once per dataset.
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- NP Nucleus: one trace per fragment --------------------------------------
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S --------------------------------------------------------
# Fixed: this used to assign `ref_fragments = "18S"` and only worked because
# the loop above happened to leave `ref_fragment` at "18S".
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S -----------------------------------------------------------------
# The exact string "IVT 18S" matters: plot_reference_coordinate stops after
# 20000 reads for this condition.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

# --- TSR KO 18S --------------------------------------------------------------
# The debug prints of read_ids / df / bam_fh were dropped; printing read_ids
# wrote every read id of the dataset into the cell output.
ref_fragment = "18S"
condition = "TSR KO 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/TSR_KO/filtered_pod5/filtered.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/TSR_KO/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam("~/directRNA_004/TSR_KO/filtered_pod5/filtered_pod5_rebasecalled.bam")
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#010101", reference_path, condition, read_ids,
)

plt.show()
# NOTE(review): the file name says "LSR_KO" although the knock-out plotted
# above is "TSR KO" -- left unchanged, confirm the intended name.
fig.savefig(
    f"{name_modification}_comparison_LSR_KO_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)



In [None]:
# Compare the m7G site (reference coordinate 5292) across the NP nucleus
# fragments, NP cytoplasm 18S, IVT 18S and WBSCR KO 18S. Every call to
# plot_reference_coordinate draws onto the same two axes (ax1, ax2).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments plotted for the nucleus sample ("30S" and "26S" are not included).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used throughout the notebook:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5292
name_modification = "m7G"
# Window handed to plot_reference_coordinate:
# reference[coordinate - bases_downstream : coordinate + bases_upstream + 1].
bases_upstream = 5
bases_downstream = 5
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# The k-mer model and refiner settings are the same for every dataset in this
# cell, so they are built once instead of once per dataset.
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- NP Nucleus: one trace per fragment --------------------------------------
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S --------------------------------------------------------
# Fixed: this used to assign `ref_fragments = "18S"` and only worked because
# the loop above happened to leave `ref_fragment` at "18S".
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S -----------------------------------------------------------------
# The exact string "IVT 18S" matters: plot_reference_coordinate stops after
# 20000 reads for this condition.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

# --- WBSCR KO 18S ------------------------------------------------------------
ref_fragment = "18S"
condition = "WBSCR KO 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/WBSCR_KO/filtered_pod5/filtered.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/WBSCR_KO/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam("~/directRNA_004/WBSCR_KO/filtered_pod5/filtered_pod5_rebasecalled.bam")
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#010101", reference_path, condition, read_ids,
)

plt.show()
fig.savefig(
    f"{name_modification}_comparison_WBSCR_KO_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

In [None]:
# Compare the not-yet-named site at reference coordinate 5431 across the NP
# nucleus fragments, NP cytoplasm 18S and IVT 18S. Every call to
# plot_reference_coordinate draws onto the same two axes (ax1, ax2).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments plotted for the nucleus sample ("30S" and "26S" are not included).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used throughout the notebook:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5431
name_modification = "unknown"
# Window handed to plot_reference_coordinate:
# reference[coordinate - bases_downstream : coordinate + bases_upstream + 1].
bases_upstream = 5
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# The k-mer model and refiner settings are the same for every dataset in this
# cell, so they are built once instead of once per dataset.
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- NP Nucleus: one trace per fragment --------------------------------------
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S --------------------------------------------------------
# Fixed: this used to assign `ref_fragments = "18S"` and only worked because
# the loop above happened to leave `ref_fragment` at "18S".
ref_fragment = "18S"
# Fixed: was "#CB6261e" (seven hex digits), which is not a valid colour spec.
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S -----------------------------------------------------------------
# The exact string "IVT 18S" matters: plot_reference_coordinate stops after
# 20000 reads for this condition.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

plt.show()
# Fixed: the file name previously ended in ".svg.svg".
fig.savefig(
    f"{name_modification}_comparison_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

In [None]:
# Compare position G3714 (reference coordinate 3714) across the NP nucleus
# fragments, NP cytoplasm 18S and IVT 18S. Every call to
# plot_reference_coordinate draws onto the same two axes (ax1, ax2).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(7, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Fragments plotted for the nucleus sample ("30S" and "26S" are not included).
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used throughout the notebook:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 3714
name_modification = "G3714"
# Window handed to plot_reference_coordinate:
# reference[coordinate - bases_downstream : coordinate + bases_upstream + 1].
bases_upstream = 5
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# The k-mer model and refiner settings are the same for every dataset in this
# cell, so they are built once instead of once per dataset.
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

# --- NP Nucleus: one trace per fragment --------------------------------------
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Nuc/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S --------------------------------------------------------
# Fixed: this used to assign `ref_fragments = "18S"` and only worked because
# the loop above happened to leave `ref_fragment` at "18S".
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")
pod5_dr = pod5.DatasetReader(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered.pod5"
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/20231114_RNA004_NP_Cyt/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S -----------------------------------------------------------------
# The exact string "IVT 18S" matters: plot_reference_coordinate stops after
# 20000 reads for this condition.
ref_fragment = "18S"
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

plt.show()
# Fixed: the file name previously ended in ".svg.svg".
fig.savefig(
    f"{name_modification}_comparison_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

Store tables for all conditions and all positions separately

In [None]:

# Settings for the m62A table export performed further down in this cell.
name_modification = "m62A"
coordinate = 5504
# Same number of flanking bases on both sides of the coordinate.
bases_upstream = bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# Fragment colours, in the order 21S, 21S-C, 18S-E, 18S used elsewhere in
# the notebook.
colors = ["#4D2A7C", "#DB7093", "#2E8B58", "#4B6EB5"]
#ref_fragments = [ "21S", "21S-C", "18S-E", "18S"]

# condition = "Nucleus_R1"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)



# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Nucleus_R2"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Nuc1_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)



# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)
    
    
    
# condition = "Nucleus_R3"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Nuc2_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)


# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)





# ref_fragments = "18S"
# color = "#8A181A"

# condition = "Cytoplasm_R1"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Cytoplasm_R2"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Cyt1_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Cytoplasm_R3"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Cyt2_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# Export the mean-signal and dwell-time tables of the 18S fragment for the
# IVT control and the DIMT1L knock-out. Both datasets go through the same
# steps, so they are described as (condition, pod5, fragment table, BAM)
# records and processed in one loop.
ref_fragment = "18S"
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
table_datasets = (
    (
        "IVT",
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5",
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam",
    ),
    (
        "DIMT1L_KO_18S",
        "~/directRNA_004/DIMT1L_KO/filtered_pod5/filtered.pod5",
        "~/directRNA_004/DIMT1L_KO/template_based_analysis/template_fragment_df.csv",
        "~/directRNA_004/DIMT1L_KO/filtered_pod5/filtered_pod5_rebasecalled.bam",
    ),
)

for condition, pod5_path, fragment_csv_path, bam_path in table_datasets:
    pod5_dr = pod5.DatasetReader(pod5_path)
    read_ids = list(pod5_dr.read_ids)
    df = pl.read_csv(fragment_csv_path, separator=";")
    bam_fh = io.ReadIndexedBam(bam_path)
    sig_map_refiner = refine_signal_map.SigMapRefiner(
        kmer_model_filename=kmer_table,
        do_rough_rescale=True,
        scale_iters=1,
        do_fix_guage=True,
    )
    mean_signal_df, dwell_time_df = make_table_from_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        reference_path, condition, read_ids, None, None,
    )
    # One ';'-separated CSV per table, named after modification, fragment
    # and condition.
    table_prefix = f"./{name_modification}_{ref_fragment}_{condition}_raw_current"
    mean_signal_df.write_csv(
        f"{table_prefix}_mean_signal.csv", separator=";", include_header=True
    )
    dwell_time_df.write_csv(
        f"{table_prefix}_dwell_time.csv", separator=";", include_header=True
    )


In [None]:
# Settings for the m1acp3psU table export of this cell.
name_modification = "m1acp3psU"
coordinate = 4901
# Same number of flanking bases on both sides of the coordinate.
bases_upstream = bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Fragments and their colours, index-aligned.
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["#4D2A7C", "#DB7093", "#2E8B58", "#4B6EB5"]

# condition = "Nucleus_R1"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)



# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Nucleus_R2"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Nuc1_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)



# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)
    
    
    
# condition = "Nucleus_R3"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Nuc2_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)


# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)




# ref_fragments = "18S"
# color = "#8A181A"

# condition = "Cytoplasm_R1"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Cytoplasm_R2"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Cyt1_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Cytoplasm_R3"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Cyt2_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# --- IVT 18S run -------------------------------------------------------------
# Builds the per-position mean-signal and dwell-time tables for the "18S"
# fragment of the IVT sample and writes both as ";"-separated CSV files in the
# working directory.  `coordinate`, `bases_upstream`, `bases_downstream`,
# `name_modification` and `reference_path` are set earlier in this cell.
#
# "~" is expanded explicitly: the paths are handed to pod5 / remora / pysam as
# plain strings, and a literal "~" is not a valid directory for them.
ref_fragment = "18S"
condition = "IVT"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
# print(df)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
    )
)
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # keyword spelling kept exactly as in the original call
)
mean_signal_df, dwell_time_df = make_table_from_reference_coordinate(
    pod5_dr,
    df,
    bam_fh,
    kmer_table,
    sig_map_refiner,
    coordinate,
    bases_upstream,
    bases_downstream,
    ref_fragment,
    os.path.expanduser(reference_path),
    condition,
    read_ids,
    None,
    None,
)
mean_signal_df.write_csv(
    f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",
    separator=";",
    include_header=True,
)
dwell_time_df.write_csv(
    f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",
    separator=";",
    include_header=True,
)


# --- TSR3 knock-out 18S run --------------------------------------------------
# Same extraction as the IVT run above, applied to the TSR3 KO sample: builds
# the mean-signal and dwell-time tables for the "18S" fragment and writes them
# as ";"-separated CSV files in the working directory.
#
# "~" is expanded explicitly: the paths are handed to pod5 / remora / pysam as
# plain strings, and a literal "~" is not a valid directory for them.
ref_fragment = "18S"
condition = "TSR3_KO_18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/TSR_KO/filtered_pod5/filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
# Report only the number of reads; printing the full id list floods the
# notebook output.
print(f"{len(read_ids)} read ids loaded")
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/TSR_KO/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
print(df)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/TSR_KO/filtered_pod5/filtered_pod5_rebasecalled.bam"
    )
)
print(bam_fh)
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # keyword spelling kept exactly as in the original call
)
mean_signal_df, dwell_time_df = make_table_from_reference_coordinate(
    pod5_dr,
    df,
    bam_fh,
    kmer_table,
    sig_map_refiner,
    coordinate,
    bases_upstream,
    bases_downstream,
    ref_fragment,
    os.path.expanduser(reference_path),
    condition,
    read_ids,
    None,
    None,
)
mean_signal_df.write_csv(
    f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",
    separator=";",
    include_header=True,
)
dwell_time_df.write_csv(
    f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",
    separator=";",
    include_header=True,
)


In [None]:
# Parameters for the m7G extraction runs in this cell: the modification label
# used in the output file names, the reference coordinate of the site, and the
# number of flanking bases kept on either side of it.
name_modification = "m7G"
coordinate = 5292
bases_upstream = bases_downstream = 10
# Reference FASTA passed to the table-building call further down in this cell.
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# ref_fragments = [ "21S", "21S-C", "18S-E", "18S"]

# condition = "Nucleus_R1"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_1M.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20231114_RNA004_IVPA_Nuc/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20231114_RNA004_IVPA_Nuc/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)



# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Nucleus_R2"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Nuc1_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Nuc1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)



# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)
    
    
    
# condition = "Nucleus_R3"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Nuc2_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Nuc2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)


# for ref_fragment, color in zip(ref_fragments, colors):
#     mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
#     mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
#     dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)




# ref_fragments = "18S"
# color = "#8A181A"

# condition = "Cytoplasm_R1"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_1M.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20231114_RNA004_IVPA_Cyt/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20231114_RNA004_IVPA_Cyt/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Cytoplasm_R2"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Cyt1_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Cyt1_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)

# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# condition = "Cytoplasm_R3"
# name_condition = condition
# condition_label = condition.replace("_", " ")


# pod5_dr = pod5.DatasetReader(
#     "~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered.pod5"
# )
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/20250416_Cyt2_dRNA/template_based_analysis/template_fragment_df.csv", separator=";")
# reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# bam_fh = io.ReadIndexedBam("~/directRNA_004/20250416_Cyt2_dRNA/filtered_pod5/filtered_pod5_basecalled.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)



# ref_fragment = "18S"
# condition = "IVT"
# pod5_dr = pod5.DatasetReader("~/directRNA_004/IVT_18S/filtered_pod5/filtered_1M.pod5")
# read_ids = list(pod5_dr.read_ids)
# df = pl.read_csv("~/directRNA_004/IVT_18S/template_based_analysis/template_fragment_df.csv", separator=";")
# # print(df)
# bam_fh = io.ReadIndexedBam("~/directRNA_004/IVT_18S/filtered_pod5/filtered_rebasecalled_aligned_1M.bam")
# kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
# sig_map_refiner = refine_signal_map.SigMapRefiner(kmer_model_filename=kmer_table,do_rough_rescale=True,scale_iters=1,do_fix_guage=True)
# mean_signal_df,dwell_time_df = make_table_from_reference_coordinate(pod5_dr,df,bam_fh,kmer_table,sig_map_refiner,coordinate,bases_upstream,bases_downstream, ref_fragment,reference_path,condition,read_ids,None,None)
# mean_signal_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",separator = ";", include_header=True)
# dwell_time_df.write_csv(f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",separator = ";", include_header=True)


# --- WBSCR22 knock-out 18S run (m7G site) ------------------------------------
# Builds the per-position mean-signal and dwell-time tables for the "18S"
# fragment of the WBSCR22 KO sample and writes both as ";"-separated CSV files
# in the working directory.
#
# "~" is expanded explicitly: the paths are handed to pod5 / remora / pysam as
# plain strings, and a literal "~" is not a valid directory for them.
ref_fragment = "18S"
condition = "WBSCR22_KO_18S"
pod5_dr = pod5.DatasetReader(
    os.path.expanduser("~/directRNA_004/WBSCR_KO/filtered_pod5/filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.expanduser(
        "~/directRNA_004/WBSCR_KO/template_based_analysis/template_fragment_df.csv"
    ),
    separator=";",
)
# print(df)
bam_fh = io.ReadIndexedBam(
    os.path.expanduser(
        "~/directRNA_004/WBSCR_KO/filtered_pod5/filtered_pod5_rebasecalled.bam"
    )
)
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # keyword spelling kept exactly as in the original call
)
mean_signal_df, dwell_time_df = make_table_from_reference_coordinate(
    pod5_dr,
    df,
    bam_fh,
    kmer_table,
    sig_map_refiner,
    coordinate,
    bases_upstream,
    bases_downstream,
    ref_fragment,
    os.path.expanduser(reference_path),
    condition,
    read_ids,
    None,
    None,
)
mean_signal_df.write_csv(
    f"./{name_modification}_{ref_fragment}_{condition}_raw_current_mean_signal.csv",
    separator=";",
    include_header=True,
)
dwell_time_df.write_csv(
    f"./{name_modification}_{ref_fragment}_{condition}_raw_current_dwell_time.csv",
    separator=";",
    include_header=True,
)

In [None]:
def asymmetric_deviation(col: pl.Series):
    """Return the one-sided spread of *col* around its median.

    The values strictly above the median and those strictly below it are
    averaged separately; each side's distance from the median is reported.
    A side with no values contributes ``0.0``.

    Returns:
        ``[upward, downward]`` where ``upward`` is ``mean(above) - median``
        and ``downward`` is ``median - mean(below)``.
    """
    center = col.median()
    upper_values = col.filter(col > center)
    lower_values = col.filter(col < center)

    if len(upper_values) > 0:
        upward = upper_values.mean() - center
    else:
        upward = 0.0

    if len(lower_values) > 0:
        downward = center - lower_values.mean()
    else:
        downward = 0.0

    return [upward, downward]

# Summarize multiple datasets
def _collapse_rows(frame: pd.DataFrame) -> list:
    """Reduce a table of per-condition rows to one value per column.

    Several rows are averaged column-wise; a single row is returned as is.
    """
    if frame.shape[0] > 1:
        return frame.mean(axis=0).tolist()
    return frame.iloc[0].tolist()


def summarize_datasets(datasets_to_summarize: list, final_condition: str, fig, color: str):
    """Plot the combined per-position signal profile of several CSV tables.

    Each file is read as a ";"-separated table with a ``condition`` column;
    every other column is treated as one position of the profile.  For every
    condition found in a file the column means and the asymmetric deviations
    (see ``asymmetric_deviation``) are computed.  The resulting rows of all
    files are then averaged into one profile, which is drawn as an error-bar
    series on ``fig``.

    Args:
        datasets_to_summarize: Paths of the CSV files to combine.
        final_condition: Legend label of the plotted series.
        fig: Object providing ``errorbar`` (the callers pass a Matplotlib
            axes).
        color: Colour of the plotted series.

    Returns:
        None.  The function only draws on ``fig``.

    Raises:
        ValueError: If no dataset is given or no rows remain after dropping
            NaN rows.
    """
    if not datasets_to_summarize:
        raise ValueError("datasets_to_summarize must contain at least one CSV path")

    mean_frames = []
    upper_frames = []
    lower_frames = []

    for dataset in datasets_to_summarize:
        df = pl.read_csv(dataset, separator=";").drop_nans()
        conditions = df["condition"].unique().to_list()

        # Per-condition mean of every signal column.
        grouped_mean = (
            df.group_by("condition")
            .agg(pl.all().exclude("condition").mean())
            .drop("condition")
        )
        mean_frames.append(grouped_mean.to_pandas())

        # Per-condition asymmetric deviations.  The rows are restricted to
        # the current condition so the spread matches the grouped mean above
        # (the previous code used the whole table for every condition).
        upper_rows = []
        lower_rows = []
        for condition in conditions:
            if condition is None:
                in_condition = pl.col("condition").is_null()
            else:
                in_condition = pl.col("condition") == condition
            subset = df.filter(in_condition).drop("condition")

            upper_row = {}
            lower_row = {}
            for col in subset.columns:
                pos_dev, neg_dev = asymmetric_deviation(subset[col])
                upper_row[col] = pos_dev
                lower_row[col] = neg_dev
            upper_rows.append(upper_row)
            lower_rows.append(lower_row)

        upper_frames.append(pd.DataFrame(upper_rows))
        lower_frames.append(pd.DataFrame(lower_rows))

    final_df_mean = pd.concat(mean_frames, axis=0)
    final_df_std_upper = pd.concat(upper_frames, axis=0)
    final_df_std_lower = pd.concat(lower_frames, axis=0)

    if final_df_mean.shape[0] == 0:
        raise ValueError(
            f"no rows left to summarize for {final_condition!r} after dropping NaNs"
        )

    # Collapse to one profile.  No helper "condition" column is added, so no
    # trailing element has to be stripped afterwards; the previous code
    # stripped the last *signal* position whenever several rows were averaged.
    summarized_mean = _collapse_rows(final_df_mean)
    summarized_std_lower = _collapse_rows(final_df_std_lower)
    summarized_std_upper = _collapse_rows(final_df_std_upper)

    # NOTE(review): linestyle=None is kept from the original call; if only
    # markers are wanted, "none" is presumably what was meant - confirm.
    fig.errorbar(
        [i + 1 for i in range(len(summarized_mean))],
        summarized_mean,
        yerr=[summarized_std_lower, summarized_std_upper],
        color=color,
        marker="o",
        linestyle=None,
        lw=1,
        alpha=0.4,
        label=final_condition,
    )

        
       
        

In [None]:
# Figure: mean-signal profiles around the m1acp3psU site.
# Every summarize_datasets call draws one error-bar series on `ax`; the
# function has no return statement, so the names bound below hold None.
fig1, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))

m1acp3psU_21S_Nucleus = summarize_datasets(
    [
        "m1acp3psU_21S_Nucleus_R1_raw_current_mean_signal.csv",
        "m1acp3psU_21S_Nucleus_R2_raw_current_mean_signal.csv",
        "m1acp3psU_21S_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 21S",
    ax,
    "#4D2A7C",
)

m1acp3psU_21S_C_Nucleus = summarize_datasets(
    [
        "m1acp3psU_21S-C_Nucleus_R1_raw_current_mean_signal.csv",
        "m1acp3psU_21S-C_Nucleus_R2_raw_current_mean_signal.csv",
        "m1acp3psU_21S-C_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 21S-C",
    ax,
    "#DB7093",
)

m1acp3psU_18S_E_Nucleus = summarize_datasets(
    [
        "m1acp3psU_18S-E_Nucleus_R1_raw_current_mean_signal.csv",
        "m1acp3psU_18S-E_Nucleus_R2_raw_current_mean_signal.csv",
        "m1acp3psU_18S-E_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 18S-E",
    ax,
    "#41BBEC",
)

# Disabled series (nuclear 18S), kept for reference:
# m1acp3psU_18S_Nucleus = summarize_datasets(["m1acp3psU_18S_Nucleus_R1_raw_current_mean_signal.csv",
# "m1acp3psU_18S_Nucleus_R2_raw_current_mean_signal.csv",
# "m1acp3psU_18S_Nucleus_R3_raw_current_mean_signal.csv"],
# "Nucleus 18S",
# ax,
# "#2C2F72"
# )

m1acp3psU_18S_Cytoplasm = summarize_datasets(
    [
        "m1acp3psU_18S_Cytoplasm_R1_raw_current_mean_signal.csv",
        "m1acp3psU_18S_Cytoplasm_R2_raw_current_mean_signal.csv",
        "m1acp3psU_18S_Cytoplasm_R3_raw_current_mean_signal.csv",
    ],
    "Cytoplasm 18S",
    ax,
    "#8A181B",
)

m1acp3psU_18S_IVT = summarize_datasets(
    ["m1acp3psU_18S_IVT_raw_current_mean_signal.csv"],
    "IVT 18S",
    ax,
    "#FED728",
)

m1acp3psU_18S_TSR3_KO = summarize_datasets(
    ["m1acp3psU_18S_TSR3_KO_18S_raw_current_mean_signal.csv"],
    "TSR3 Mut",
    ax,
    "#020202",
)

# Label the x axis with the reference bases of the plotted window.
coordinate = 4901
name_modification = "m1acp3psU"
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# Expand "~" before handing the path to pysam, and close the FASTA handle as
# soon as the sequence has been read.
with pysam.FastaFile(os.path.expanduser(reference_path)) as fasta_file:
    reference = fasta_file.references[0]
    reference_sequence = str(fasta_file.fetch(reference))
extracted_sequence = reference_sequence[
    coordinate - bases_downstream : coordinate + bases_upstream + 1
]
# Derive the tick count from the window size instead of hard-coding 21, as the
# m62A cell does.
record_length = 1 + bases_upstream + bases_downstream
ax.set_xticks([i + 1 for i in range(record_length)])
ax.set_xticklabels(list(extracted_sequence))
ax.set_xlabel("reference sequence")
ax.set_ylabel("z-normalized signal")
ax.set_ylim(-3, 3)
ax.legend()


In [None]:
# Figure: mean-signal profiles around the m7G site.
# Every summarize_datasets call draws one error-bar series on `ax`; the
# function has no return statement, so the names bound below hold None.
fig1, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))

m7G_21S_Nucleus = summarize_datasets(
    [
        "m7G_21S_Nucleus_R1_raw_current_mean_signal.csv",
        "m7G_21S_Nucleus_R2_raw_current_mean_signal.csv",
        "m7G_21S_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 21S",
    ax,
    "#4D2A7C",
)

m7G_21S_C_Nucleus = summarize_datasets(
    [
        "m7G_21S-C_Nucleus_R1_raw_current_mean_signal.csv",
        "m7G_21S-C_Nucleus_R2_raw_current_mean_signal.csv",
        "m7G_21S-C_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 21S-C",
    ax,
    "#DB7093",
)

m7G_18S_E_Nucleus = summarize_datasets(
    [
        "m7G_18S-E_Nucleus_R1_raw_current_mean_signal.csv",
        "m7G_18S-E_Nucleus_R2_raw_current_mean_signal.csv",
        "m7G_18S-E_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 18S-E",
    ax,
    "#41BBEC",
)

# Disabled series (nuclear 18S), kept for reference:
# m7G_18S_Nucleus = summarize_datasets(["m7G_18S_Nucleus_R1_raw_current_mean_signal.csv",
# "m7G_18S_Nucleus_R2_raw_current_mean_signal.csv",
# "m7G_18S_Nucleus_R3_raw_current_mean_signal.csv"],
# "Nucleus 18S",
# ax,
# "#2C2F72"
# )

m7G_18S_Cytoplasm = summarize_datasets(
    [
        "m7G_18S_Cytoplasm_R1_raw_current_mean_signal.csv",
        "m7G_18S_Cytoplasm_R2_raw_current_mean_signal.csv",
        "m7G_18S_Cytoplasm_R3_raw_current_mean_signal.csv",
    ],
    "Cytoplasm 18S",
    ax,
    "#8A181B",
)

m7G_18S_IVT = summarize_datasets(
    ["m7G_18S_IVT_raw_current_mean_signal.csv"],
    "IVT 18S",
    ax,
    "#FED728",
)

m7G_18S_WBSCR22_KO = summarize_datasets(
    ["m7G_18S_WBSCR22_KO_18S_raw_current_mean_signal.csv"],
    "WBSCR22 Mut",
    ax,
    "#020202",
)

# Label the x axis with the reference bases of the plotted window.
coordinate = 5292
name_modification = "m7G"
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# Expand "~" before handing the path to pysam, and close the FASTA handle as
# soon as the sequence has been read.
with pysam.FastaFile(os.path.expanduser(reference_path)) as fasta_file:
    reference = fasta_file.references[0]
    reference_sequence = str(fasta_file.fetch(reference))
extracted_sequence = reference_sequence[
    coordinate - bases_downstream : coordinate + bases_upstream + 1
]
# Derive the tick count from the window size instead of hard-coding 21, as the
# m62A cell does.
record_length = 1 + bases_upstream + bases_downstream
ax.set_xticks([i + 1 for i in range(record_length)])
ax.set_xticklabels(list(extracted_sequence))
ax.set_xlabel("reference sequence")
ax.set_ylabel("z-normalized signal")
ax.set_ylim(-3, 3)
ax.legend()



In [None]:
# Figure: mean-signal profiles around the m62A site.
# Every summarize_datasets call draws one error-bar series on `ax`; the
# function has no return statement, so the names bound below hold None.
fig1, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))

m62A_21S_Nucleus = summarize_datasets(
    [
        "m62A_21S_Nucleus_R1_raw_current_mean_signal.csv",
        "m62A_21S_Nucleus_R2_raw_current_mean_signal.csv",
        "m62A_21S_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 21S",
    ax,
    "#4D2A7C",
)

m62A_21S_C_Nucleus = summarize_datasets(
    [
        "m62A_21S-C_Nucleus_R1_raw_current_mean_signal.csv",
        "m62A_21S-C_Nucleus_R2_raw_current_mean_signal.csv",
        "m62A_21S-C_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 21S-C",
    ax,
    "#DB7093",
)

m62A_18S_E_Nucleus = summarize_datasets(
    [
        "m62A_18S-E_Nucleus_R1_raw_current_mean_signal.csv",
        "m62A_18S-E_Nucleus_R2_raw_current_mean_signal.csv",
        "m62A_18S-E_Nucleus_R3_raw_current_mean_signal.csv",
    ],
    "Nucleus 18S-E",
    ax,
    "#41BBEC",
)

# Disabled series (nuclear 18S), kept for reference:
# m62A_18S_Nucleus = summarize_datasets(["m62A_18S_Nucleus_R1_raw_current_mean_signal.csv",
# "m62A_18S_Nucleus_R2_raw_current_mean_signal.csv",
# "m62A_18S_Nucleus_R3_raw_current_mean_signal.csv"],
# "Nucleus 18S",
# ax,
# "#2C2F72"
# )

m62A_18S_Cytoplasm = summarize_datasets(
    [
        "m62A_18S_Cytoplasm_R1_raw_current_mean_signal.csv",
        "m62A_18S_Cytoplasm_R2_raw_current_mean_signal.csv",
        "m62A_18S_Cytoplasm_R3_raw_current_mean_signal.csv",
    ],
    "Cytoplasm 18S",
    ax,
    "#8A181B",
)

m62A_18S_IVT = summarize_datasets(
    ["m62A_18S_IVT_raw_current_mean_signal.csv"],
    "IVT 18S",
    ax,
    "#FED728",
)

m62A_18S_DIMT1L_KO = summarize_datasets(
    ["m62A_18S_DIMT1L_KO_18S_raw_current_mean_signal.csv"],
    "DIMT1L Mut",
    ax,
    "#020202",
)
# The DIMT1L series used to be bound to a WBSCR22-named variable; that name is
# kept as an alias so existing references keep working.
m62A_18S_WBSCR22_KO = m62A_18S_DIMT1L_KO

# Label the x axis with the reference bases of the plotted window.
coordinate = 5504
name_modification = "m62A"
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
# Expand "~" before handing the path to pysam, and close the FASTA handle as
# soon as the sequence has been read.
with pysam.FastaFile(os.path.expanduser(reference_path)) as fasta_file:
    reference = fasta_file.references[0]
    reference_sequence = str(fasta_file.fetch(reference))
extracted_sequence = reference_sequence[
    coordinate - bases_downstream : coordinate + bases_upstream + 1
]
record_length = 1 + bases_upstream + bases_downstream
ax.set_xticks([i + 1 for i in range(record_length)])
ax.set_xticklabels(list(extracted_sequence))
ax.set_xlabel("reference sequence")
ax.set_ylabel("z-normalized signal")
ax.set_ylim(-3, 3)
ax.legend()


