In [26]:
# Specific modification analysis of m62A and m1acp3psU
import read_raw_current_class as rrc
from pathlib import Path
import pod5
import remora
from remora import io, refine_signal_map, util
import numpy as np
import argparse
import pysam
from tqdm import tqdm
import math
from itertools import repeat
import json
import re
import polars as pl
from multiprocessing import Process, Queue
import matplotlib.pyplot as plt
import os
import pandas as pd
import h5py



def _asymmetric_position_stats(values_per_position):
    """Return per-position means and lower/upper spreads.

    For every position the NaN entries are dropped and the mean is taken.
    The standard deviation is then computed separately for the values at or
    below that mean and for the values at or above it.

    Returns ``(means, [lower_stds, upper_stds])``. The second item has the
    two-row layout that ``Axes.errorbar`` accepts as asymmetric ``yerr``.
    """
    means, lower_stds, upper_stds = [], [], []
    for values in values_per_position:
        clean = np.asarray([float(value) for value in values], dtype=float)
        clean = clean[~np.isnan(clean)]
        center = np.mean(clean)
        means.append(center)
        lower_stds.append(np.std(clean[clean <= center]))
        upper_stds.append(np.std(clean[clean >= center]))
    return means, [lower_stds, upper_stds]


def plot_reference_coordinate(
    pod5_dr,
    df,
    bam_fh,
    kmer_table,
    sig_map_refiner,
    coordinate,
    bases_upstream,
    bases_downstream,
    ref_fragment,
    ax,
    ax2,
    color1,
    reference_path,
    condition,
    read_ids
):
    """Plot mean signal and dwell time around one reference coordinate.

    Reads listed for ``ref_fragment`` in ``df`` (and also present in
    ``read_ids``) are resquiggled one by one through
    ``rrc.RawCurrentReadDataset``. Their per-base trimmed-mean signal and
    dwell values are collected for the window around ``coordinate`` and drawn
    as error-bar curves: signal on ``ax``, dwell time on ``ax2``.

    Args:
        pod5_dr, bam_fh, kmer_table, sig_map_refiner: Passed unchanged to
            ``rrc.RawCurrentReadDataset`` for every read.
        df: Table with a ``"Fragment"`` column and an ``"IDS"`` column whose
            cells hold the textual form of a collection of read ids.
        coordinate: Reference position the window is centred on.
        bases_upstream, bases_downstream: Window extent; the window holds
            ``1 + bases_upstream + bases_downstream`` positions.
        ref_fragment: Value of ``"Fragment"`` selecting the rows to use.
        ax, ax2: Matplotlib axes for the signal and the dwell-time curves.
        color1: Colour of both curves.
        reference_path: FASTA file; its first record supplies tick labels.
        condition: Legend label. The exact value ``"IVT 18S"`` additionally
            caps the number of processed reads at 20000.
        read_ids: Read ids available in the pod5 data; defines read order.

    Returns:
        ``(final_motif, extracted_sequence)``: the motif of the last read
        that covered the full window (a run of ``"X"`` if none did) and the
        reference sequence of the window.
    """
    record_length = 1 + bases_upstream + bases_downstream
    positions = list(range(record_length))

    # Read the reference window once and release the FASTA handle right away.
    with pysam.FastaFile(reference_path) as fasta_file:
        reference = fasta_file.references[0]
        reference_sequence = str(fasta_file.fetch(reference))
    extracted_sequence = reference_sequence[
        coordinate - bases_downstream : coordinate + bases_upstream + 1
    ]

    # Defaults for the case where no row of df matches ref_fragment; without
    # them the code below would fail on unbound names.
    final_motif = "X" * record_length
    mean_values = [[] for _ in positions]
    mean_values_dwell = [[] for _ in positions]

    ivt_condition = "IVT 18S"
    ivt_read_cap = 20000

    for fragment_ids_repr, fragment in zip(df["IDS"], df["Fragment"]):
        if fragment != ref_fragment:
            continue
        # SECURITY NOTE(review): eval() executes whatever text the CSV holds.
        # This is only acceptable for trusted, locally generated tables;
        # consider ast.literal_eval once the exact cell format is confirmed.
        fragment_ids = set(eval(fragment_ids_repr))
        # Keep the order of read_ids, restricted to reads of this fragment.
        valid_read_ids = [rid for rid in read_ids if rid in fragment_ids]
        # A later matching row replaces the values of an earlier one.
        mean_values = [[] for _ in positions]
        mean_values_dwell = [[] for _ in positions]
        for iteration, single_id in tqdm(
            enumerate(valid_read_ids), total=len(valid_read_ids)
        ):
            if condition == ivt_condition and iteration >= ivt_read_cap:
                break
            dataset = rrc.RawCurrentReadDataset(
                id=single_id,
                pod5_dr=pod5_dr,
                bam_fh=bam_fh,
                kmer_table=kmer_table,
                sig_map_refiner=sig_map_refiner,
            )
            try:
                # NOTE(review): argument order (upstream, downstream) is
                # assumed; these two values were hard-coded as 10, 10 before.
                (
                    ref_motifs,
                    ref_signals,
                    ref_mean_signals,
                    ref_trimmean_signals,
                    ref_dwell_signals,
                ) = dataset.extract_signal_reference_coordinates(
                    coordinate, bases_upstream, bases_downstream
                )
                # Only reads covering the complete window contribute.
                if len(ref_motifs) == record_length:
                    final_motif = ref_motifs
                    for i, signal in enumerate(ref_trimmean_signals):
                        mean_values[i].append(signal)
                    for j, dwell in enumerate(ref_dwell_signals):
                        mean_values_dwell[j].append(dwell)
            except (IndexError, TypeError):
                # Best effort: a read that cannot be extracted is skipped.
                continue

    # Nothing collected for the first position: leave the axes untouched.
    if mean_values[0]:
        mean_of_mean_values, std_of_mean_values = _asymmetric_position_stats(
            mean_values
        )
        mean_of_mean_dwell_time, std_of_mean_dwell_time = (
            _asymmetric_position_stats(mean_values_dwell)
        )
        ax.errorbar(
            positions,
            mean_of_mean_values,
            yerr=std_of_mean_values,
            color=color1,
            marker="o",
            linestyle=None,
            lw=2,
            alpha=0.4,
            label=condition,
        )
        ax2.errorbar(
            positions,
            mean_of_mean_dwell_time,
            yerr=std_of_mean_dwell_time,
            color=color1,
            marker="o",
            linestyle=None,
            alpha=0.4,
            lw=2,
            label=condition,
        )
        ax.set_xticks(positions)
        ax.set_xticklabels(list(extracted_sequence))
        ax.set_xlabel("reference sequence")
        ax.set_ylabel("z-normalized signal")
        ax.set_ylim(-3, 3)
        ax.legend()
        ax2.set_xticks(positions)
        # Same bounds as the sequence window, so labels and ticks always match
        # in number (the upper bound used bases_downstream before).
        ax2.set_xticklabels(
            list(range(coordinate - bases_downstream, coordinate + bases_upstream + 1))
        )
        ax2.set_xlabel("reference position")
        ax2.set_ylabel("dwell time")
        ax2.set_ylim(0, 400)
        ax2.legend(handles=[])
    return final_motif, extracted_sequence





In [None]:
# m1acp3psU figure: every run is drawn on the same pair of axes, then saved.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
ref_fragment = "18S"
coordinate = 4901
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
name_modification = "m1acp3psU"

data_root = "~/Synology/Data_nano_ribolyzer/directRNA_004"
# (legend label, run directory, suffix of the pod5/BAM file names, colour)
runs = [
    ("IVPA Cytoplasm", "20231114_RNA004_IVPA_Cyt", "_1M", "navy"),
    ("IVPA Nucleus", "20231114_RNA004_IVPA_Nuc", "_1M", "darkred"),
    ("NP Cytoplasm", "20231114_RNA004_NP_Cyt", "", "royalblue"),
    ("NP Nucleus", "20231114_RNA004_NP_Nuc", "", "indianred"),
    ("IVT 18S", "IVT_18S", "_1M", "green"),
]

for condition, run_name, file_suffix, line_color in runs:
    pod5_dir = f"{data_root}/{run_name}/filtered_pod5"
    pod5_dr = pod5.DatasetReader(f"{pod5_dir}/filtered{file_suffix}.pod5")
    read_ids = list(pod5_dr.read_ids)
    df = pl.read_csv(
        f"{data_root}/{run_name}/template_based_analysis/template_fragment_df.csv",
        separator=";",
    )
    bam_fh = io.ReadIndexedBam(
        f"{pod5_dir}/filtered_rebasecalled_aligned{file_suffix}.bam"
    )
    # The k-mer table and the refiner are set up anew for every run.
    kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
    sig_map_refiner = refine_signal_map.SigMapRefiner(
        kmer_model_filename=kmer_table,
        do_rough_rescale=True,
        scale_iters=1,
        do_fix_guage=True,
    )
    final_motif, extracted_sequence = plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, line_color,
        reference_path, condition, read_ids,
    )

fig.savefig(f"{name_modification}_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

In [None]:
# m62A figure: every run is drawn on the same pair of axes, then saved.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
ref_fragment = "18S"
coordinate = 5503
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"
name_modification = "m62A"
fig.suptitle(f"{name_modification}", fontsize=16)

data_root = "~/Synology/Data_nano_ribolyzer/directRNA_004"
# (legend label, run directory, suffix of the pod5/BAM file names, colour)
runs = [
    ("IVPA Cytoplasm", "20231114_RNA004_IVPA_Cyt", "_1M", "navy"),
    ("IVPA Nucleus", "20231114_RNA004_IVPA_Nuc", "_1M", "darkred"),
    ("NP Cytoplasm", "20231114_RNA004_NP_Cyt", "", "royalblue"),
    ("NP Nucleus", "20231114_RNA004_NP_Nuc", "", "indianred"),
    ("IVT 18S", "IVT_18S", "_1M", "green"),
]

for condition, run_name, file_suffix, line_color in runs:
    pod5_dir = f"{data_root}/{run_name}/filtered_pod5"
    pod5_dr = pod5.DatasetReader(f"{pod5_dir}/filtered{file_suffix}.pod5")
    read_ids = list(pod5_dr.read_ids)
    df = pl.read_csv(
        f"{data_root}/{run_name}/template_based_analysis/template_fragment_df.csv",
        separator=";",
    )
    bam_fh = io.ReadIndexedBam(
        f"{pod5_dir}/filtered_rebasecalled_aligned{file_suffix}.bam"
    )
    # The k-mer table and the refiner are set up anew for every run.
    kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
    sig_map_refiner = refine_signal_map.SigMapRefiner(
        kmer_model_filename=kmer_table,
        do_rough_rescale=True,
        scale_iters=1,
        do_fix_guage=True,
    )
    final_motif, extracted_sequence = plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, line_color,
        reference_path, condition, read_ids,
    )

fig.savefig(f"{name_modification}_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

In [None]:
# m62A, NP Nucleus run: one curve per fragment in ref_fragments plus the
# IVT 18S run, all on the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted here: "30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 5504
name_modification = "m62A"
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Inputs of the sample run.
run_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_NP_Nuc"
pod5_dr = pod5.DatasetReader(f"{run_dir}/filtered_pod5/filtered.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{run_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{run_dir}/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# Inputs of the IVT 18S run, drawn in red.
ref_fragment = "18S"
condition = "IVT 18S"
ivt_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S"
pod5_dr = pod5.DatasetReader(f"{ivt_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{ivt_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{ivt_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m1acp3psU, NP Nucleus run: one curve per fragment in ref_fragments plus the
# IVT 18S run, all on the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted here: "30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 4901
name_modification = "m1acp3psU"
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Inputs of the sample run.
run_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_NP_Nuc"
pod5_dr = pod5.DatasetReader(f"{run_dir}/filtered_pod5/filtered.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{run_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{run_dir}/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# Inputs of the IVT 18S run, drawn in red.
ref_fragment = "18S"
condition = "IVT 18S"
ivt_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S"
pod5_dr = pod5.DatasetReader(f"{ivt_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{ivt_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{ivt_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m62A, IVPA Nucleus run: one curve per fragment in ref_fragments plus the
# IVT 18S run, all on the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted here: "30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 5504
name_modification = "m62A"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Inputs of the sample run.
run_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_IVPA_Nuc"
pod5_dr = pod5.DatasetReader(f"{run_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{run_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{run_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# Inputs of the IVT 18S run, drawn in red.
ref_fragment = "18S"
condition = "IVT 18S"
ivt_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S"
pod5_dr = pod5.DatasetReader(f"{ivt_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{ivt_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{ivt_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m1acp3psU, IVPA Nucleus run: one curve per fragment in ref_fragments plus
# the IVT 18S run, all on the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted here: "30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 4901
name_modification = "m1acp3psU"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Inputs of the sample run.
run_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_IVPA_Nuc"
pod5_dr = pod5.DatasetReader(f"{run_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{run_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{run_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# Inputs of the IVT 18S run, drawn in red.
ref_fragment = "18S"
condition = "IVT 18S"
ivt_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S"
pod5_dr = pod5.DatasetReader(f"{ivt_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{ivt_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{ivt_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# m62A, NP Cytoplasm run: one curve per fragment in ref_fragments plus the
# IVT 18S run, all on the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted here: "30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 5504
name_modification = "m62A"
condition = "NP Cytoplasm"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Inputs of the sample run.
run_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_NP_Cyt"
pod5_dr = pod5.DatasetReader(f"{run_dir}/filtered_pod5/filtered.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{run_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{run_dir}/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# Inputs of the IVT 18S run, drawn in red.
ref_fragment = "18S"
condition = "IVT 18S"
ivt_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S"
pod5_dr = pod5.DatasetReader(f"{ivt_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{ivt_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{ivt_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()


In [None]:
# m1acp3psU, NP Cytoplasm run: one curve per fragment in ref_fragments plus
# the IVT 18S run, all on the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted here: "30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 4901
name_modification = "m1acp3psU"
condition = "NP Cytoplasm"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10
reference_path = "~/wf-nanoribolyzer/references/RNA45SN1.fasta"

# Inputs of the sample run.
run_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/20231114_RNA004_NP_Cyt"
pod5_dr = pod5.DatasetReader(f"{run_dir}/filtered_pod5/filtered.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{run_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{run_dir}/filtered_pod5/filtered_rebasecalled_aligned.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# Inputs of the IVT 18S run, drawn in red.
ref_fragment = "18S"
condition = "IVT 18S"
ivt_dir = "~/Synology/Data_nano_ribolyzer/directRNA_004/IVT_18S"
pod5_dr = pod5.DatasetReader(f"{ivt_dir}/filtered_pod5/filtered_1M.pod5")
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    f"{ivt_dir}/template_based_analysis/template_fragment_df.csv", separator=";"
)
bam_fh = io.ReadIndexedBam(
    f"{ivt_dir}/filtered_pod5/filtered_rebasecalled_aligned_1M.bam"
)
kmer_table = "~/kmer_models/rna004/9mer_levels_v1.txt"
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",format="svg")

plt.show()

In [None]:
# Figure: m62A site (reference coordinate 5504) in the IVPA cytoplasm sample.
# plot_reference_coordinate is called once per processing intermediate on the
# shared axes; the IVT 18S control is then overlaid in red.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 5504
name_modification = "m62A"
condition = "IVPA Cytoplasm"
# Captured now for the output file name; `condition` is rebound further down.
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
sample_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA cytoplasm: one trace per intermediate -----------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(sample_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: m1acp3psU site (reference coordinate 4901) in the IVPA cytoplasm
# sample. plot_reference_coordinate is called once per processing
# intermediate on the shared axes; the IVT 18S control is overlaid in red.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# One colour per fragment. The list previously carried a fifth entry ("gray")
# that zip() silently dropped, so it has been removed.
colors = ["indigo", "palevioletred", "deepskyblue", "darkolivegreen"]

coordinate = 4901
name_modification = "m1acp3psU"
condition = "IVPA Cytoplasm"
# Captured now for the output file name; `condition` is rebound further down.
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
sample_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA cytoplasm: one trace per intermediate -----------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(sample_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "red",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_{name_condition}_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: m62A site (reference coordinate 5504). IVPA nucleus intermediates,
# then IVPA cytoplasm 18S, then the IVT 18S control, all on the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cytoplasm 18S  #8A181A
# IVPA Nucleus 18S    #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 5504
name_modification = "m62A"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA nucleus: one trace per intermediate --------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA cytoplasm 18S ------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: m7G site (reference coordinate 5292). IVPA nucleus intermediates,
# then IVPA cytoplasm 18S, then the IVT 18S control, all on the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cytoplasm 18S  #8A181A
# IVPA Nucleus 18S    #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 5292
name_modification = "m7G"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA nucleus: one trace per intermediate --------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA cytoplasm 18S ------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: unannotated site at reference coordinate 5431. IVPA nucleus
# intermediates, then IVPA cytoplasm 18S, then the IVT 18S control, all on
# the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cytoplasm 18S  #8A181A
# IVPA Nucleus 18S    #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 5431
name_modification = "unknown"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA nucleus: one trace per intermediate --------------------------------
# NOTE(review): this section used to load the IVPA_Cyt dataset while labelling
# the traces "IVPA Nucleus ..." and using the nucleus palette, which also made
# the 18S trace a duplicate of the cytoplasm one below. It now reads IVPA_Nuc
# like the sibling comparison figures - confirm this was the intent.
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA cytoplasm 18S ------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# The file name used to interpolate `name_condition`, which by this point
# holds "IVPA Cytoplasm 18S" and so mislabelled a nucleus/cytoplasm
# comparison; it now follows the naming of the other comparison figures.
fig.savefig(f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: m1acp3psU site (reference coordinate 4901). IVPA nucleus
# intermediates, then IVPA cytoplasm 18S, then the IVT 18S control, all on
# the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cytoplasm 18S  #8A181A
# IVPA Nucleus 18S    #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 4901
name_modification = "m1acp3psU"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA nucleus: one trace per intermediate --------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA cytoplasm 18S ------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: G3714 (reference coordinate 3714). IVPA nucleus intermediates,
# then IVPA cytoplasm 18S, then the IVT 18S control, all on the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#41BBEC",
    "#282A74",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cytoplasm 18S  #8A181A
# IVPA Nucleus 18S    #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 3714
name_modification = "G3714"
condition = "IVPA Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_IVPA_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- IVPA nucleus: one trace per intermediate --------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- IVPA cytoplasm 18S ------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#8A181A"
condition = "IVPA Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

fig.savefig(f"{name_modification}_comparison_IVPA_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()

In [None]:
# Figure: m62A site (reference coordinate 5504). NP nucleus intermediates,
# then NP cytoplasm 18S, the IVT 18S control and the DIMT1L knock-out 18S,
# all on the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cytoplasm 18S  #8A181A
# IVPA Nucleus 18S    #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 5504
name_modification = "m62A"
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_NP_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_NP_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
ko_dir = os.path.join(data_root, "DIMT1L_KO")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- NP nucleus: one trace per intermediate ----------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP cytoplasm 18S --------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# --- DIMT1L knock-out 18S ----------------------------------------------------
ref_fragment = "18S"
condition = "DIMT1L KO 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ko_dir, "filtered_pod5", "filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ko_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ko_dir, "filtered_pod5", "filtered_pod5_rebasecalled.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#010101",
    reference_path, condition, read_ids,
)

# Save before showing (as the other figure cells do) so the file is written
# from the still-open figure; the doubled ".svg.svg" extension is also fixed.
fig.savefig(f"{name_modification}_comparison_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()



In [None]:
# Figure: m1acp3psU site (reference coordinate 4901). NP nucleus
# intermediates, then NP cytoplasm 18S, the IVT 18S control and the TSR
# knock-out 18S, all on the same axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

# Not plotted in this figure: "30S", "26S".
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first colour was written "#4DZA7C"; "Z" is not a hex
# digit, so matplotlib cannot parse it. "#4D2A7C" (a purple close to the
# "indigo" used for 21S in the named-colour figures) is assumed - confirm.
# The 18S entry was "#282A74" (listed below as IVPA Nuc 18S); this is an NP
# nucleus figure, so it now uses the NP Nucleus 18S colour from the legend.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Palette legend:
# NP/IVPA Nuc 21S     #4D2A7C (see NOTE above)
# NP/IVPA Nuc 21S-C   #DB7093
# NP Nuc 18S-E        #2E8B58
# IVPA Nuc 18S-E      #41BBEC
# NP Nucleus 18S      #4B6EB5
# NP Cytoplasm 18S    #CB6261
# IVPA Cyt 18S        #8A181A
# IVPA Nuc 18S        #282A74
# IVT 18S             #FBD629
# KOs                 #010101

coordinate = 4901
name_modification = "m1acp3psU"
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")
bases_upstream = 10
bases_downstream = 10

# "~" is a shell convention; expand it explicitly because these strings are
# handed as-is to several libraries (pod5, remora/pysam, polars).
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
nuc_dir = os.path.join(data_root, "20231114_RNA004_NP_Nuc")
cyt_dir = os.path.join(data_root, "20231114_RNA004_NP_Cyt")
ivt_dir = os.path.join(data_root, "IVT_18S")
ko_dir = os.path.join(data_root, "TSR_KO")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")

# The refiner settings are identical for every dataset, so build it once.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # (sic) keyword spelling kept from the existing call
)

# --- NP nucleus: one trace per intermediate ----------------------------------
pod5_dr = pod5.DatasetReader(
    os.path.join(nuc_dir, "filtered_pod5", "filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(nuc_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(nuc_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream,
        ref_fragment, ax1, ax2, color,
        reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP cytoplasm 18S --------------------------------------------------------
# Set the singular `ref_fragment` explicitly. The previous code assigned the
# plural `ref_fragments`, so the call below only received "18S" because that
# happened to be the last value left over from the loop.
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

pod5_dr = pod5.DatasetReader(
    os.path.join(cyt_dir, "filtered_pod5", "filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(cyt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(cyt_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, color,
    reference_path, condition, read_ids,
)

# --- IVT 18S control ---------------------------------------------------------
ref_fragment = "18S"
# The exact string "IVT 18S" makes plot_reference_coordinate stop after
# 20000 reads, so it must not be reworded.
condition = "IVT 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_1M.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ivt_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ivt_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#FBD629",
    reference_path, condition, read_ids,
)

# --- TSR knock-out 18S -------------------------------------------------------
# Leftover debug prints (the full read-id list, the frame and the BAM handle)
# were removed; they only flooded the cell output.
ref_fragment = "18S"
condition = "TSR KO 18S"
pod5_dr = pod5.DatasetReader(
    os.path.join(ko_dir, "filtered_pod5", "filtered.pod5")
)
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(ko_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(ko_dir, "filtered_pod5", "filtered_pod5_rebasecalled.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream,
    ref_fragment, ax1, ax2, "#010101",
    reference_path, condition, read_ids,
)

# Save before showing (as the other figure cells do) so the file is written
# from the still-open figure.
# NOTE(review): the file name said "LSR_KO" although the dataset and label are
# "TSR KO"; renamed on the assumption that it was a typo - confirm.
fig.savefig(f"{name_modification}_comparison_TSR_KO_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg", format="svg")

plt.show()



In [None]:
# Plot coordinate 5292 (labelled "m7G") for the NP Nucleus fragments, then for
# 18S from the NP Cytoplasm, IVT and WBSCR KO datasets, all on one pair of
# axes, and save the figure as SVG.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

#"30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first entry was "#4DZA7C", which is not a valid hex colour
# ("Z" is not a hex digit); assumed to be a mistyped "2" - confirm the shade.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used across these cells:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5292
name_modification = "m7G"

# Expand "~" explicitly: the readers below are handed plain path strings and
# are not guaranteed to perform home-directory expansion themselves.
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")
# Built once and reused for every dataset below: each original call used the
# same k-mer table and the same options.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # keyword spelled as in the original call
)
bases_upstream = 10
bases_downstream = 10

# --- NP Nucleus: one trace per fragment in ref_fragments ---
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

sample_dir = os.path.join(data_root, "20231114_RNA004_NP_Nuc")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S ---
# Was `ref_fragments = "18S"`, which left the fragment passed below to whatever
# the loop variable last held; set the singular name explicitly.
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

sample_dir = os.path.join(data_root, "20231114_RNA004_NP_Cyt")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S ---
ref_fragment = "18S"
condition = "IVT 18S"
sample_dir = os.path.join(data_root, "IVT_18S")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered_1M.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

# --- WBSCR KO 18S ---
ref_fragment = "18S"
condition = "WBSCR KO 18S"
sample_dir = os.path.join(data_root, "WBSCR_KO")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_pod5_rebasecalled.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#010101", reference_path, condition, read_ids,
)


plt.show()
fig.savefig(
    f"{name_modification}_comparison_WBSCR_KO_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

In [None]:
# Plot coordinate 5431 (labelled "unknown") for the NP Nucleus fragments, then
# for 18S from the NP Cytoplasm and IVT datasets, all on one pair of axes, and
# save the figure as SVG.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

#"30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first entry was "#4DZA7C", which is not a valid hex colour
# ("Z" is not a hex digit); assumed to be a mistyped "2" - confirm the shade.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used across these cells:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 5431
name_modification = "unknown"

# Expand "~" explicitly: the readers below are handed plain path strings and
# are not guaranteed to perform home-directory expansion themselves.
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")
# Built once and reused for every dataset below: each original call used the
# same k-mer table and the same options.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # keyword spelled as in the original call
)
bases_upstream = 10
bases_downstream = 10

# --- NP Nucleus: one trace per fragment in ref_fragments ---
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

sample_dir = os.path.join(data_root, "20231114_RNA004_NP_Nuc")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S ---
# Was `ref_fragments = "18S"`, which left the fragment passed below to whatever
# the loop variable last held; set the singular name explicitly.
ref_fragment = "18S"
# Was "#CB6261e" (seven hex digits, not a valid colour); the legend above lists
# NP Cytoplasm 18S as #CB6261.
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

sample_dir = os.path.join(data_root, "20231114_RNA004_NP_Cyt")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S ---
ref_fragment = "18S"
condition = "IVT 18S"
sample_dir = os.path.join(data_root, "IVT_18S")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered_1M.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

plt.show()
# File name previously ended in ".svg.svg"; a single extension is intended.
fig.savefig(
    f"{name_modification}_comparison_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)

In [None]:
# Plot coordinate 3714 (labelled "G3714") for the NP Nucleus fragments, then
# for 18S from the NP Cytoplasm and IVT datasets, all on one pair of axes, and
# save the figure as SVG.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
output_df = pd.DataFrame(columns=["fragment", "mean", "std", "n_resquiggled_reads"])

#"30S", "26S"
ref_fragments = ["21S", "21S-C", "18S-E", "18S"]
# NOTE(review): the first entry was "#4DZA7C", which is not a valid hex colour
# ("Z" is not a hex digit); assumed to be a mistyped "2" - confirm the shade.
colors = [
    "#4D2A7C",
    "#DB7093",
    "#2E8B58",
    "#4B6EB5",
]

# Colour legend used across these cells:
# NP/IVPA Nuc 21S #4D2A7C
# NP/IVPA Nuc 21S-C #DB7093
# NP Nuc 18S-E #2E8B58
# IVPA Nuc 18S-E #41BBEC
# NP Nucleus 18S #4B6EB5
# NP Cytoplasm 18S #CB6261
# IVPA Cytoplasm 18S #8A181A
# IVPA Nucleus 18S #282A74
# IVT 18S #FBD629
# KOs #010101

coordinate = 3714
name_modification = "G3714"

# Expand "~" explicitly: the readers below are handed plain path strings and
# are not guaranteed to perform home-directory expansion themselves.
data_root = os.path.expanduser("~/Synology/Data_nano_ribolyzer/directRNA_004")
reference_path = os.path.expanduser("~/wf-nanoribolyzer/references/RNA45SN1.fasta")
kmer_table = os.path.expanduser("~/kmer_models/rna004/9mer_levels_v1.txt")
# Built once and reused for every dataset below: each original call used the
# same k-mer table and the same options.
sig_map_refiner = refine_signal_map.SigMapRefiner(
    kmer_model_filename=kmer_table,
    do_rough_rescale=True,
    scale_iters=1,
    do_fix_guage=True,  # keyword spelled as in the original call
)
bases_upstream = 10
bases_downstream = 10

# --- NP Nucleus: one trace per fragment in ref_fragments ---
condition = "NP Nucleus"
name_condition = condition
condition_label = condition.replace("_", " ")

sample_dir = os.path.join(data_root, "20231114_RNA004_NP_Nuc")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)

for ref_fragment, color in zip(ref_fragments, colors):
    plot_reference_coordinate(
        pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
        coordinate, bases_upstream, bases_downstream, ref_fragment,
        ax1, ax2, color, reference_path, f"{condition} {ref_fragment}", read_ids,
    )

# --- NP Cytoplasm 18S ---
# Was `ref_fragments = "18S"`, which left the fragment passed below to whatever
# the loop variable last held; set the singular name explicitly.
ref_fragment = "18S"
color = "#CB6261"
condition = "NP Cytoplasm 18S"
name_condition = condition
condition_label = condition.replace("_", " ")

sample_dir = os.path.join(data_root, "20231114_RNA004_NP_Cyt")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, color, reference_path, condition, read_ids,
)

# --- IVT 18S ---
ref_fragment = "18S"
condition = "IVT 18S"
sample_dir = os.path.join(data_root, "IVT_18S")
pod5_dr = pod5.DatasetReader(os.path.join(sample_dir, "filtered_pod5", "filtered_1M.pod5"))
read_ids = list(pod5_dr.read_ids)
df = pl.read_csv(
    os.path.join(sample_dir, "template_based_analysis", "template_fragment_df.csv"),
    separator=";",
)
bam_fh = io.ReadIndexedBam(
    os.path.join(sample_dir, "filtered_pod5", "filtered_rebasecalled_aligned_1M.bam")
)
final_motif, extracted_sequence = plot_reference_coordinate(
    pod5_dr, df, bam_fh, kmer_table, sig_map_refiner,
    coordinate, bases_upstream, bases_downstream, ref_fragment,
    ax1, ax2, "#FBD629", reference_path, condition, read_ids,
)

plt.show()
# File name previously ended in ".svg.svg"; a single extension is intended.
fig.savefig(
    f"{name_modification}_comparison_NP_Nucleus_Cytoplasm_intermediates_signal_and_dwell_time_asymmetric_std_trimmed.svg",
    format="svg",
)